From 919f417463af5c7708284c72c764203e5d753bb8 Mon Sep 17 00:00:00 2001 From: Kirill Zhukov Date: Wed, 22 Mar 2023 14:32:13 +0100 Subject: [PATCH] - bugfix with migration to version 3 - added docs about api_timeout config parameter - bugfix with processing of test tasks with new revision --- build_analytics/build_analytics/api_client.py | 22 ++++-- build_analytics/build_analytics/db.py | 77 ++++++++----------- .../build_analytics/extractor/extractor.py | 12 +-- build_analytics/config_default.yml | 6 +- build_analytics/migrations/3.sql | 5 +- 5 files changed, 62 insertions(+), 60 deletions(-) diff --git a/build_analytics/build_analytics/api_client.py b/build_analytics/build_analytics/api_client.py index 0497acb..a6187ff 100644 --- a/build_analytics/build_analytics/api_client.py +++ b/build_analytics/build_analytics/api_client.py @@ -143,14 +143,22 @@ class APIclient(): return Build(**params) def get_test_tasks(self, build_task_id: int) -> List[TestTask]: - ep = f'/api/v1/tests/{build_task_id}/latest' - url = urljoin(self.api_root, ep) - headers = {'accept': 'application/json'} + result: List[TestTask] = [] + revision = 1 + while True: + ep = f'/api/v1/tests/{build_task_id}/{revision}' + url = urljoin(self.api_root, ep) + headers = {'accept': 'application/json'} - response = requests.get( - url, headers=headers, timeout=self.timeout) - response.raise_for_status() - return self.__parse_test_tasks(response.json(), build_task_id) + response = requests.get( + url, headers=headers, timeout=self.timeout) + response.raise_for_status() + raw_tasks = response.json() + if len(raw_tasks) == 0: + break + result = result + self.__parse_test_tasks(raw_tasks, build_task_id) + revision += 1 + return result def __parse_test_tasks(self, raw_tasks: List[Dict[str, Any]], build_task_id: int, diff --git a/build_analytics/build_analytics/db.py b/build_analytics/build_analytics/db.py index 0dbd7ed..3509bb3 100644 --- a/build_analytics/build_analytics/db.py +++ b/build_analytics/build_analytics/db.py @@ -26,14 +26,15 @@ class DB(): def __del__(self): self.close_conn() - def build_exists(self, build_id: int) -> bool: + def row_exists(self, pk: int, table: str) -> bool: + assert table in ['builds', 'test_tasks'] sql = f''' - SELECT COUNT(id) - FROM builds - WHERE id = %s; - ''' + SELECT COUNT(id) + FROM {table} + WHERE id = %s; + ''' cur = self.__conn.cursor() - cur.execute(sql, (build_id,)) + cur.execute(sql, (pk,)) val = int(cur.fetchone()[0]) return val == 1 @@ -245,31 +246,6 @@ class DB(): val = int(cur.fetchone()[0]) return val == 1 - def insert_test_task(self, task: TestTaskDB): - cur = self.__conn.cursor() - # inserting test task itself - sql = ''' - INSERT INTO test_tasks(id, build_task_id, revision, status_id, package_fullname, started_at) - VALUES - (%s, %s, %s, %s, %s, %s); - ''' - cur.execute(sql, (task.id, task.build_task_id, task.revision, task.status_id, - task.package_fullname, task.started_at)) - - if task.steps_stats: - # inserting test steps stats - for ss in task.steps_stats: - sql = ''' - INSERT INTO test_steps_stats (test_task_id, stat_name_id, start_ts, finish_ts) - VALUES - (%s, %s, %s, %s); - ''' - cur.execute(sql, (ss.test_task_id, ss.stat_name_id, - ss.start_ts, ss.finish_ts)) - - # commiting changes - self.__conn.commit() - def get_build_tasks_for_unfinished_tests(self, not_before: datetime) -> List[int]: ''' getting build tasks id of unfinished test tasks @@ -287,27 +263,37 @@ class DB(): result = [int(row[0]) for row in cur.fetchall()] return result - def update_test_tasks(self, test_tasks: List[TestTaskDB]): + def insert_update_test_tasks(self, test_tasks: List[TestTaskDB]): cur = self.__conn.cursor() # test tasks for task in test_tasks: - sql = ''' - UPDATE test_tasks - SET revision = %s, - status_id = %s, - started_at = %s - WHERE id = %s; - ''' - cur.execute(sql, (task.revision, task.status_id, - task.started_at, task.id)) - assert cur.rowcount == 1 + if self.row_exists(pk=task.id, table='test_tasks'): + sql = ''' + UPDATE test_tasks + SET revision = %s, + status_id = %s, + started_at = %s + WHERE id = %s; + ''' + cur.execute(sql, (task.revision, task.status_id, + task.started_at, task.id)) + assert cur.rowcount == 1 + else: + sql = ''' + INSERT INTO test_tasks( + id, build_task_id, revision, status_id, package_fullname, started_at) + VALUES + (%s, %s, %s, %s, %s, %s); + ''' + cur.execute(sql, (task.id, task.build_task_id, task.revision, task.status_id, + task.package_fullname, task.started_at)) # test step if not task.steps_stats: continue for s in task.steps_stats: - logging.info('test_task_id %s, stat_name_id %s', - s.test_task_id, s.stat_name_id) + logging.debug('test_task_id %s, stat_name_id %s', + s.test_task_id, s.stat_name_id) if self.stat_exists(s.test_task_id, s.stat_name_id, 'test_steps_stats', @@ -323,7 +309,8 @@ class DB(): assert cur.rowcount == 1 else: sql = ''' - INSERT INTO test_steps_stats (test_task_id, stat_name_id, start_ts, finish_ts) + INSERT INTO test_steps_stats ( + test_task_id, stat_name_id, start_ts, finish_ts) VALUES (%s, %s, %s, %s); ''' cur.execute(sql, (s.test_task_id, s.stat_name_id, diff --git a/build_analytics/build_analytics/extractor/extractor.py b/build_analytics/build_analytics/extractor/extractor.py index 4789b35..cea2946 100644 --- a/build_analytics/build_analytics/extractor/extractor.py +++ b/build_analytics/build_analytics/extractor/extractor.py @@ -35,7 +35,7 @@ class Extractor: break # some builds could move from one page to another - if self.db.build_exists(build_id=build.id): + if self.db.row_exists(pk=build.id, table='builds'): continue # inserting build build tasks and build tasks statistics @@ -64,10 +64,10 @@ class Extractor: 'getting test tasks for build task %s', build_task.id) test_tasks = self.api.get_test_tasks(build_task.id) logging.info('received %d tests tasks', len(test_tasks)) - for t in test_tasks: - logging.info( - 'build task %s: inserting test task %s', build_task.id, t.id) - self.db.insert_test_task(t.as_db_model()) + if len(test_tasks) > 0: + logging.info('inserting test tasks') + as_db = [t.as_db_model() for t in test_tasks] + self.db.insert_update_test_tasks(as_db) build_count += 1 page_num += 1 return build_count @@ -140,7 +140,7 @@ class Extractor: tasks_api = self.api.get_test_tasks(build_task_id) logging.info('updating test tasks') tasks_db = [t.as_db_model() for t in tasks_api] - self.db.update_test_tasks(tasks_db) + self.db.insert_update_test_tasks(tasks_db) except Exception as err: # pylint: disable=broad-except logging.error( 'failed to update tests for %d build task: %s', diff --git a/build_analytics/config_default.yml b/build_analytics/config_default.yml index febe104..5e5e1c3 100644 --- a/build_analytics/config_default.yml +++ b/build_analytics/config_default.yml @@ -62,10 +62,14 @@ scrape_interval: 3600 # default: 5808 (first build with correct metrics) start_from: - # oldest_to_update # oldest (in days) unfinished object (build/task/step...) that we will try to update # required: false # default: 7 oldest_to_update_days: 7 +# api_timeout +# how long (in seconds) we will wait for API response +# required: false +# default: 30 +api_timeout: 30 \ No newline at end of file diff --git a/build_analytics/migrations/3.sql b/build_analytics/migrations/3.sql index 9169351..c327cbb 100644 --- a/build_analytics/migrations/3.sql +++ b/build_analytics/migrations/3.sql @@ -68,7 +68,10 @@ CREATE INDEX test_steps_stats_start_ts ON test_steps_stats(start_ts); CREATE INDEX test_steps_stats_end_ts -ON test_steps_stats(end_ts); +ON test_steps_stats(finish_ts); + +-- increasing size of name field +ALTER TABLE build_tasks ALTER COLUMN name TYPE varchar(150); UPDATE schema_version