From 5a590cbadbe8bd8a63a27211dccb58ce3ac6b56a Mon Sep 17 00:00:00 2001 From: Kirill Zhukov Date: Fri, 21 Apr 2023 15:13:48 +0200 Subject: [PATCH] build_analytics: [ALBS-1077] Now we delete build if it was deleted from ALBS Bugfix 'KeyError' when db_port/db_host is not set Bugfix update_builds ignoring oldest_to_update attribute --- build_analytics/build_analytics/api_client.py | 9 +++- build_analytics/build_analytics/db.py | 43 ++++++++++++------- .../build_analytics/extractor/extractor.py | 8 +++- .../build_analytics/extractor/start.py | 15 ++++--- .../build_analytics/models/db_config.py | 8 +++- 5 files changed, 58 insertions(+), 25 deletions(-) diff --git a/build_analytics/build_analytics/api_client.py b/build_analytics/build_analytics/api_client.py index a16c8d6..512e5f9 100644 --- a/build_analytics/build_analytics/api_client.py +++ b/build_analytics/build_analytics/api_client.py @@ -47,11 +47,18 @@ class APIclient(): b, err, exc_info=True) return result - def get_build(self, build_id: int) -> Build: + def get_build(self, build_id: int) -> Optional[Build]: + ''' + method returns None if build was deleted from ALBS + ''' ep = f'/api/v1/builds/{build_id}' url = urljoin(self.api_root, ep) headers = {'accept': 'application/json'} response = requests.get(url, headers=headers, timeout=self.timeout) + + if response.status_code == 404: + return None + response.raise_for_status() return self._parse_build(response.json()) diff --git a/build_analytics/build_analytics/db.py b/build_analytics/build_analytics/db.py index 21efd0f..f525f75 100644 --- a/build_analytics/build_analytics/db.py +++ b/build_analytics/build_analytics/db.py @@ -62,34 +62,34 @@ class DB(): build_task.started_at, build_task.finished_at, build_task.status_id)) # inserting web node stats - for stat in web_node_stats: + for wn_stat in web_node_stats: # do not insert empty stats - if stat.start_ts is None: + if wn_stat.start_ts is None: continue sql = ''' INSERT INTO web_node_stats (build_task_id, 
stat_name_id, start_ts, end_ts) VALUES (%s, %s, %s, %s); ''' - cur.execute(sql, (stat.build_task_id, stat.stat_name_id, - stat.start_ts, stat.end_ts)) + cur.execute(sql, (wn_stat.build_task_id, wn_stat.stat_name_id, + wn_stat.start_ts, wn_stat.end_ts)) logging.debug('raw SQL query: %s', cur.query) self.__conn.commit() # inserting build node stats - for stat in build_node_stats: + for bn_stat in build_node_stats: # do not insert empty stats - if stat.start_ts is None: + if bn_stat.start_ts is None: continue sql = ''' INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts) VALUES (%s, %s, %s, %s); ''' - cur.execute(sql, (stat.build_task_id, stat.stat_name_id, - stat.start_ts, stat.end_ts)) + cur.execute(sql, (bn_stat.build_task_id, bn_stat.stat_name_id, + bn_stat.start_ts, bn_stat.end_ts)) logging.debug('raw SQL query: %s', cur.query) # commiting changes @@ -121,11 +121,12 @@ class DB(): # getting unfinished builds sql = 'SELECT id FROM builds where finished_at is NULL AND created_at > %s;' + builds_to_check: Dict[int, bool] = {} cur = self.__conn.cursor() cur.execute(sql, (not_before.timestamp(),)) logging.debug('raw SQL query: %s', cur.query) for row in cur.fetchall(): - res[row[0]] = {} + builds_to_check[row[0]] = True # getting list of unfinished tasks sql = 'SELECT id, build_id, status_id FROM build_tasks WHERE status_id < 2;' @@ -135,6 +136,8 @@ class DB(): build_task_id: int = row[0] build_id: int = row[1] status_id: int = row[2] + if build_id not in builds_to_check: + continue try: res[build_id][build_task_id] = status_id except KeyError: @@ -195,11 +198,11 @@ class DB(): logging.debug('raw SQL query: %s', cur.query) # updating build_node_stats - for stat in build_node_stats: + for bn_stat in build_node_stats: logging.debug( - 'updating build_node_stats %s build_task %s', stat.stat_name_id, build_task.id) - if self.stat_exists(task_id=stat.build_task_id, - stat_name_id=stat.stat_name_id, + 'updating build_node_stats %s build_task %s', 
bn_stat.stat_name_id, build_task.id) + if self.stat_exists(task_id=bn_stat.build_task_id, + stat_name_id=bn_stat.stat_name_id, table_name='build_node_stats', column_name='build_task_id'): sql = ''' @@ -213,9 +216,9 @@ class DB(): VALUES (%(build_task_id)s, %(stat_name_id)s, %(start_ts)s, %(end_ts)s); ''' params = {'build_task_id': build_task.id, - 'stat_name_id': stat.stat_name_id, - 'start_ts': stat.start_ts, - 'end_ts': stat.end_ts} + 'stat_name_id': bn_stat.stat_name_id, + 'start_ts': bn_stat.start_ts, + 'end_ts': bn_stat.end_ts} logging.debug('raw SQL query: %s', cur.query) cur.execute(sql, params) @@ -318,3 +321,11 @@ class DB(): s.start_ts, s.finish_ts)) # commiting changes self.__conn.commit() + + def delete_build(self, build_id: int): + params = (build_id,) + sql = "DELETE FROM builds WHERE id = %s;" + cur = self.__conn.cursor() + + cur.execute(sql, params) + self.__conn.commit() diff --git a/build_analytics/build_analytics/extractor/extractor.py b/build_analytics/build_analytics/extractor/extractor.py index dfac09a..bd1a99a 100644 --- a/build_analytics/build_analytics/extractor/extractor.py +++ b/build_analytics/build_analytics/extractor/extractor.py @@ -105,13 +105,19 @@ class Extractor: b.build_id, b.id, BuildTaskEnum(b.status_id).name) def update_builds(self): - logging.info('Getting list of tasks from DB') + logging.info('Getting unfinished builds that were created after %s ', + self.config.oldest_to_update) unfinished_tasks = self.db.get_unfinished_builds( self.config.oldest_to_update) for build_id, build_tasks_db in unfinished_tasks.items(): try: logging.info('Getting status of build %d', build_id) build = self.api.get_build(build_id) + if not build: + logging.warning( + "build %s was deleted from albs, removing it", build_id) + self.db.delete_build(build_id) + continue logging.info('Updating build tasks') build_tasks_to_check = [ diff --git a/build_analytics/build_analytics/extractor/start.py b/build_analytics/build_analytics/extractor/start.py 
index 6adb6ab..7a04510 100644 --- a/build_analytics/build_analytics/extractor/start.py +++ b/build_analytics/build_analytics/extractor/start.py @@ -3,6 +3,7 @@ import logging from logging.handlers import RotatingFileHandler import sys import time +from typing import Dict, Any import yaml @@ -26,11 +27,15 @@ def __get_config(yml_path: str) -> ExtractorConfig: raw['oldest_build_age'] = datetime.now().astimezone() \ - timedelta(days=raw['data_store_days']) - raw['db_config'] = DbConfig(name=raw['db_name'], - port=int(raw['db_port']), - host=raw['db_host'], - username=raw['db_username'], - password=raw['db_password']) + # Dbconfig + db_params: Dict[str, Any] = {'name': raw['db_name'], + 'username': raw['db_username'], + 'password': raw['db_password'], } + if 'db_port' in raw: + db_params['port'] = raw['db_port'] + if 'db_host' in raw: + db_params['host'] = raw['db_host'] + raw['db_config'] = DbConfig(**db_params) if 'oldest_to_update_days' in raw: raw['oldest_to_update_days'] = datetime.now().astimezone() \ diff --git a/build_analytics/build_analytics/models/db_config.py b/build_analytics/build_analytics/models/db_config.py index 24c5efd..8eb92b3 100644 --- a/build_analytics/build_analytics/models/db_config.py +++ b/build_analytics/build_analytics/models/db_config.py @@ -1,9 +1,13 @@ from pydantic import BaseModel, Field +DB_PORT = 5432 +DB_HOST = "localhost" + + class DbConfig(BaseModel): name: str = Field(description="db name") - port: int = Field(description="db server port") - host: str = Field(description="db server ip/hostname") + port: int = Field(description="db server port", default=DB_PORT) + host: str = Field(description="db server ip/hostname", default=DB_HOST) username: str = Field(description="username to connect with") password: str = Field(description="password to connect with1")