From d47fe3b4cd1d2bb8ba9b27217dc62e9aa969c449 Mon Sep 17 00:00:00 2001
From: Kirill Zhukov
Date: Fri, 12 May 2023 11:22:55 +0200
Subject: [PATCH] Release 0.3.4 (2023-05-12)

build_analytics
 - Bugfix ALBS-1111
---
 .../build_analytics/extractor/extractor.py | 26 ++++++++++++-------
 .../build_analytics/extractor/start.py     |  9 -------
 .../models/extractor_config.py             | 17 ++++++------
 build_analytics/config_default.yml         |  5 +---
 releases.txt                               |  4 +++
 5 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/build_analytics/build_analytics/extractor/extractor.py b/build_analytics/build_analytics/extractor/extractor.py
index bd1a99a..05e8a9b 100644
--- a/build_analytics/build_analytics/extractor/extractor.py
+++ b/build_analytics/build_analytics/extractor/extractor.py
@@ -1,8 +1,10 @@
 # pylint: disable=relative-beyond-top-level
+from datetime import datetime, timedelta
 
 import logging
 from typing import Dict, List
 
+
 from ..api_client import APIclient
 from ..const import BuildTaskEnum
 from ..db import DB
@@ -26,11 +28,13 @@ class Extractor:
         stop = False
 
         while not stop:
+            oldest_build_age = datetime.now().astimezone() - \
+                timedelta(days=self.config.data_store_days)
             logging.info("page: %s", page_num)
             for build in self.api.get_builds(page_num):
                 # check if we shoud stop processing build
                 if build.id <= last_build_id or \
-                        build.created_at <= self.config.oldest_build_age:
+                        build.created_at <= oldest_build_age:
                     stop = True
                     break
 
@@ -73,9 +77,10 @@ class Extractor:
         return build_count
 
     def build_cleanup(self):
-        logging.info('Removing all buidls older then %s',
-                     self.config.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
-        removed_count = self.db.cleanup_builds(self.config.oldest_build_age)
+        oldest_to_keep = datetime.now().astimezone() - \
+            timedelta(days=self.config.data_store_days)
+        logging.info('Removing all builds older than %s', oldest_to_keep)
+        removed_count = self.db.cleanup_builds(oldest_to_keep)
         logging.info('removed %d entries', removed_count)
 
     def __update_build_tasks(self, build_tasks: List[BuildTask],
@@ -105,10 +110,11 @@ class Extractor:
                          b.build_id, b.id, BuildTaskEnum(b.status_id).name)
 
     def update_builds(self):
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
         logging.info('Getting unfinished builds that were created after %s ',
-                     self.config.oldest_to_update)
-        unfinished_tasks = self.db.get_unfinished_builds(
-            self.config.oldest_to_update)
+                     not_before)
+        unfinished_tasks = self.db.get_unfinished_builds(not_before)
         for build_id, build_tasks_db in unfinished_tasks.items():
             try:
                 logging.info('Getting status of build %d', build_id)
@@ -137,10 +143,12 @@ class Extractor:
                               build_id, err, exc_info=True)
 
     def updating_test_tasks(self):
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
         logging.info('getting build tasks for builds created after %s',
-                     self.config.oldest_to_update)
+                     not_before)
         build_task_ids = self.db.get_build_tasks_for_tests_update(
-            self.config.oldest_to_update)
+            not_before)
         for build_task_id in build_task_ids:
             try:
                 logging.info('getting tests for build task %s', build_task_id)
diff --git a/build_analytics/build_analytics/extractor/start.py b/build_analytics/build_analytics/extractor/start.py
index 7a04510..c32aaca 100644
--- a/build_analytics/build_analytics/extractor/start.py
+++ b/build_analytics/build_analytics/extractor/start.py
@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 import logging
 from logging.handlers import RotatingFileHandler
 import sys
@@ -23,10 +22,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
     with open(yml_path, 'r', encoding='utf-8') as flr:
         raw = yaml.safe_load(flr)
 
-    # adding new attrs
-    raw['oldest_build_age'] = datetime.now().astimezone() \
-        - timedelta(days=raw['data_store_days'])
-
     # Dbconfig
     db_params: Dict[str, Any] = {'name': raw['db_name'],
                                  'username': raw['db_username'],
@@ -37,10 +32,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
         db_params['host'] = raw['db_host']
     raw['db_config'] = DbConfig(**db_params)
 
-    if 'oldest_to_update_days' in raw:
-        raw['oldest_to_update_days'] = datetime.now().astimezone() \
-            - timedelta(days=raw['oldest_to_update_days'])
-
     return ExtractorConfig(**raw)
 
 
diff --git a/build_analytics/build_analytics/models/extractor_config.py b/build_analytics/build_analytics/models/extractor_config.py
index 852855b..ff46d54 100644
--- a/build_analytics/build_analytics/models/extractor_config.py
+++ b/build_analytics/build_analytics/models/extractor_config.py
@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 from pathlib import Path
 
 from pydantic import HttpUrl, Field, BaseModel  # pylint: disable=no-name-in-module
@@ -8,10 +7,10 @@ from .db_config import DbConfig
 # DEFAULTS
 ALBS_URL_DEFAULT = 'https://build.almalinux.org'
 LOG_FILE_DEFAULT = '/tmp/extractor.log'
-API_DEFAULT = 30
+API_TIMEOUT_DEFAULT = 30
 SCRAPE_INTERVAL_DEFAULT = 3600
 START_FROM_DEFAULT = 5808
-OLDEST_TO_UPDATE_DEFAULT = datetime.now().astimezone() - timedelta(days=7)
+OLDEST_TO_UPDATE_DAYS_DEFAULT = 7
 
 
 class ExtractorConfig(BaseModel):
@@ -22,17 +21,17 @@ class ExtractorConfig(BaseModel):
                           default=LOG_FILE_DEFAULT)
     albs_url: HttpUrl = Field(description='ALBS root URL',
                               default=ALBS_URL_DEFAULT)
-    oldest_build_age: datetime = \
-        Field(description='oldest build age to store')
+    data_store_days: int = \
+        Field(description='oldest build (in days) to keep in DB')
     jwt: str = Field(description='ALBS JWT token')
     db_config: DbConfig = Field(description="database configuration")
     api_timeout: int = Field(
         description="max time in seconds to wait for API response",
-        default=API_DEFAULT)
+        default=API_TIMEOUT_DEFAULT)
     scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
                                  default=SCRAPE_INTERVAL_DEFAULT)
     start_from: int = Field(description='build id to start populating empty db with',
                             default=START_FROM_DEFAULT)
-    oldest_to_update: datetime = \
-        Field(description='oldest unfinished object (build/task/step...) that we will try to update',
-              default=OLDEST_TO_UPDATE_DEFAULT)
+    oldest_to_update_days: int = \
+        Field(description='oldest (in days) unfinished object (build/task/step...) that we will try to update',
+              default=OLDEST_TO_UPDATE_DAYS_DEFAULT)
diff --git a/build_analytics/config_default.yml b/build_analytics/config_default.yml
index 7b2dad7..45bb864 100644
--- a/build_analytics/config_default.yml
+++ b/build_analytics/config_default.yml
@@ -10,7 +10,6 @@ albs_url: https://build.almalinux.org
 # required: yes
 jwt: ""
 
-
 # db_host
 # IP/hostname of database server
 # required: no
@@ -28,7 +27,6 @@ db_port: 5432
 # required: yes
 db_username: albs_analytics
 
-
 # db_password
 # password to connect with
 # required: yes
@@ -39,7 +37,6 @@ db_password: super_secret_password
 # required: yes
 db_name: albs_analytics
 
-
 # log_file
 # file to write logs to
 # required: no
@@ -62,7 +59,7 @@ scrape_interval: 3600
 # default: 5808 (first build with correct metrics)
 start_from: 5808
 
-# oldest_to_update
+# oldest_to_update_days
 # oldest (in days) unfinished object (build/task/step...) that we will try to update
 # required: false
 # default: 7
diff --git a/releases.txt b/releases.txt
index a6e30c1..59531e8 100644
--- a/releases.txt
+++ b/releases.txt
@@ -30,3 +30,7 @@ build-analytics
   - update_builds() ignoring opldest_to_update attribute
   - [ALBS-1099] Test task started_at attribute is NULL
   - Max recursion error in 'Test task details.json'
+
+0.3.4 (2023-05-12)
+build_analytics
+ - Bugfix ALBS-1111
\ No newline at end of file
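
The gist of the ALBS-1111 fix above: retention and update windows are kept in the config as plain day counts (data_store_days, oldest_to_update_days) and converted to datetimes at the moment they are used, instead of once at config load time, so a long-running extractor no longer compares builds against a stale cutoff. Below is a minimal sketch of that pattern; RetentionConfig, cutoff() and the default values are illustrative assumptions, not names or defaults from the build_analytics code.

# Minimal sketch (assumed names): integer day counts live in the config,
# and datetimes are derived per call so they never go stale in a long-running loop.
from datetime import datetime, timedelta

from pydantic import BaseModel


class RetentionConfig(BaseModel):
    """Illustrative stand-in for the day-count fields in ExtractorConfig."""
    data_store_days: int = 30
    oldest_to_update_days: int = 7


def cutoff(days: int) -> datetime:
    # Recomputed on every call, mirroring the per-run computation in extractor.py.
    return datetime.now().astimezone() - timedelta(days=days)


if __name__ == '__main__':
    cfg = RetentionConfig()
    print('keep builds newer than:   ', cutoff(cfg.data_store_days))
    print('update objects newer than:', cutoff(cfg.oldest_to_update_days))

Keeping the YAML values as integers (for example, a hypothetical data_store_days: 30) also keeps the config declarative: the same file means the same thing no matter when the service was started.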