Compare commits

3 commits: c731cba102, 7c05bbacb6, d47fe3b4cd
```diff
@@ -27,6 +27,8 @@ class APIclient():
         self.api_root = api_root
         self.jwt = jwt
         self.timeout = timeout
+        # will be set at first call of __send_request
+        self.session: Optional[requests.Session] = None
 
     def get_builds(self, page_num: int = 1) -> List[Build]:
         ep = '/api/v1/builds'
@@ -34,8 +36,7 @@ class APIclient():
         params = {'pageNumber': page_num}
         headers = {'accept': 'appilication/json'}
 
-        response = requests.get(
-            url, params=params, headers=headers, timeout=self.timeout)
+        response = self.__send_request(url, 'get', params, headers)
         response.raise_for_status()
 
         result = []
@@ -54,7 +55,7 @@ class APIclient():
         ep = f'/api/v1/builds/{build_id}'
         url = urljoin(self.api_root, ep)
         headers = {'accept': 'application/json'}
-        response = requests.get(url, headers=headers, timeout=self.timeout)
+        response = self.__send_request(url, 'get', headers=headers)
 
         if response.status_code == 404:
             return None
@@ -228,3 +229,31 @@ class APIclient():
                 start_ts = stat.start_ts
 
         return start_ts
+
+    def __send_request(self,
+                       url: str,
+                       method: str,
+                       params: Optional[Dict[str, Any]] = None,
+                       headers: Optional[Dict[str, Any]] = None,
+                       ) -> requests.Response:
+        """
+        Simple wrapper around the requests.get/post/... methods
+        so we can reuse the same session between API calls
+        """
+        if not self.session:
+            self.session = requests.Session()
+
+        m = getattr(self.session, method, None)
+        if not m:
+            raise ValueError(f"method {method} is not supported")
+
+        # pylint: disable=not-callable
+        return m(url, params=params, headers=headers, timeout=self.timeout)
+
+    def close_session(self):
+        if self.session:
+            self.session.close()
+            self.session = None
+
+    def __del__(self):
+        self.close_session()
```
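The change above routes every API call through `__send_request`, which lazily creates one `requests.Session` and reuses it, so consecutive calls share a pooled TCP/TLS connection instead of re-handshaking per request (the ALBS-1103 item in releases.txt below). A minimal usage sketch; the import path and the printed attribute are assumptions, while `get_builds` and `close_session` come from the diff:

```python
from api_client import APIclient  # import path is an assumption

client = APIclient(api_root='https://build.almalinux.org',
                   jwt='<token>', timeout=30)
try:
    builds = client.get_builds(page_num=1)  # first call creates the Session
    for build in client.get_builds(page_num=2):
        # further calls reuse the same pooled connection
        print(build.id)
finally:
    client.close_session()  # __del__ also calls this as a safety net
```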
```diff
@@ -3,7 +3,7 @@
 from enum import IntEnum
 
 # supported schema version
-DB_SCHEMA_VER = 3
+DB_SCHEMA_VER = 4
 
 
 # ENUMS
@@ -13,6 +13,8 @@ class ArchEnum(IntEnum):
     aarch64 = 2
     ppc64le = 3
     s390x = 4
+    src = 5
+    x86_64_v2 = 6
 
 
 class BuildTaskEnum(IntEnum):
```
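`DB_SCHEMA_VER` moving from 3 to 4 pairs with the new `migrations/4.sql` below. A hypothetical startup guard, assuming a DB-API cursor and the single-row `schema_version` table that the migration updates:

```python
def assert_schema_version(cur, expected: int = DB_SCHEMA_VER) -> None:
    # compare the deployed schema against the version this code supports
    cur.execute('SELECT version FROM schema_version')
    (version,) = cur.fetchone()
    if version != expected:
        raise RuntimeError(
            f'DB schema version {version}, expected {expected}; '
            'apply pending migrations first')
```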
```diff
@@ -1,8 +1,10 @@
 # pylint: disable=relative-beyond-top-level
 
+from datetime import datetime, timedelta
 import logging
 from typing import Dict, List
 
+
 from ..api_client import APIclient
 from ..const import BuildTaskEnum
 from ..db import DB
@@ -26,11 +28,13 @@ class Extractor:
         stop = False
 
         while not stop:
+            oldest_build_age = datetime.now().astimezone() - \
+                timedelta(days=self.config.data_store_days)
             logging.info("page: %s", page_num)
             for build in self.api.get_builds(page_num):
                 # check if we shoud stop processing build
                 if build.id <= last_build_id or \
-                   build.created_at <= self.config.oldest_build_age:
+                   build.created_at <= oldest_build_age:
                     stop = True
                     break
 
@@ -73,9 +77,10 @@ class Extractor:
         return build_count
 
     def build_cleanup(self):
-        logging.info('Removing all buidls older then %s',
-                     self.config.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
-        removed_count = self.db.cleanup_builds(self.config.oldest_build_age)
+        oldest_to_keep = datetime.now().astimezone() - \
+            timedelta(days=self.config.data_store_days)
+        logging.info('Removing all builds older than %s', oldest_to_keep)
+        removed_count = self.db.cleanup_builds(oldest_to_keep)
         logging.info('removed %d entries', removed_count)
 
     def __update_build_tasks(self, build_tasks: List[BuildTask],
@@ -105,10 +110,11 @@ class Extractor:
                     b.build_id, b.id, BuildTaskEnum(b.status_id).name)
 
     def update_builds(self):
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
         logging.info('Getting unfinished builds that were created after %s ',
-                     self.config.oldest_to_update)
-        unfinished_tasks = self.db.get_unfinished_builds(
-            self.config.oldest_to_update)
+                     not_before)
+        unfinished_tasks = self.db.get_unfinished_builds(not_before)
         for build_id, build_tasks_db in unfinished_tasks.items():
             try:
                 logging.info('Getting status of build %d', build_id)
@@ -137,10 +143,12 @@ class Extractor:
                               build_id, err, exc_info=True)
 
     def updating_test_tasks(self):
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
         logging.info('getting build tasks for builds created after %s',
-                     self.config.oldest_to_update)
+                     not_before)
         build_task_ids = self.db.get_build_tasks_for_tests_update(
-            self.config.oldest_to_update)
+            not_before)
         for build_task_id in build_task_ids:
             try:
                 logging.info('getting tests for build task %s', build_task_id)
```
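The substance of this file's change: cutoffs are now recomputed from the `*_days` integers on every pass instead of being read from datetimes frozen when the config was parsed. Because the extractor runs as a long-lived loop that sleeps `scrape_interval` between scrapes, a startup-time cutoff drifts further from "now" each cycle. A minimal sketch of the difference, with 30 days as an assumed `data_store_days` value:

```python
from datetime import datetime, timedelta

data_store_days = 30  # assumed config value

# old behaviour: evaluated once at config load, then frozen forever
oldest_build_age_frozen = datetime.now().astimezone() - \
    timedelta(days=data_store_days)

def oldest_build_age() -> datetime:
    # new behaviour: evaluated per scrape iteration,
    # so the retention window slides with wall-clock time
    return datetime.now().astimezone() - timedelta(days=data_store_days)
```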
```diff
@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 import logging
 from logging.handlers import RotatingFileHandler
 import sys
@@ -23,10 +22,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
     with open(yml_path, 'r', encoding='utf-8') as flr:
         raw = yaml.safe_load(flr)
 
-    # adding new attrs
-    raw['oldest_build_age'] = datetime.now().astimezone() \
-        - timedelta(days=raw['data_store_days'])
-
     # Dbconfig
     db_params: Dict[str, Any] = {'name': raw['db_name'],
                                  'username': raw['db_username'],
@@ -37,10 +32,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
         db_params['host'] = raw['db_host']
     raw['db_config'] = DbConfig(**db_params)
 
-    if 'oldest_to_update_days' in raw:
-        raw['oldest_to_update_days'] = datetime.now().astimezone() \
-            - timedelta(days=raw['oldest_to_update_days'])
-
     return ExtractorConfig(**raw)
 
 
@@ -108,7 +99,10 @@ def start(yml_path: str):
         else:
             logging.info('test tasks were updated')
 
+        # freeing up resources
+        extractor.db.close_conn()
+        extractor.api.close_session()
+
         logging.info("Extraction was finished")
         logging.info("Sleeping for %d seconds", config.scrape_interval)
         time.sleep(config.scrape_interval)
```
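Closing the DB connection and HTTP session before the sleep keeps the idle process from holding a Postgres connection and an open socket for the whole `scrape_interval`. One possible hardening, not part of this change: wrap the extraction steps in `try/finally` so the cleanup also runs when a step raises; `run_extraction` is a hypothetical stand-in for the body of the loop:

```python
try:
    run_extraction(extractor)  # hypothetical wrapper around the steps above
finally:
    # freeing up resources even on failure
    extractor.db.close_conn()
    extractor.api.close_session()
```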
```diff
@@ -1,4 +1,3 @@
-from datetime import datetime, timedelta
 from pathlib import Path
 
 from pydantic import HttpUrl, Field, BaseModel  # pylint: disable=no-name-in-module
@@ -8,10 +7,10 @@ from .db_config import DbConfig
 # DEFAULTS
 ALBS_URL_DEFAULT = 'https://build.almalinux.org'
 LOG_FILE_DEFAULT = '/tmp/extractor.log'
-API_DEFAULT = 30
+API_TIMEOUT_DEFAULT = 30
 SCRAPE_INTERVAL_DEFAULT = 3600
 START_FROM_DEFAULT = 5808
-OLDEST_TO_UPDATE_DEFAULT = datetime.now().astimezone() - timedelta(days=7)
+OLDEST_TO_UPDATE_DAYS_DEFAULT = 7
 
 
 class ExtractorConfig(BaseModel):
@@ -22,17 +21,17 @@ class ExtractorConfig(BaseModel):
                            default=LOG_FILE_DEFAULT)
     albs_url: HttpUrl = Field(description='ALBS root URL',
                               default=ALBS_URL_DEFAULT)
-    oldest_build_age: datetime = \
-        Field(description='oldest build age to store')
+    data_store_days: int = \
+        Field(description='oldest build (in days) to keep in DB')
     jwt: str = Field(description='ALBS JWT token')
     db_config: DbConfig = Field(description="database configuration")
     api_timeout: int = Field(
         description="max time in seconds to wait for API response",
-        default=API_DEFAULT)
+        default=API_TIMEOUT_DEFAULT)
     scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
                                  default=SCRAPE_INTERVAL_DEFAULT)
     start_from: int = Field(description='build id to start populating empty db with',
                             default=START_FROM_DEFAULT)
-    oldest_to_update: datetime = \
-        Field(description='oldest unfinished object (build/task/step...) that we will try to update',
-              default=OLDEST_TO_UPDATE_DEFAULT)
+    oldest_to_update_days: int = \
+        Field(description='oldest (in days) unfinished object (build/task/step...) that we will try to update',
+              default=OLDEST_TO_UPDATE_DAYS_DEFAULT)
```
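With the `datetime` fields replaced by plain `*_days` integers, the pydantic defaults are static numbers rather than timestamps baked in at import time, and `__get_config` no longer post-processes the YAML. A hypothetical construction showing the new defaults; the `DbConfig` keyword names are inferred from the `db_params` dict in `__get_config` above:

```python
config = ExtractorConfig(
    data_store_days=30,
    jwt='<ALBS JWT token>',
    db_config=DbConfig(name='albs_analytics',
                       username='albs_analytics',
                       password='super_secret_password',
                       port=5432),
)
assert config.api_timeout == 30           # API_TIMEOUT_DEFAULT
assert config.oldest_to_update_days == 7  # OLDEST_TO_UPDATE_DAYS_DEFAULT
```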
```diff
@@ -10,7 +10,6 @@ albs_url: https://build.almalinux.org
 # required: yes
 jwt: ""
 
-
 # db_host
 # IP/hostname of database server
 # required: no
@@ -28,7 +27,6 @@ db_port: 5432
 # required: yes
 db_username: albs_analytics
 
-
 # db_password
 # password to connect with
 # required: yes
@@ -39,7 +37,6 @@ db_password: super_secret_password
 # required: yes
 db_name: albs_analytics
 
-
 # log_file
 # file to  write logs to
 # required: no
@@ -62,7 +59,7 @@ scrape_interval: 3600
 # default: 5808 (first build with correct metrics)
 start_from: 5808
 
-# oldest_to_update
+# oldest_to_update_days
 # oldest (in days) unfinished object (build/task/step...) that we will try to update
 # required: false
 # default: 7
```
build_analytics/migrations/4.sql (new file, 11 lines)

```diff
@@ -0,0 +1,11 @@
+BEGIN;
+
+INSERT INTO arch_enum (id, value)
+VALUES
+    (5, 'src'),
+    (6, 'x86_64_v2');
+
+UPDATE schema_version
+SET version = 4;
+
+COMMIT;
```
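The migration has to stay in lockstep with `ArchEnum` above: ids 5 and 6 in `arch_enum` mirror the new `src` and `x86_64_v2` members. A hypothetical consistency check, assuming a DB-API cursor and the `(id, value)` shape used by the INSERT:

```python
def check_arch_enum(cur) -> None:
    cur.execute('SELECT id, value FROM arch_enum')
    rows = dict(cur.fetchall())  # id -> value
    for member in ArchEnum:
        if rows.get(member.value) != member.name:
            raise RuntimeError(
                f'arch_enum mismatch for id {member.value}: '
                f'db={rows.get(member.value)!r}, enum={member.name!r}')
```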
releases.txt (14 lines changed)

```diff
@@ -27,6 +27,18 @@ build-analytics
     - [ALBS-1077] start deleting builds that were removed from ALBS
   Bugfixes
     - 'Key error' when db_port/db_host is not set
-    - update_builds() ignoring opldest_to_update attribute
+    - update_builds() ignoring oldest_to_update attribute
     - [ALBS-1099] Test task started_at attribute is NULL
     - Max recursion error in 'Test task details.json'
+
+0.3.4 (2023-05-12)
+build_analytics
+  - Bugfix ALBS-1111
+
+0.3.5 (2023-06-01)
+build_analytics:
+  ALBS-1103 start using persistent HTTP connections
+
+0.3.6 (2024-10-08)
+build_analytics:
+  buildsystem#360 Added src and x86_64_v2 arches
```