Compare commits


10 Commits
0.3.0 ... main

Author SHA1 Message Date
Kirill Zhukov 7c05bbacb6 Release 0.3.5 (2023-06-01)
build_analytics:
  ALBS-1103 start using persistent HTTP connections
2023-06-01 11:57:27 +02:00
Kirill Zhukov d47fe3b4cd Release 0.3.4 (2023-05-12)
build_analytics
  - Bugfix ALBS-1111
2023-05-12 11:22:55 +02:00
Kirill Zhukov f74bc0748a 0.3.3 (2023-04-24)
build-analytics
  Improvements
    - [ALBS-1077] start deleting builds that were removed from ALBS
  Bugfixes
    - 'Key error' when db_port/db_host is not set
    - update_builds() ignoring oldest_to_update attribute
    - [ALBS-1099] Test task started_at attribute is NULL
    - Max recursion error in 'Test task details.json'
2023-04-24 09:20:58 +02:00
Kirill Zhukov 5a590cbadb build_analytics:
[ALBS-1077] Now we delete build if it was deleted from ALBS
  Bugfix 'Key error' when db_port/db_host is not set
  Bugfix update_builds ignoring oldest_to_update attribute
2023-04-21 15:13:48 +02:00
kzhukov 4b5adb52d5 ALBS-1099 (#4)
Co-authored-by: Kirill Zhukov <kzhukov@cloudlinux.com>
Reviewed-on: #4
2023-04-21 07:53:09 +00:00
Kirill Zhukov 40ce2c583d Release 0.3.2 (2023-03-23)
- Bugfix ALBS-1060
2023-03-23 13:06:43 +01:00
Kirill Zhukov 4145ce8e9e Bugfix ALBS-1060 2023-03-23 13:04:28 +01:00
Kirill Zhukov ae8b2a7089 0.3.1 (2023-03-22)
- db: bugfix with migration to version 3
- added info about api_timeout config parameter
- bugfix with processing of test tasks with new revision
2023-03-22 14:39:14 +01:00
kzhukov 9f3796db07 Merge pull request '0.3.1' (#3) from 0.3.1 into main
Reviewed-on: #3
2023-03-22 13:37:03 +00:00
Kirill Zhukov 919f417463 - bugfix with migration to version 3
- added docs about api_timeout config parameter
- bugfix with processing of test tasks with new revision
2023-03-22 14:35:01 +01:00
11 changed files with 275 additions and 173 deletions

View File

@@ -1,7 +1,7 @@
 from datetime import datetime
 import logging
 from urllib.parse import urljoin
-from typing import Dict, List, Any
+from typing import Dict, List, Any, Optional

 import requests
@@ -27,6 +27,8 @@ class APIclient():
         self.api_root = api_root
         self.jwt = jwt
         self.timeout = timeout
+        # will be set at first call of __send_request
+        self.session: Optional[requests.Session] = None

     def get_builds(self, page_num: int = 1) -> List[Build]:
         ep = '/api/v1/builds'
@@ -34,8 +36,7 @@
         params = {'pageNumber': page_num}
         headers = {'accept': 'appilication/json'}

-        response = requests.get(
-            url, params=params, headers=headers, timeout=self.timeout)
+        response = self.__send_request(url, 'get', params, headers)
         response.raise_for_status()

         result = []
@@ -47,11 +48,18 @@
                     b, err, exc_info=True)
         return result

-    def get_build(self, build_id: int) -> Build:
+    def get_build(self, build_id: int) -> Optional[Build]:
+        '''
+        method returns None if build was deleted from ALBS
+        '''
         ep = f'/api/v1/builds/{build_id}'
         url = urljoin(self.api_root, ep)
         headers = {'accept': 'application/json'}

-        response = requests.get(url, headers=headers, timeout=self.timeout)
+        response = self.__send_request(url, 'get', headers=headers)
+        if response.status_code == 404:
+            return None
         response.raise_for_status()
         return self._parse_build(response.json())
@@ -104,7 +112,8 @@ class APIclient():
         if not task['performance_stats']:
             logging.warning(
                 "no perfomance_stats for build_id: %s, build_task_id: %s", build_id, task['id'])
-            stats = {'build_node_stats': {}, 'build_done_stats': {}}
+            stats: dict[str, Any] = {
+                'build_node_stats': {}, 'build_done_stats': {}}
         else:
             stats = task['performance_stats'][0]['statistics']
@@ -143,27 +152,28 @@ class APIclient():
         return Build(**params)

     def get_test_tasks(self, build_task_id: int) -> List[TestTask]:
-        ep = f'/api/v1/tests/{build_task_id}/latest'
-        url = urljoin(self.api_root, ep)
-        headers = {'accept': 'application/json'}
-        response = requests.get(
-            url, headers=headers, timeout=self.timeout)
-        response.raise_for_status()
-        return self.__parse_test_tasks(response.json(), build_task_id)
+        result: List[TestTask] = []
+        revision = 1
+        while True:
+            ep = f'/api/v1/tests/{build_task_id}/{revision}'
+            url = urljoin(self.api_root, ep)
+            headers = {'accept': 'application/json'}
+            response = requests.get(
+                url, headers=headers, timeout=self.timeout)
+            response.raise_for_status()
+            raw_tasks = response.json()
+            if len(raw_tasks) == 0:
+                break
+            result = result + self.__parse_test_tasks(raw_tasks, build_task_id)
+            revision += 1
+        return result

     def __parse_test_tasks(self, raw_tasks: List[Dict[str, Any]],
-                           build_task_id: int,
-                           started_at: str = None) -> List[TestTask]:
+                           build_task_id: int) -> List[TestTask]:
         result: List[TestTask] = []
         for task in raw_tasks:
             if task['alts_response']:
-                try:
-                    started_raw = task['alts_response']['stats']['started_at']
-                except KeyError:
-                    started_at = None
-                else:
-                    started_at = datetime.fromisoformat(started_raw+TZ_OFFSET)
                 try:
                     stats_raw = task['alts_response']['stats']
                 except KeyError:
@@ -171,7 +181,6 @@ class APIclient():
                 else:
                     steps_stats = self.__parse_test_steps_stats(stats_raw)
             else:
-                started_at = None
                 steps_stats = None
             params = {
                 'id': task['id'],
@@ -181,7 +190,8 @@
                 'package_fullname': '_'.join([task['package_name'],
                                               task['package_version'],
                                               task['package_release']]),
-                'started_at': started_at,
+                'started_at': self.__get_test_task_started_at(
+                    steps_stats) if steps_stats else None,
                 'steps_stats': steps_stats
             }
@@ -201,3 +211,49 @@
                 p[k] = datetime.fromisoformat(p[k]+TZ_OFFSET)
             teast_steps_params[field_name] = TestStepStat(**p)
         return TestStepsStats(**teast_steps_params)
+
+    def __get_test_task_started_at(self, stats: TestStepsStats) -> Optional[datetime]:
+        """
+        getting started_at attribute for test by using oldest start_ts timestamp
+        among all test tasks steps
+        """
+        if not stats:
+            return None
+        start_ts = None
+        for field_name in stats.__fields__.keys():
+            stat: TestStepStat = getattr(stats, field_name)
+            if not stat:
+                continue
+            if not start_ts or start_ts > stat.start_ts:
+                start_ts = stat.start_ts
+        return start_ts
+
+    def __send_request(self,
+                       url: str,
+                       method: str,
+                       params: Optional[Dict[str, Any]] = None,
+                       headers: Optional[Dict[str, Any]] = None,
+                       ) -> requests.Response:
+        """
+        Simple wrapper around requests.get/posts.. methods
+        so we can use same session between API calls
+        """
+        if not self.session:
+            self.session = requests.Session()
+        m = getattr(self.session, method, None)
+        if not m:
+            raise ValueError(f"method {method} is not supported")
+        # pylint: disable=not-callable
+        return m(url, params=params, headers=headers, timeout=self.timeout)
+
+    def close_session(self):
+        if self.session:
+            self.session.close()
+            self.session = None
+
+    def __del__(self):
+        self.close_session()
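
Note: the core of this release (ALBS-1103) is the __send_request wrapper above, which lazily creates a single requests.Session so HTTP keep-alive can reuse one TCP/TLS connection across API calls instead of reconnecting per request. A minimal standalone sketch of the same pattern; the class name and URL below are illustrative, not from this repo:

from typing import Any, Dict, Optional

import requests


class SessionClient:
    """Lazily creates one requests.Session and reuses it for every call."""

    def __init__(self, timeout: int = 30):
        self.timeout = timeout
        self.session: Optional[requests.Session] = None  # created on first use

    def request(self, url: str, method: str = 'get',
                params: Optional[Dict[str, Any]] = None,
                headers: Optional[Dict[str, Any]] = None) -> requests.Response:
        if not self.session:
            self.session = requests.Session()
        func = getattr(self.session, method, None)
        if func is None:
            raise ValueError(f"method {method} is not supported")
        return func(url, params=params, headers=headers, timeout=self.timeout)

    def close(self):
        if self.session:
            self.session.close()
            self.session = None


client = SessionClient()
# both calls ride the same underlying connection thanks to keep-alive
print(client.request('https://example.com').status_code)
print(client.request('https://example.com').status_code)
client.close()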

View File

@@ -26,14 +26,15 @@ class DB():
     def __del__(self):
         self.close_conn()

-    def build_exists(self, build_id: int) -> bool:
+    def row_exists(self, pk: int, table: str) -> bool:
+        assert table in ['builds', 'test_tasks']
         sql = f'''
                 SELECT COUNT(id)
-                FROM builds
+                FROM {table}
                 WHERE id = %s;
             '''
         cur = self.__conn.cursor()
-        cur.execute(sql, (build_id,))
+        cur.execute(sql, (pk,))
         val = int(cur.fetchone()[0])
         return val == 1
@@ -61,34 +62,34 @@
             build_task.started_at, build_task.finished_at, build_task.status_id))

         # inserting web node stats
-        for stat in web_node_stats:
+        for wn_stat in web_node_stats:
             # do not insert empty stats
-            if stat.start_ts is None:
+            if wn_stat.start_ts is None:
                 continue
             sql = '''
                 INSERT INTO web_node_stats (build_task_id, stat_name_id, start_ts, end_ts)
                 VALUES (%s, %s, %s, %s);
             '''
-            cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
-                              stat.start_ts, stat.end_ts))
+            cur.execute(sql, (wn_stat.build_task_id, wn_stat.stat_name_id,
+                              wn_stat.start_ts, wn_stat.end_ts))
             logging.debug('raw SQL query: %s', cur.query)
             self.__conn.commit()

         # inserting build node stats
-        for stat in build_node_stats:
+        for bn_stat in build_node_stats:
             # do not insert empty stats
-            if stat.start_ts is None:
+            if bn_stat.start_ts is None:
                 continue
             sql = '''
                 INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
                 VALUES (%s, %s, %s, %s);
             '''
-            cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
-                              stat.start_ts, stat.end_ts))
+            cur.execute(sql, (bn_stat.build_task_id, bn_stat.stat_name_id,
+                              bn_stat.start_ts, bn_stat.end_ts))
             logging.debug('raw SQL query: %s', cur.query)

         # commiting changes
@@ -120,11 +121,12 @@
         # getting unfinished builds
         sql = 'SELECT id FROM builds where finished_at is NULL AND created_at > %s;'
+        builds_to_check: Dict[int, bool] = {}
         cur = self.__conn.cursor()
         cur.execute(sql, (not_before.timestamp(),))
         logging.debug('raw SQL query: %s', cur.query)
         for row in cur.fetchall():
-            res[row[0]] = {}
+            builds_to_check[row[0]] = True

         # getting list of unfinished tasks
         sql = 'SELECT id, build_id, status_id FROM build_tasks WHERE status_id < 2;'
@@ -134,6 +136,8 @@
             build_task_id: int = row[0]
             build_id: int = row[1]
             status_id: int = row[2]
+            if build_id not in builds_to_check:
+                continue
             try:
                 res[build_id][build_task_id] = status_id
             except KeyError:
@@ -194,11 +198,11 @@
             logging.debug('raw SQL query: %s', cur.query)

         # updating build_node_stats
-        for stat in build_node_stats:
+        for bn_stat in build_node_stats:
             logging.debug(
-                'updating build_node_stats %s build_task %s', stat.stat_name_id, build_task.id)
-            if self.stat_exists(task_id=stat.build_task_id,
-                                stat_name_id=stat.stat_name_id,
+                'updating build_node_stats %s build_task %s', bn_stat.stat_name_id, build_task.id)
+            if self.stat_exists(task_id=bn_stat.build_task_id,
+                                stat_name_id=bn_stat.stat_name_id,
                                 table_name='build_node_stats',
                                 column_name='build_task_id'):
                 sql = '''
@@ -212,9 +216,9 @@
                     VALUES (%(build_task_id)s, %(stat_name_id)s, %(start_ts)s, %(end_ts)s);
                 '''
             params = {'build_task_id': build_task.id,
-                      'stat_name_id': stat.stat_name_id,
-                      'start_ts': stat.start_ts,
-                      'end_ts': stat.end_ts}
+                      'stat_name_id': bn_stat.stat_name_id,
+                      'start_ts': bn_stat.start_ts,
+                      'end_ts': bn_stat.end_ts}
             logging.debug('raw SQL query: %s', cur.query)
             cur.execute(sql, params)
@@ -245,69 +249,55 @@
         val = int(cur.fetchone()[0])
         return val == 1

-    def insert_test_task(self, task: TestTaskDB):
-        cur = self.__conn.cursor()
-        # inserting test task itself
-        sql = '''
-            INSERT INTO test_tasks(id, build_task_id, revision, status_id, package_fullname, started_at)
-            VALUES
-            (%s, %s, %s, %s, %s, %s);
-        '''
-        cur.execute(sql, (task.id, task.build_task_id, task.revision, task.status_id,
-                          task.package_fullname, task.started_at))
-        if task.steps_stats:
-            # inserting test steps stats
-            for ss in task.steps_stats:
-                sql = '''
-                    INSERT INTO test_steps_stats (test_task_id, stat_name_id, start_ts, finish_ts)
-                    VALUES
-                    (%s, %s, %s, %s);
-                '''
-                cur.execute(sql, (ss.test_task_id, ss.stat_name_id,
-                                  ss.start_ts, ss.finish_ts))
-        # commiting changes
-        self.__conn.commit()
-
-    def get_build_tasks_for_unfinished_tests(self, not_before: datetime) -> List[int]:
+    def get_build_tasks_for_tests_update(self, not_before: datetime) -> List[int]:
         '''
-        getting build tasks id of unfinished test tasks
+        Getting build tasks id for test tasks that we need to update
+        https://cloudlinux.atlassian.net/browse/ALBS-1060
         '''
         cur = self.__conn.cursor()
         sql = '''
-            SELECT DISTINCT bt.id
-            FROM build_tasks as bt
-            INNER JOIN test_tasks AS tt
-            ON bt.id = tt.build_task_id
-            WHERE tt.status_id < 3 AND bt.started_at > %s;
+            SELECT bt.id
+            FROM build_tasks AS bt
+            INNER JOIN builds AS b
+            ON b.id = bt.build_id
+            WHERE b.created_at > %s;
         '''
         cur.execute(sql, (not_before.timestamp(),))
         logging.debug('raw SQL query: %s', cur.query)
         result = [int(row[0]) for row in cur.fetchall()]
         return result

-    def update_test_tasks(self, test_tasks: List[TestTaskDB]):
+    def insert_update_test_tasks(self, test_tasks: List[TestTaskDB]):
         cur = self.__conn.cursor()
         # test tasks
         for task in test_tasks:
-            sql = '''
-                UPDATE test_tasks
-                SET revision = %s,
-                    status_id = %s,
-                    started_at = %s
-                WHERE id = %s;
-            '''
-            cur.execute(sql, (task.revision, task.status_id,
-                              task.started_at, task.id))
-            assert cur.rowcount == 1
+            if self.row_exists(pk=task.id, table='test_tasks'):
+                sql = '''
+                    UPDATE test_tasks
+                    SET revision = %s,
+                        status_id = %s,
+                        started_at = %s
+                    WHERE id = %s;
+                '''
+                cur.execute(sql, (task.revision, task.status_id,
                                  task.started_at, task.id))
+                assert cur.rowcount == 1
+            else:
+                sql = '''
+                    INSERT INTO test_tasks(
+                        id, build_task_id, revision, status_id, package_fullname, started_at)
+                    VALUES
+                    (%s, %s, %s, %s, %s, %s);
+                '''
+                cur.execute(sql, (task.id, task.build_task_id, task.revision, task.status_id,
+                                  task.package_fullname, task.started_at))

             # test step
             if not task.steps_stats:
                 continue
             for s in task.steps_stats:
-                logging.info('test_task_id %s, stat_name_id %s',
-                             s.test_task_id, s.stat_name_id)
+                logging.debug('test_task_id %s, stat_name_id %s',
+                              s.test_task_id, s.stat_name_id)
                 if self.stat_exists(s.test_task_id,
                                     s.stat_name_id,
                                     'test_steps_stats',
@@ -323,10 +313,19 @@
                     assert cur.rowcount == 1
                 else:
                     sql = '''
-                        INSERT INTO test_steps_stats (test_task_id, stat_name_id, start_ts, finish_ts)
+                        INSERT INTO test_steps_stats (
+                            test_task_id, stat_name_id, start_ts, finish_ts)
                         VALUES (%s, %s, %s, %s);
                     '''
                     cur.execute(sql, (s.test_task_id, s.stat_name_id,
                                       s.start_ts, s.finish_ts))
         # commiting changes
         self.__conn.commit()
+
+    def delete_build(self, build_id: int):
+        params = (build_id,)
+        sql = "DELETE FROM builds WHERE id = %s;"
+        cur = self.__conn.cursor()
+        cur.execute(sql, params)
+        self.__conn.commit()
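
Note: insert_update_test_tasks probes row_exists() and then branches into UPDATE or INSERT, which costs an extra round trip per task. Since the code already targets PostgreSQL (it logs psycopg2's cur.query), the same effect could be had in one statement with an upsert. A hedged sketch, assuming test_tasks.id is the primary key and a task object with the same attributes as TestTaskDB; this is an alternative design, not what the repo does:

import psycopg2

UPSERT_SQL = '''
    INSERT INTO test_tasks (id, build_task_id, revision, status_id, package_fullname, started_at)
    VALUES (%s, %s, %s, %s, %s, %s)
    ON CONFLICT (id) DO UPDATE
    SET revision = EXCLUDED.revision,
        status_id = EXCLUDED.status_id,
        started_at = EXCLUDED.started_at;
'''


def upsert_test_task(conn, task):
    # one round trip instead of SELECT COUNT + INSERT/UPDATE
    cur = conn.cursor()
    cur.execute(UPSERT_SQL, (task.id, task.build_task_id, task.revision,
                             task.status_id, task.package_fullname, task.started_at))
    conn.commit()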

View File

@@ -1,8 +1,10 @@
 # pylint: disable=relative-beyond-top-level

+from datetime import datetime, timedelta
 import logging
 from typing import Dict, List

 from ..api_client import APIclient
 from ..const import BuildTaskEnum
 from ..db import DB
@@ -26,16 +28,18 @@ class Extractor:
         stop = False
         while not stop:
+            oldest_build_age = datetime.now().astimezone() - \
+                timedelta(days=self.config.data_store_days)
             logging.info("page: %s", page_num)
             for build in self.api.get_builds(page_num):
                 # check if we shoud stop processing build
                 if build.id <= last_build_id or \
-                        build.created_at <= self.config.oldest_build_age:
+                        build.created_at <= oldest_build_age:
                     stop = True
                     break

                 # some builds could move from one page to another
-                if self.db.build_exists(build_id=build.id):
+                if self.db.row_exists(pk=build.id, table='builds'):
                     continue

                 # inserting build build tasks and build tasks statistics
@@ -64,18 +68,19 @@
                         'getting test tasks for build task %s', build_task.id)
                     test_tasks = self.api.get_test_tasks(build_task.id)
                     logging.info('received %d tests tasks', len(test_tasks))
-                    for t in test_tasks:
-                        logging.info(
-                            'build task %s: inserting test task %s', build_task.id, t.id)
-                        self.db.insert_test_task(t.as_db_model())
+                    if len(test_tasks) > 0:
+                        logging.info('inserting test tasks')
+                        as_db = [t.as_db_model() for t in test_tasks]
+                        self.db.insert_update_test_tasks(as_db)

                 build_count += 1
             page_num += 1
         return build_count

     def build_cleanup(self):
-        logging.info('Removing all buidls older then %s',
-                     self.config.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
-        removed_count = self.db.cleanup_builds(self.config.oldest_build_age)
+        oldest_to_keep = datetime.now().astimezone() - \
+            timedelta(days=self.config.data_store_days)
+        logging.info('Removing all buidls older then %s', oldest_to_keep)
+        removed_count = self.db.cleanup_builds(oldest_to_keep)
         logging.info('removed %d entries', removed_count)

     def __update_build_tasks(self, build_tasks: List[BuildTask],
@@ -105,13 +110,20 @@
                          b.build_id, b.id, BuildTaskEnum(b.status_id).name)

     def update_builds(self):
-        logging.info('Getting list of tasks from DB')
-        unfinished_tasks = self.db.get_unfinished_builds(
-            self.config.oldest_to_update)
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
+        logging.info('Getting unfinished builds that were created after %s ',
+                     not_before)
+        unfinished_tasks = self.db.get_unfinished_builds(not_before)
         for build_id, build_tasks_db in unfinished_tasks.items():
             try:
                 logging.info('Getting status of build %d', build_id)
                 build = self.api.get_build(build_id)
+                if not build:
+                    logging.warning(
+                        "build %s was deleted from albs, removing it", build_id)
+                    self.db.delete_build(build_id)
+                    continue

                 logging.info('Updating build tasks')
                 build_tasks_to_check = [
@@ -131,16 +143,19 @@
                     build_id, err, exc_info=True)

     def updating_test_tasks(self):
-        logging.info('getting build task ids of unfinished tests')
-        build_task_ids = self.db.get_build_tasks_for_unfinished_tests(
-            self.config.oldest_to_update)
+        not_before = datetime.now().astimezone() - \
+            timedelta(days=self.config.oldest_to_update_days)
+        logging.info('getting build tasks for builds created after %s',
+                     not_before)
+        build_task_ids = self.db.get_build_tasks_for_tests_update(
+            not_before)
         for build_task_id in build_task_ids:
             try:
                 logging.info('getting tests for build task %s', build_task_id)
                 tasks_api = self.api.get_test_tasks(build_task_id)
                 logging.info('updating test tasks')
                 tasks_db = [t.as_db_model() for t in tasks_api]
-                self.db.update_test_tasks(tasks_db)
+                self.db.insert_update_test_tasks(tasks_db)
             except Exception as err:  # pylint: disable=broad-except
                 logging.error(
                     'failed to update tests for %d build task: %s',
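
Note: the recurring pattern in this file is the fix for "update_builds() ignoring oldest_to_update attribute": cutoffs are now derived from integer day counts at call time rather than read from datetime config values frozen at startup. The same idea in isolation (the two day counts below are illustrative, not read from this repo's config):

from datetime import datetime, timedelta


def cutoff(days: int) -> datetime:
    # evaluated on every call, so a long-running daemon never reuses a stale value
    return datetime.now().astimezone() - timedelta(days=days)


oldest_build_age = cutoff(30)  # stands in for config.data_store_days
not_before = cutoff(7)         # stands in for config.oldest_to_update_days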

View File

@@ -1,8 +1,8 @@
-from datetime import datetime, timedelta
 import logging
 from logging.handlers import RotatingFileHandler
 import sys
 import time
+from typing import Dict, Any

 import yaml
@@ -22,19 +22,15 @@ def __get_config(yml_path: str) -> ExtractorConfig:
     with open(yml_path, 'r', encoding='utf-8') as flr:
         raw = yaml.safe_load(flr)

-    # adding new attrs
-    raw['oldest_build_age'] = datetime.now().astimezone() \
-        - timedelta(days=raw['data_store_days'])
-
-    raw['db_config'] = DbConfig(name=raw['db_name'],
-                                port=int(raw['db_port']),
-                                host=raw['db_host'],
-                                username=raw['db_username'],
-                                password=raw['db_password'])
-
-    if 'oldest_to_update_days' in raw:
-        raw['oldest_to_update_days'] = datetime.now().astimezone() \
-            - timedelta(days=raw['oldest_to_update_days'])
+    # Dbconfig
+    db_params: Dict[str, Any] = {'name': raw['db_name'],
+                                 'username': raw['db_username'],
+                                 'password': raw['db_password'], }
+    if 'db_port' in raw:
+        db_params['port'] = raw['db_port']
+    if 'db_host' in raw:
+        db_params['host'] = raw['db_host']
+    raw['db_config'] = DbConfig(**db_params)

     return ExtractorConfig(**raw)
@@ -95,7 +91,7 @@ def start(yml_path: str):
     else:
         logging.info('update finished')

-    logging.info('updating statuses of unfinished test tasks')
+    logging.info('updating/inserting test tasks')
     try:
         extractor.updating_test_tasks()
     except Exception as err:  # pylint: disable=broad-except
@@ -103,7 +99,10 @@
     else:
         logging.info('test tasks were updated')

+    # freeing up resources
     extractor.db.close_conn()
+    extractor.api.close_session()
+
     logging.info("Extraction was finished")
     logging.info("Sleeping for %d seconds", config.scrape_interval)
     time.sleep(config.scrape_interval)
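
Note: the loop now frees the DB connection and the HTTP session explicitly on every cycle instead of leaving it to __del__, which CPython only runs whenever the object happens to be collected. A sketch of an equivalent guard with contextlib.ExitStack; the stub class below merely stands in for extractor.db and extractor.api:

from contextlib import ExitStack


class _Stub:
    def close_conn(self):
        print('db connection closed')

    def close_session(self):
        print('http session closed')


db, api = _Stub(), _Stub()

with ExitStack() as stack:
    # both callbacks fire even if the cycle below raises
    stack.callback(api.close_session)
    stack.callback(db.close_conn)
    print('one extraction cycle runs here')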

View File

@ -1,9 +1,13 @@
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
DB_PORT = 5432
DB_HOST = "localhost"
class DbConfig(BaseModel): class DbConfig(BaseModel):
name: str = Field(description="db name") name: str = Field(description="db name")
port: int = Field(description="db server port") port: int = Field(description="db server port", default=DB_PORT)
host: str = Field(description="db server ip/hostname") host: str = Field(description="db server ip/hostname", default=DB_HOST)
username: str = Field(description="username to connect with") username: str = Field(description="username to connect with")
password: str = Field(description="password to connect with1") password: str = Field(description="password to connect with1")
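
Note: with these defaults, db_port and db_host become optional, which together with the conditional db_params handling above fixes the "'Key error' when db_port/db_host is not set" bug. A quick illustration of the pydantic behaviour; the import path is hypothetical and the credentials are the placeholders from the sample config:

from db_config import DbConfig  # hypothetical import path for the module above

cfg = DbConfig(name='albs_analytics',
               username='albs_analytics',
               password='super_secret_password')
assert (cfg.port, cfg.host) == (5432, 'localhost')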

View File

@ -1,4 +1,3 @@
from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
from pydantic import HttpUrl, Field, BaseModel # pylint: disable=no-name-in-module from pydantic import HttpUrl, Field, BaseModel # pylint: disable=no-name-in-module
@ -8,10 +7,10 @@ from .db_config import DbConfig
# DEFAULTS # DEFAULTS
ALBS_URL_DEFAULT = 'https://build.almalinux.org' ALBS_URL_DEFAULT = 'https://build.almalinux.org'
LOG_FILE_DEFAULT = '/tmp/extractor.log' LOG_FILE_DEFAULT = '/tmp/extractor.log'
API_DEFAULT = 30 API_TIMEOUT_DEFAULT = 30
SCRAPE_INTERVAL_DEFAULT = 3600 SCRAPE_INTERVAL_DEFAULT = 3600
START_FROM_DEFAULT = 5808 START_FROM_DEFAULT = 5808
OLDEST_TO_UPDATE_DEFAULT = datetime.now().astimezone() - timedelta(days=7) OLDEST_TO_UPDATE_DAYS_DEFAULT = 7
class ExtractorConfig(BaseModel): class ExtractorConfig(BaseModel):
@ -22,17 +21,17 @@ class ExtractorConfig(BaseModel):
default=LOG_FILE_DEFAULT) default=LOG_FILE_DEFAULT)
albs_url: HttpUrl = Field(description='ALBS root URL', albs_url: HttpUrl = Field(description='ALBS root URL',
default=ALBS_URL_DEFAULT) default=ALBS_URL_DEFAULT)
oldest_build_age: datetime = \ data_store_days: int = \
Field(description='oldest build age to store') Field(description='oldest build (in days) to keep in DB')
jwt: str = Field(description='ALBS JWT token') jwt: str = Field(description='ALBS JWT token')
db_config: DbConfig = Field(description="database configuration") db_config: DbConfig = Field(description="database configuration")
api_timeout: int = Field( api_timeout: int = Field(
description="max time in seconds to wait for API response", description="max time in seconds to wait for API response",
default=API_DEFAULT) default=API_TIMEOUT_DEFAULT)
scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS', scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
default=SCRAPE_INTERVAL_DEFAULT) default=SCRAPE_INTERVAL_DEFAULT)
start_from: int = Field(description='build id to start populating empty db with', start_from: int = Field(description='build id to start populating empty db with',
default=START_FROM_DEFAULT) default=START_FROM_DEFAULT)
oldest_to_update: datetime = \ oldest_to_update_days: int = \
Field(description='oldest unfinished object (build/task/step...) that we will try to update', Field(description='oldest (in days) unfinished object (build/task/step...) that we will try to update',
default=OLDEST_TO_UPDATE_DEFAULT) default=OLDEST_TO_UPDATE_DAYS_DEFAULT)

View File

@@ -10,7 +10,6 @@ albs_url: https://build.almalinux.org
 # required: yes
 jwt: ""

-
 # db_host
 # IP/hostname of database server
 # required: no
@@ -28,7 +27,6 @@ db_port: 5432
 # required: yes
 db_username: albs_analytics

-
 # db_password
 # password to connect with
 # required: yes
@@ -39,7 +37,6 @@ db_password: super_secret_password
 # required: yes
 db_name: albs_analytics

-
 # log_file
 # file to write logs to
 # required: no
@@ -60,12 +57,16 @@ scrape_interval: 3600
 # build_id to start populating empty db with
 # required: false
 # default: 5808 (first build with correct metrics)
-start_from:
+start_from: 5808

-# oldest_to_update
+# oldest_to_update_days
 # oldest (in days) unfinished object (build/task/step...) that we will try to update
 # required: false
 # default: 7
 oldest_to_update_days: 7
+
+# api_timeout
+# how long (in seconds) we will wait for API response
+# required: false
+# default: 30
+api_timeout: 30

View File

@ -1,8 +1,8 @@
{ {
"__inputs": [ "__inputs": [
{ {
"name": "DS_POSTGRESQL", "name": "DS_ALBS_ANALYTICS",
"label": "PostgreSQL", "label": "albs_analytics",
"description": "", "description": "",
"type": "datasource", "type": "datasource",
"pluginId": "postgres", "pluginId": "postgres",
@ -21,7 +21,7 @@
"type": "grafana", "type": "grafana",
"id": "grafana", "id": "grafana",
"name": "Grafana", "name": "Grafana",
"version": "9.3.2" "version": "9.3.6"
}, },
{ {
"type": "datasource", "type": "datasource",
@ -58,7 +58,7 @@
} }
] ]
}, },
"editable": false, "editable": true,
"fiscalYearStartMonth": 0, "fiscalYearStartMonth": 0,
"graphTooltip": 0, "graphTooltip": 0,
"id": null, "id": null,
@ -68,7 +68,7 @@
{ {
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -152,13 +152,13 @@
"showHeader": true, "showHeader": true,
"sortBy": [] "sortBy": []
}, },
"pluginVersion": "9.3.2", "pluginVersion": "9.3.6",
"targets": [ "targets": [
{ {
"cacheDurationSeconds": 300, "cacheDurationSeconds": 300,
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"editorMode": "code", "editorMode": "code",
"fields": [ "fields": [
@ -216,7 +216,7 @@
{ {
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -263,13 +263,13 @@
}, },
"showUnfilled": false "showUnfilled": false
}, },
"pluginVersion": "9.3.2", "pluginVersion": "9.3.6",
"targets": [ "targets": [
{ {
"cacheDurationSeconds": 300, "cacheDurationSeconds": 300,
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"editorMode": "code", "editorMode": "code",
"fields": [ "fields": [
@ -315,7 +315,7 @@
{ {
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"fieldConfig": { "fieldConfig": {
"defaults": { "defaults": {
@ -409,18 +409,18 @@
"showHeader": true, "showHeader": true,
"sortBy": [ "sortBy": [
{ {
"desc": true, "desc": false,
"displayName": "finished" "displayName": "started"
} }
] ]
}, },
"pluginVersion": "9.3.2", "pluginVersion": "9.3.6",
"targets": [ "targets": [
{ {
"cacheDurationSeconds": 300, "cacheDurationSeconds": 300,
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"editorMode": "code", "editorMode": "code",
"fields": [ "fields": [
@ -485,16 +485,16 @@
"current": {}, "current": {},
"datasource": { "datasource": {
"type": "postgres", "type": "postgres",
"uid": "${DS_POSTGRESQL}" "uid": "${DS_ALBS_ANALYTICS}"
}, },
"definition": "SELECT id\nFROM test_tasks\nORDER BY id DESC", "definition": "SELECT id\nFROM test_tasks\nORDER BY id DESC\nLIMIT 1000;",
"hide": 0, "hide": 0,
"includeAll": false, "includeAll": false,
"label": "Test task id", "label": "Test task id",
"multi": false, "multi": false,
"name": "id", "name": "id",
"options": [], "options": [],
"query": "SELECT id\nFROM test_tasks\nORDER BY id DESC", "query": "SELECT id\nFROM test_tasks\nORDER BY id DESC\nLIMIT 1000;",
"refresh": 1, "refresh": 1,
"regex": "", "regex": "",
"skipUrlSync": false, "skipUrlSync": false,
@ -511,6 +511,6 @@
"timezone": "", "timezone": "",
"title": "Test task details", "title": "Test task details",
"uid": "8nFXlkB4z", "uid": "8nFXlkB4z",
"version": 8, "version": 3,
"weekStart": "" "weekStart": ""
} }

View File

@@ -68,7 +68,10 @@ CREATE INDEX test_steps_stats_start_ts
     ON test_steps_stats(start_ts);

 CREATE INDEX test_steps_stats_end_ts
-    ON test_steps_stats(end_ts);
+    ON test_steps_stats(finish_ts);
+
+-- increasing size of name field
+ALTER TABLE build_tasks ALTER COLUMN name TYPE varchar(150);

 UPDATE schema_version
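
Note: the index originally referenced end_ts, a column test_steps_stats does not appear to have (the INSERT statements elsewhere in this diff use finish_ts), which looks like the 0.3.1 "db: bugfix with migration to version 3". A defensive check before creating such an index might look like the sketch below, assuming psycopg2 and a live connection; the usage lines are placeholders:

import psycopg2


def column_exists(conn, table: str, column: str) -> bool:
    # consult the catalog instead of trusting the migration's memory of the schema
    sql = '''
        SELECT COUNT(*)
        FROM information_schema.columns
        WHERE table_name = %s AND column_name = %s;
    '''
    cur = conn.cursor()
    cur.execute(sql, (table, column))
    return cur.fetchone()[0] == 1


# usage sketch; connection parameters are placeholders
# conn = psycopg2.connect(dbname='albs_analytics', user='albs_analytics')
# assert column_exists(conn, 'test_steps_stats', 'finish_ts')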

View File

@@ -1,14 +0,0 @@
-0.1.0 (2023-03-01)
-First version
-
-0.2.0 (2023-03-15)
- - New parameter start_from
- - Moved to double precision for timestamps
- - Added metrics for build steps
-
-0.2.1 (2023-03-15)
- - Added canceled Build task status
-
-0.3.0 (2023-03-22)
- - Added test tasks stats
- - New config parameter: oldest_to_update_days

releases.txt Normal file
View File

@@ -0,0 +1,40 @@
+0.1.0 (2023-03-01)
+First version
+
+0.2.0 (2023-03-15)
+ - New parameter start_from
+ - Moved to double precision for timestamps
+ - Added metrics for build steps
+
+0.2.1 (2023-03-15)
+ - Added canceled Build task status
+
+0.3.0 (2023-03-22)
+ - Added test tasks stats
+ - New config parameter: oldest_to_update_days
+
+0.3.1 (2023-03-22)
+ - db: bugfix with migration to version 3
+ - added info about api_timeout config parameter
+ - bugfix with processing of test tasks with new revision
+
+0.3.2 (2023-03-23)
+ - Bugfix ALBS-1060
+
+0.3.3 (2023-04-24)
+build-analytics
+Improvements
+ - [ALBS-1077] start deleting builds that were removed from ALBS
+Bugfixes
+ - 'Key error' when db_port/db_host is not set
+ - update_builds() ignoring oldest_to_update attribute
+ - [ALBS-1099] Test task started_at attribute is NULL
+ - Max recursion error in 'Test task details.json'
+
+0.3.4 (2023-05-12)
+build_analytics
+ - Bugfix ALBS-1111
+
+0.3.5 (2023-06-01)
+build_analytics:
+ALBS-1103 start using persistent HTTP connections