diff --git a/build_analytics/build_analytics/api_client.py b/build_analytics/build_analytics/api_client.py index c72dca8..f30adbd 100644 --- a/build_analytics/build_analytics/api_client.py +++ b/build_analytics/build_analytics/api_client.py @@ -53,6 +53,7 @@ class APIclient(): return self._parse_build(response.json()) def __parse_build_node_stats(self, stats: Dict) -> BuildNodeStats: + logging.debug('raw json: %s', stats) keys = ['build_all', 'build_binaries', 'build_packages', 'build_srpm', 'build_node_task', 'cas_notarize_artifacts', 'cas_source_authenticate', 'git_checkout', 'upload'] @@ -66,11 +67,14 @@ class APIclient(): stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None) except KeyError: params[k] = BuildStat() - return BuildNodeStats(**params) + build_node_stats = BuildNodeStats(**params) + logging.debug('BuildNodeStats: %s', build_node_stats) + return build_node_stats def __parse_web_node_stats(self, stats: Dict) -> WebNodeStats: keys = ['build_done', 'logs_processing', 'packages_processing'] params = {} + logging.debug('raw json: %s', stats) for k in keys: try: params[k] = BuildStat( @@ -80,7 +84,9 @@ class APIclient(): stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None) except KeyError: params[k] = BuildStat() - return WebNodeStats(**params) + web_node_stats = WebNodeStats(**params) + logging.debug('WebNodeStats %s', web_node_stats) + return web_node_stats def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]: result = [] diff --git a/build_analytics/build_analytics/models/enums.py b/build_analytics/build_analytics/const.py similarity index 91% rename from build_analytics/build_analytics/models/enums.py rename to build_analytics/build_analytics/const.py index 0b63a09..c29d1d9 100644 --- a/build_analytics/build_analytics/models/enums.py +++ b/build_analytics/build_analytics/const.py @@ -2,7 +2,11 @@ from enum import IntEnum +# supported schema version +DB_SCHEMA_VER = 1 + +# ENUMS class ArchEnum(IntEnum): 
i686 = 0 x86_64 = 1 diff --git a/build_analytics/build_analytics/db.py b/build_analytics/build_analytics/db.py index dac6d6a..f2a12a1 100644 --- a/build_analytics/build_analytics/db.py +++ b/build_analytics/build_analytics/db.py @@ -1,5 +1,6 @@ from datetime import datetime -from typing import Union, Dict, List +from typing import Union, Dict, List, Optional +import logging import psycopg2 @@ -55,15 +56,18 @@ class DB(): ''' cur.execute(sql, (stat.build_task_id, stat.stat_name_id, stat.start_ts, stat.end_ts)) + logging.debug('raw SQL query: %s', cur.query) # inserting build node stats for stat in build_node_stats: + logging.debug('BuildNodeStats: %s', stat) sql = ''' INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts) VALUES (%s, %s, %s, %s); ''' cur.execute(sql, (stat.build_task_id, stat.stat_name_id, stat.start_ts, stat.end_ts)) + logging.debug('raw SQL query: %s', cur.query) # commiting changes self.__conn.commit() @@ -149,17 +153,31 @@ class DB(): end_ts = %s WHERE build_task_id = %s; ''' - cur.execute(sql, (stat.start_ts, stat.end_ts)) + cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id)) # updating build_node_stats for stat in build_node_stats: sql = ''' UPDATE build_node_stats SET start_ts = %s, - end_ts = %s, + end_ts = %s WHERE build_task_id = %s; ''' - cur.execute(sql, (stat.start_ts, stat.end_ts)) + cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id)) # commiting changes self.__conn.commit() + + def get_db_schema_version(self) -> Optional[int]: + sql = ''' + SELECT * + FROM schema_version + LIMIT 1; + ''' + cur = self.__conn.cursor() + cur.execute(sql) + val = cur.fetchone() + print(val) + if not val: + return None + return int(val[0]) diff --git a/build_analytics/build_analytics/extractor/extractor.py b/build_analytics/build_analytics/extractor/extractor.py index f210692..3a3a953 100644 --- a/build_analytics/build_analytics/extractor/extractor.py +++ b/build_analytics/build_analytics/extractor/extractor.py @@ 
-4,7 +4,7 @@ import logging from typing import List, Dict from ..models.extractor_config import ExtractorConfig -from ..models.enums import BuildTaskEnum +from ..const import BuildTaskEnum from ..models.build import BuildTask from ..db import DB from ..api_client import APIclient @@ -22,7 +22,7 @@ class Extractor: page_num = 1 last_build_id = self.db.get_latest_build_id() if not last_build_id: - last_build_id = self.start_from + last_build_id = self.start_from - 1 logging.info("last_build_id: %s", last_build_id) stop = False @@ -49,7 +49,7 @@ class Extractor: self.db.insert_buildtask(build_task.as_db_model(), build_task.web_node_stats.as_db_model( build_task.id), - build_task.web_node_stats.as_db_model( + build_task.build_node_stats.as_db_model( build_task.id)) except Exception as error: # pylint: disable=broad-except logging.error('failed to insert build task %d: %s', diff --git a/build_analytics/build_analytics/extractor/start.py b/build_analytics/build_analytics/extractor/start.py index 5467a1c..90f59c1 100644 --- a/build_analytics/build_analytics/extractor/start.py +++ b/build_analytics/build_analytics/extractor/start.py @@ -1,6 +1,7 @@ from datetime import datetime, timedelta import logging from logging.handlers import RotatingFileHandler +import sys import time import yaml @@ -8,6 +9,7 @@ import yaml # pylint: disable=relative-beyond-top-level from ..api_client import APIclient from ..db import DB +from ..const import DB_SCHEMA_VER from .extractor import Extractor from ..models.extractor_config import ExtractorConfig from ..models.db_config import DbConfig @@ -46,16 +48,28 @@ def start(yml_path: str): # configuring logging logging.basicConfig(level=logging.INFO, - format='%(asctime)s %(levelname)s <%(funcName)s> %(message)s', + format='%(asctime)s %(levelname)s %(funcName)s() %(message)s', handlers=[RotatingFileHandler(config.log_file, maxBytes=10000000, backupCount=3)]) + # some pre-flight checks + db = DB(config.db_config) + cur_version = 
db.get_db_schema_version() + if not cur_version: + logging.error( + 'Cant get db schema version. Make sure that schema_version exists') + sys.exit(1) + if cur_version != DB_SCHEMA_VER: + logging.error('unsupported DB schema: want %s, have %s', + DB_SCHEMA_VER, cur_version) + sys.exit(1) + while True: logging.info('Starting extraction proccess') api = APIclient(api_root=config.albs_url, jwt=config.jwt, timeout=config.api_timeout) - db = DB(config.db_config) + extractor = Extractor(config, api, db) logging.info('Starting builds insertion') diff --git a/build_analytics/build_analytics/models/build_node_stats.py b/build_analytics/build_analytics/models/build_node_stats.py index a71715d..39eb74e 100644 --- a/build_analytics/build_analytics/models/build_node_stats.py +++ b/build_analytics/build_analytics/models/build_node_stats.py @@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module from .build_stat import BuildStat from .build_node_stat_db import BuildNodeStatDB -from .enums import BuildNodeStatsEnum +from ..const import BuildNodeStatsEnum class BuildNodeStats(BaseModel): diff --git a/build_analytics/build_analytics/models/build_task.py b/build_analytics/build_analytics/models/build_task.py index c84378e..613c906 100644 --- a/build_analytics/build_analytics/models/build_task.py +++ b/build_analytics/build_analytics/models/build_task.py @@ -1,11 +1,11 @@ from datetime import datetime -from typing import Optional, Tuple +from typing import Optional from pydantic import BaseModel # pylint: disable=no-name-in-module from .build_task_db import BuildTaskDB from .build_node_stats import BuildNodeStats -from .enums import ArchEnum +from ..const import ArchEnum from .web_node_stats import WebNodeStats diff --git a/build_analytics/build_analytics/models/web_node_stats.py b/build_analytics/build_analytics/models/web_node_stats.py index 46f77c6..0476863 100644 --- a/build_analytics/build_analytics/models/web_node_stats.py +++ 
b/build_analytics/build_analytics/models/web_node_stats.py @@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module from .build_stat import BuildStat from .web_node_stat_db import WebNodeStatDB -from .enums import WebNodeStatsEnum +from ..const import WebNodeStatsEnum class WebNodeStats(BaseModel): diff --git a/build_analytics/config_default.yml b/build_analytics/config_default.yml index c7b5e66..8e68a61 100644 --- a/build_analytics/config_default.yml +++ b/build_analytics/config_default.yml @@ -60,4 +60,4 @@ scrape_interval: 3600 # build_id to start populating empty db with # required: false # default: 5808 (first build with correct metrics) -start_from: 5808 +start_from: diff --git a/build_analytics/migrations/1.sql b/build_analytics/migrations/1.sql index 40c6029..cf17985 100644 --- a/build_analytics/migrations/1.sql +++ b/build_analytics/migrations/1.sql @@ -4,8 +4,8 @@ BEGIN; CREATE TABLE builds ( id INTEGER PRIMARY KEY, url VARCHAR(50) NOT NULL, - created_at REAL NOT NULL, - finished_at REAL + created_at DOUBLE PRECISION NOT NULL, + finished_at DOUBLE PRECISION ); @@ -85,8 +85,8 @@ CREATE TABLE build_tasks ( build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE, arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL, status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL, - started_at REAL, - finished_at REAL + started_at DOUBLE PRECISION, + finished_at DOUBLE PRECISION ); CREATE INDEX build_tasks_build_id @@ -103,8 +103,8 @@ ON build_tasks(finished_at); CREATE TABLE web_node_stats ( build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, stat_name_id INTEGER REFERENCES web_node_stats_enum(id) ON DELETE SET NULL, - start_ts REAL, - end_ts REAL + start_ts DOUBLE PRECISION, + end_ts DOUBLE PRECISION ); CREATE INDEX web_node_stats_build_task_id @@ -121,8 +121,8 @@ ON web_node_stats(end_ts); CREATE TABLE build_node_stats ( build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, 
stat_name_id INTEGER REFERENCES build_node_stats_enum(id) ON DELETE SET NULL, - start_ts REAL, - end_ts REAL + start_ts DOUBLE PRECISION, + end_ts DOUBLE PRECISION ); CREATE INDEX build_node_stats_build_task_id @@ -140,8 +140,8 @@ CREATE TABLE sign_tasks ( id INTEGER PRIMARY KEY, build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE, buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, - started_at REAL, - finished_at REAL + started_at DOUBLE PRECISION, + finished_at DOUBLE PRECISION ); CREATE INDEX sign_tasks_build_id @@ -157,7 +157,6 @@ CREATE INDEX sign_tasks_finished_at ON sign_tasks(finished_at); - -- schema_version CREATE TABLE schema_version ( version INTEGER diff --git a/build_analytics/releases.txt b/build_analytics/releases.txt index 409ef5f..bb25103 100644 --- a/build_analytics/releases.txt +++ b/build_analytics/releases.txt @@ -2,4 +2,5 @@ First version 0.2.0 -New parameter start_from \ No newline at end of file +New parameter start_from +moved to double precision for timestamps \ No newline at end of file