- fixed bug with wrong ts of build_tasks steps

- db: moved to double precision for all ts columns
- added db_schema_version check
This commit is contained in:
Kirill Zhukov 2023-03-07 16:02:14 +01:00
parent d737bae353
commit c6a14cfe3a
11 changed files with 70 additions and 28 deletions

View File

@ -53,6 +53,7 @@ class APIclient():
return self._parse_build(response.json()) return self._parse_build(response.json())
def __parse_build_node_stats(self, stats: Dict) -> BuildNodeStats: def __parse_build_node_stats(self, stats: Dict) -> BuildNodeStats:
logging.debug('raw json: %s', stats)
keys = ['build_all', 'build_binaries', 'build_packages', 'build_srpm', 'build_node_task', keys = ['build_all', 'build_binaries', 'build_packages', 'build_srpm', 'build_node_task',
'cas_notarize_artifacts', 'cas_source_authenticate', 'git_checkout', 'upload'] 'cas_notarize_artifacts', 'cas_source_authenticate', 'git_checkout', 'upload']
@ -66,11 +67,14 @@ class APIclient():
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None) stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError: except KeyError:
params[k] = BuildStat() params[k] = BuildStat()
return BuildNodeStats(**params) build_node_stats = BuildNodeStats(**params)
logging.debug('BuildNodeStats: %s', build_node_stats)
return build_node_stats
def __parse_web_node_stats(self, stats: Dict) -> WebNodeStats: def __parse_web_node_stats(self, stats: Dict) -> WebNodeStats:
keys = ['build_done', 'logs_processing', 'packages_processing'] keys = ['build_done', 'logs_processing', 'packages_processing']
params = {} params = {}
logging.debug('raw json: %s', stats)
for k in keys: for k in keys:
try: try:
params[k] = BuildStat( params[k] = BuildStat(
@ -80,7 +84,9 @@ class APIclient():
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None) stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError: except KeyError:
params[k] = BuildStat() params[k] = BuildStat()
return WebNodeStats(**params) web_node_stats = WebNodeStats(**params)
logging.debug('WebNodeStats %s', web_node_stats)
return web_node_stats
def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]: def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]:
result = [] result = []

View File

@ -2,7 +2,11 @@
from enum import IntEnum from enum import IntEnum
# supported schema version
DB_SCHEMA_VER = 1
# ENUMS
class ArchEnum(IntEnum): class ArchEnum(IntEnum):
i686 = 0 i686 = 0
x86_64 = 1 x86_64 = 1

View File

@ -1,5 +1,6 @@
from datetime import datetime from datetime import datetime
from typing import Union, Dict, List from typing import Union, Dict, List, Optional
import logging
import psycopg2 import psycopg2
@ -55,15 +56,18 @@ class DB():
''' '''
cur.execute(sql, (stat.build_task_id, stat.stat_name_id, cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
stat.start_ts, stat.end_ts)) stat.start_ts, stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
# inserting build node stats # inserting build node stats
for stat in build_node_stats: for stat in build_node_stats:
logging.debug('BuildNodeStats: %s', stat)
sql = ''' sql = '''
INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts) INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%s, %s, %s, %s); VALUES (%s, %s, %s, %s);
''' '''
cur.execute(sql, (stat.build_task_id, stat.stat_name_id, cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
stat.start_ts, stat.end_ts)) stat.start_ts, stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
# commiting changes # commiting changes
self.__conn.commit() self.__conn.commit()
@ -149,17 +153,31 @@ class DB():
end_ts = %s end_ts = %s
WHERE build_task_id = %s; WHERE build_task_id = %s;
''' '''
cur.execute(sql, (stat.start_ts, stat.end_ts)) cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id))
# updating build_node_stats # updating build_node_stats
for stat in build_node_stats: for stat in build_node_stats:
sql = ''' sql = '''
UPDATE build_node_stats UPDATE build_node_stats
SET start_ts = %s, SET start_ts = %s,
end_ts = %s, end_ts = %s
WHERE build_task_id = %s; WHERE build_task_id = %s;
''' '''
cur.execute(sql, (stat.start_ts, stat.end_ts)) cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id))
# commiting changes # commiting changes
self.__conn.commit() self.__conn.commit()
def get_db_schema_version(self) -> Optional[int]:
    """Return the schema version stored in the database.

    Reads the first row of the ``schema_version`` table and converts its
    first column to ``int``.

    Returns:
        The schema version, or ``None`` when the table has no rows or the
        stored value is NULL.
    """
    sql = '''
          SELECT *
          FROM schema_version
          LIMIT 1;
          '''
    cur = self.__conn.cursor()
    try:
        cur.execute(sql)
        val = cur.fetchone()
    finally:
        # release the cursor even when the query fails
        cur.close()
    # goes to the configured log handlers instead of stdout
    logging.debug('db schema version row: %s', val)
    if not val or val[0] is None:
        return None
    return int(val[0])

View File

@ -4,7 +4,7 @@ import logging
from typing import List, Dict from typing import List, Dict
from ..models.extractor_config import ExtractorConfig from ..models.extractor_config import ExtractorConfig
from ..models.enums import BuildTaskEnum from ..const import BuildTaskEnum
from ..models.build import BuildTask from ..models.build import BuildTask
from ..db import DB from ..db import DB
from ..api_client import APIclient from ..api_client import APIclient
@ -22,7 +22,7 @@ class Extractor:
page_num = 1 page_num = 1
last_build_id = self.db.get_latest_build_id() last_build_id = self.db.get_latest_build_id()
if not last_build_id: if not last_build_id:
last_build_id = self.start_from last_build_id = self.start_from - 1
logging.info("last_build_id: %s", last_build_id) logging.info("last_build_id: %s", last_build_id)
stop = False stop = False
@ -49,7 +49,7 @@ class Extractor:
self.db.insert_buildtask(build_task.as_db_model(), self.db.insert_buildtask(build_task.as_db_model(),
build_task.web_node_stats.as_db_model( build_task.web_node_stats.as_db_model(
build_task.id), build_task.id),
build_task.web_node_stats.as_db_model( build_task.build_node_stats.as_db_model(
build_task.id)) build_task.id))
except Exception as error: # pylint: disable=broad-except except Exception as error: # pylint: disable=broad-except
logging.error('failed to insert build task %d: %s', logging.error('failed to insert build task %d: %s',

View File

@ -1,6 +1,7 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
import logging import logging
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
import sys
import time import time
import yaml import yaml
@ -8,6 +9,7 @@ import yaml
# pylint: disable=relative-beyond-top-level # pylint: disable=relative-beyond-top-level
from ..api_client import APIclient from ..api_client import APIclient
from ..db import DB from ..db import DB
from ..const import DB_SCHEMA_VER
from .extractor import Extractor from .extractor import Extractor
from ..models.extractor_config import ExtractorConfig from ..models.extractor_config import ExtractorConfig
from ..models.db_config import DbConfig from ..models.db_config import DbConfig
@ -46,16 +48,28 @@ def start(yml_path: str):
# configuring logging # configuring logging
logging.basicConfig(level=logging.INFO, logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s <%(funcName)s> %(message)s', format='%(asctime)s %(levelname)s %(funcName)s() %(message)s',
handlers=[RotatingFileHandler(config.log_file, handlers=[RotatingFileHandler(config.log_file,
maxBytes=10000000, maxBytes=10000000,
backupCount=3)]) backupCount=3)])
# some pre-flight checks
db = DB(config.db_config)
cur_version = db.get_db_schema_version()
if not cur_version:
logging.error(
'Cant get db schema version. Make sure that schema_version exists')
sys.exit(1)
if cur_version != DB_SCHEMA_VER:
logging.error('unsupported DB schema: want %s, have %s',
DB_SCHEMA_VER, cur_version)
sys.exit(1)
while True: while True:
logging.info('Starting extraction proccess') logging.info('Starting extraction proccess')
api = APIclient(api_root=config.albs_url, api = APIclient(api_root=config.albs_url,
jwt=config.jwt, timeout=config.api_timeout) jwt=config.jwt, timeout=config.api_timeout)
db = DB(config.db_config)
extractor = Extractor(config, api, db) extractor = Extractor(config, api, db)
logging.info('Starting builds insertion') logging.info('Starting builds insertion')

View File

@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat from .build_stat import BuildStat
from .build_node_stat_db import BuildNodeStatDB from .build_node_stat_db import BuildNodeStatDB
from .enums import BuildNodeStatsEnum from ..const import BuildNodeStatsEnum
class BuildNodeStats(BaseModel): class BuildNodeStats(BaseModel):

View File

@ -1,11 +1,11 @@
from datetime import datetime from datetime import datetime
from typing import Optional, Tuple from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_task_db import BuildTaskDB from .build_task_db import BuildTaskDB
from .build_node_stats import BuildNodeStats from .build_node_stats import BuildNodeStats
from .enums import ArchEnum from ..const import ArchEnum
from .web_node_stats import WebNodeStats from .web_node_stats import WebNodeStats

View File

@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat from .build_stat import BuildStat
from .web_node_stat_db import WebNodeStatDB from .web_node_stat_db import WebNodeStatDB
from .enums import WebNodeStatsEnum from ..const import WebNodeStatsEnum
class WebNodeStats(BaseModel): class WebNodeStats(BaseModel):

View File

@ -60,4 +60,4 @@ scrape_interval: 3600
# build_id to start populating empty db with # build_id to start populating empty db with
# required: false # required: false
# default: 5808 (first build with correct metrics) # default: 5808 (first build with correct metrics)
start_from: 5808 start_from:

View File

@ -4,8 +4,8 @@ BEGIN;
CREATE TABLE builds ( CREATE TABLE builds (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
url VARCHAR(50) NOT NULL, url VARCHAR(50) NOT NULL,
created_at REAL NOT NULL, created_at DOUBLE PRECISION NOT NULL,
finished_at REAL finished_at DOUBLE PRECISION
); );
@ -85,8 +85,8 @@ CREATE TABLE build_tasks (
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE, build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL, arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL,
status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL, status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL,
started_at REAL, started_at DOUBLE PRECISION,
finished_at REAL finished_at DOUBLE PRECISION
); );
CREATE INDEX build_tasks_build_id CREATE INDEX build_tasks_build_id
@ -103,8 +103,8 @@ ON build_tasks(finished_at);
CREATE TABLE web_node_stats ( CREATE TABLE web_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES web_node_stats_enum(id) ON DELETE SET NULL, stat_name_id INTEGER REFERENCES web_node_stats_enum(id) ON DELETE SET NULL,
start_ts REAL, start_ts DOUBLE PRECISION,
end_ts REAL end_ts DOUBLE PRECISION
); );
CREATE INDEX web_node_stats_build_task_id CREATE INDEX web_node_stats_build_task_id
@ -121,8 +121,8 @@ ON web_node_stats(end_ts);
CREATE TABLE build_node_stats ( CREATE TABLE build_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES build_node_stats_enum(id) ON DELETE SET NULL, stat_name_id INTEGER REFERENCES build_node_stats_enum(id) ON DELETE SET NULL,
start_ts REAL, start_ts DOUBLE PRECISION,
end_ts REAL end_ts DOUBLE PRECISION
); );
CREATE INDEX build_node_stats_build_task_id CREATE INDEX build_node_stats_build_task_id
@ -140,8 +140,8 @@ CREATE TABLE sign_tasks (
id INTEGER PRIMARY KEY, id INTEGER PRIMARY KEY,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE, build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE, buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
started_at REAL, started_at DOUBLE PRECISION,
finished_at REAL finished_at DOUBLE PRECISION
); );
CREATE INDEX sign_tasks_build_id CREATE INDEX sign_tasks_build_id
@ -157,7 +157,6 @@ CREATE INDEX sign_tasks_finished_at
ON sign_tasks(finished_at); ON sign_tasks(finished_at);
-- schema_version -- schema_version
CREATE TABLE schema_version ( CREATE TABLE schema_version (
version INTEGER version INTEGER

View File

@ -2,4 +2,5 @@
First version First version
0.2.0 0.2.0
New parameter start_from New parameter start_from
moved to double precision for timestamps