- fixed bug with wrong ts of build_tasks steps

- db: moved to double precision for all ts columns
- added  db_schema_version check
This commit is contained in:
Kirill Zhukov 2023-03-07 16:02:14 +01:00
parent d737bae353
commit c6a14cfe3a
11 changed files with 70 additions and 28 deletions

View File

@ -53,6 +53,7 @@ class APIclient():
return self._parse_build(response.json())
def __parse_build_node_stats(self, stats: Dict) -> BuildNodeStats:
logging.debug('raw json: %s', stats)
keys = ['build_all', 'build_binaries', 'build_packages', 'build_srpm', 'build_node_task',
'cas_notarize_artifacts', 'cas_source_authenticate', 'git_checkout', 'upload']
@ -66,11 +67,14 @@ class APIclient():
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError:
params[k] = BuildStat()
return BuildNodeStats(**params)
build_node_stats = BuildNodeStats(**params)
logging.debug('BuildNodeStats: %s', build_node_stats)
return build_node_stats
def __parse_web_node_stats(self, stats: Dict) -> WebNodeStats:
keys = ['build_done', 'logs_processing', 'packages_processing']
params = {}
logging.debug('raw json: %s', stats)
for k in keys:
try:
params[k] = BuildStat(
@ -80,7 +84,9 @@ class APIclient():
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError:
params[k] = BuildStat()
return WebNodeStats(**params)
web_node_stats = WebNodeStats(**params)
logging.debug('WebNodeStats %s', web_node_stats)
return web_node_stats
def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]:
result = []

View File

@ -2,7 +2,11 @@
from enum import IntEnum
# supported schema version
DB_SCHEMA_VER = 1
# ENUMS
class ArchEnum(IntEnum):
i686 = 0
x86_64 = 1

View File

@ -1,5 +1,6 @@
from datetime import datetime
from typing import Union, Dict, List
from typing import Union, Dict, List, Optional
import logging
import psycopg2
@ -55,15 +56,18 @@ class DB():
'''
cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
stat.start_ts, stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
# inserting build node stats
for stat in build_node_stats:
logging.debug('BuildNodeStats: %s', stat)
sql = '''
INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%s, %s, %s, %s);
'''
cur.execute(sql, (stat.build_task_id, stat.stat_name_id,
stat.start_ts, stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
# commiting changes
self.__conn.commit()
@ -149,17 +153,31 @@ class DB():
end_ts = %s
WHERE build_task_id = %s;
'''
cur.execute(sql, (stat.start_ts, stat.end_ts))
cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id))
# updating build_node_stats
for stat in build_node_stats:
sql = '''
UPDATE build_node_stats
SET start_ts = %s,
end_ts = %s,
end_ts = %s
WHERE build_task_id = %s;
'''
cur.execute(sql, (stat.start_ts, stat.end_ts))
cur.execute(sql, (stat.start_ts, stat.end_ts, build_task.id))
# commiting changes
self.__conn.commit()
def get_db_schema_version(self) -> Optional[int]:
    """Return the schema version stored in the database.

    Reads a single row from the ``schema_version`` table.

    Returns:
        The version as an ``int``, or ``None`` when the table has no rows
        or the stored value is NULL.
    """
    # Select the column by name so the result does not depend on the
    # column order of the table.
    sql = '''
    SELECT version
    FROM schema_version
    LIMIT 1;
    '''
    cur = self.__conn.cursor()
    try:
        cur.execute(sql)
        row = cur.fetchone()
    finally:
        # Release the cursor even when the query fails.
        cur.close()
    # Goes through logging instead of print() so stdout stays clean.
    logging.debug('schema_version row: %s', row)
    if not row or row[0] is None:
        return None
    return int(row[0])

View File

@ -4,7 +4,7 @@ import logging
from typing import List, Dict
from ..models.extractor_config import ExtractorConfig
from ..models.enums import BuildTaskEnum
from ..const import BuildTaskEnum
from ..models.build import BuildTask
from ..db import DB
from ..api_client import APIclient
@ -22,7 +22,7 @@ class Extractor:
page_num = 1
last_build_id = self.db.get_latest_build_id()
if not last_build_id:
last_build_id = self.start_from
last_build_id = self.start_from - 1
logging.info("last_build_id: %s", last_build_id)
stop = False
@ -49,7 +49,7 @@ class Extractor:
self.db.insert_buildtask(build_task.as_db_model(),
build_task.web_node_stats.as_db_model(
build_task.id),
build_task.web_node_stats.as_db_model(
build_task.build_node_stats.as_db_model(
build_task.id))
except Exception as error: # pylint: disable=broad-except
logging.error('failed to insert build task %d: %s',

View File

@ -1,6 +1,7 @@
from datetime import datetime, timedelta
import logging
from logging.handlers import RotatingFileHandler
import sys
import time
import yaml
@ -8,6 +9,7 @@ import yaml
# pylint: disable=relative-beyond-top-level
from ..api_client import APIclient
from ..db import DB
from ..const import DB_SCHEMA_VER
from .extractor import Extractor
from ..models.extractor_config import ExtractorConfig
from ..models.db_config import DbConfig
@ -46,16 +48,28 @@ def start(yml_path: str):
# configuring logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s <%(funcName)s> %(message)s',
format='%(asctime)s %(levelname)s %(funcName)s() %(message)s',
handlers=[RotatingFileHandler(config.log_file,
maxBytes=10000000,
backupCount=3)])
# some pre-flight checks
db = DB(config.db_config)
cur_version = db.get_db_schema_version()
if not cur_version:
logging.error(
'Cant get db schema version. Make sure that schema_version exists')
sys.exit(1)
if cur_version != DB_SCHEMA_VER:
logging.error('unsupported DB schema: want %s, have %s',
DB_SCHEMA_VER, cur_version)
sys.exit(1)
while True:
logging.info('Starting extraction proccess')
api = APIclient(api_root=config.albs_url,
jwt=config.jwt, timeout=config.api_timeout)
db = DB(config.db_config)
extractor = Extractor(config, api, db)
logging.info('Starting builds insertion')

View File

@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat
from .build_node_stat_db import BuildNodeStatDB
from .enums import BuildNodeStatsEnum
from ..const import BuildNodeStatsEnum
class BuildNodeStats(BaseModel):

View File

@ -1,11 +1,11 @@
from datetime import datetime
from typing import Optional, Tuple
from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_task_db import BuildTaskDB
from .build_node_stats import BuildNodeStats
from .enums import ArchEnum
from ..const import ArchEnum
from .web_node_stats import WebNodeStats

View File

@ -5,7 +5,7 @@ from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat
from .web_node_stat_db import WebNodeStatDB
from .enums import WebNodeStatsEnum
from ..const import WebNodeStatsEnum
class WebNodeStats(BaseModel):

View File

@ -60,4 +60,4 @@ scrape_interval: 3600
# build_id to start populating empty db with
# required: false
# default: 5808 (first build with correct metrics)
start_from: 5808
start_from:

View File

@ -4,8 +4,8 @@ BEGIN;
CREATE TABLE builds (
id INTEGER PRIMARY KEY,
url VARCHAR(50) NOT NULL,
created_at REAL NOT NULL,
finished_at REAL
created_at DOUBLE PRECISION NOT NULL,
finished_at DOUBLE PRECISION
);
@ -85,8 +85,8 @@ CREATE TABLE build_tasks (
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL,
status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL,
started_at REAL,
finished_at REAL
started_at DOUBLE PRECISION,
finished_at DOUBLE PRECISION
);
CREATE INDEX build_tasks_build_id
@ -103,8 +103,8 @@ ON build_tasks(finished_at);
CREATE TABLE web_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES web_node_stats_enum(id) ON DELETE SET NULL,
start_ts REAL,
end_ts REAL
start_ts DOUBLE PRECISION,
end_ts DOUBLE PRECISION
);
CREATE INDEX web_node_stats_build_task_id
@ -121,8 +121,8 @@ ON web_node_stats(end_ts);
CREATE TABLE build_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES build_node_stats_enum(id) ON DELETE SET NULL,
start_ts REAL,
end_ts REAL
start_ts DOUBLE PRECISION,
end_ts DOUBLE PRECISION
);
CREATE INDEX build_node_stats_build_task_id
@ -140,8 +140,8 @@ CREATE TABLE sign_tasks (
id INTEGER PRIMARY KEY,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
started_at REAL,
finished_at REAL
started_at DOUBLE PRECISION,
finished_at DOUBLE PRECISION
);
CREATE INDEX sign_tasks_build_id
@ -157,7 +157,6 @@ CREATE INDEX sign_tasks_finished_at
ON sign_tasks(finished_at);
-- schema_version
CREATE TABLE schema_version (
version INTEGER

View File

@ -3,3 +3,4 @@ First version
0.2.0
New parameter start_from
moved to double precision for timestamps