Compare commits

...

34 Commits
0.1.0 ... main

Author SHA1 Message Date
Kirill Zhukov 7c05bbacb6 Release 0.3.5 (2023-06-01)
build_analytics:
  ALBS-1103 start using persistent HTTP connections
2023-06-01 11:57:27 +02:00
Kirill Zhukov d47fe3b4cd Release 0.3.4 (2023-05-12)
build_analytics
  - Bugfix ALBS-1111
2023-05-12 11:22:55 +02:00
Kirill Zhukov f74bc0748a 0.3.3 (2023-04-24)
build-analytics
  Improvements
    - [ALBS-1077] start deleting builds that were removed from ALBS
  Bugfixes
    - 'Key error' when db_port/db_host is not set
    - update_builds() ignoring oldest_to_update attribute
    - [ALBS-1099] Test task started_at attribute is NULL
    - Max recursion error in 'Test task details.json'
2023-04-24 09:20:58 +02:00
Kirill Zhukov 5a590cbadb build_analytics:
[ALBS-1077] Now we delete build if it was deleted from ALBS
  Bugfix 'Key error' when db_port/db_host is not set
  Bugfix update_builds ignoring oldest_to_update attribute
2023-04-21 15:13:48 +02:00
kzhukov 4b5adb52d5 ALBS-1099 (#4)
Co-authored-by: Kirill Zhukov <kzhukov@cloudlinux.com>
Reviewed-on: #4
2023-04-21 07:53:09 +00:00
Kirill Zhukov 40ce2c583d Release 0.3.2 (2023-03-23)
- Bugfix ALBS-1060
2023-03-23 13:06:43 +01:00
Kirill Zhukov 4145ce8e9e Bugfix ALBS-1060 2023-03-23 13:04:28 +01:00
Kirill Zhukov ae8b2a7089 0.3.1 (2023-03-22)
- db: bugfix with migration to version 3
 - added info about api_timeout config parameter
 - bugfix with processing of test tasks with new revision
2023-03-22 14:39:14 +01:00
kzhukov 9f3796db07 Merge pull request '0.3.1' (#3) from 0.3.1 into main
Reviewed-on: #3
2023-03-22 13:37:03 +00:00
Kirill Zhukov 919f417463 - bugfix with migration to version 3
- added docs about api_timeout config parameter
 - bugfix with processing of test tasks with new revision
2023-03-22 14:35:01 +01:00
Kirill Zhukov 49b16179d9 Release 0.3.0 (2023-03-22)
- Added test tasks stats
 - New config parameter: oldest_to_update_days
2023-03-22 11:36:50 +01:00
kzhukov bd74c99a7d Merge pull request 'ALBS-1043' (#2) from ALBS-1043 into main
Reviewed-on: #2
2023-03-22 10:35:03 +00:00
Kirill Zhukov 5b1e296fbc ALBS-1043:
- db,extractor: added check for cases when build 'moved' between pages
 - grafana: new dashboard Test tasks.json, added test info to other dashboards
2023-03-22 11:30:12 +01:00
Kirill Zhukov 4d5ffcc74f - Added new try/catch segments
- Added tz info to test step stats timestamps
- Increased oldest_to_update_days parameter
2023-03-20 19:29:45 +01:00
Kirill Zhukov 313d4a4d2a db: debug of update feature 2023-03-16 22:48:35 +01:00
Kirill Zhukov 679328093a added test tasks updating logic 2023-03-16 18:57:31 +01:00
Kirill Zhukov 45a6850056 debugging 2023-03-16 09:31:09 +01:00
Kirill Zhukov 08ec138942 added api and db functions 2023-03-15 16:59:22 +01:00
Kirill Zhukov a93165420b added schema 2023-03-15 16:59:22 +01:00
Kirill Zhukov 86dddb30d6 0.2.1 (2023-03-15)
- Added canceled Build task status
2023-03-15 12:43:24 +01:00
Kirill Zhukov 564571adbe Release 0.2.0 (15.11:29)
- New parameter start_from
- Moved to double precision for timestamps
- Added metrics for build steps
2023-03-15 11:30:06 +01:00
kzhukov 1f98c072f9 Merge pull request 'ALBS-1026: add statistics for each build_task step' (#1) from ALBS-1026 into main
Reviewed-on: #1
2023-03-15 10:25:52 +00:00
Kirill Zhukov 4c843a27c6 db: forgot to update schema version 2023-03-10 19:44:54 +01:00
Kirill Zhukov 3ed0dcdd78 added build step: multilib_processing
db: stop adding empty build steps (without start_ts)
2023-03-10 19:35:12 +01:00
Kirill Zhukov d54ab39941 db: bugfix with closed connection on subsequent scraping 2023-03-10 11:41:01 +01:00
Kirill Zhukov 22fb4a6423 ALBS-1026
grafana: added dashboards for Build details, small fixes on other ones
2023-03-10 11:24:17 +01:00
Kirill Zhukov b9bc5269ce ALBS-1026
grafana/Build analytics: removed excluded/failed build tasks from duration calculation
2023-03-09 14:16:35 +01:00
Kirill Zhukov 03ed6e9c94 added releases 2023-03-09 13:41:49 +01:00
Kirill Zhukov fafd7ae79b ALBS-1026
db.py: bug fix with updates
grafana: added build task steps
2023-03-09 13:33:02 +01:00
Kirill Zhukov d1291c7308 ALBS-1026 grafana-dashbords/Build analytics.json: added 95th percentile 2023-03-08 09:43:46 +01:00
Kirill Zhukov 76be50ca9c Grafana: Added new dashboards, improved old ones 2023-03-08 00:02:25 +01:00
Kirill Zhukov c4983d6ae4 ALBS-1026: new Dashboard Build task details 2023-03-07 17:48:19 +01:00
Kirill Zhukov c6a14cfe3a - fixed bug with wrong ts of build_tasks steps
- db: moved to double precision for all ts columns
- added db_schema_version check
2023-03-07 16:02:14 +01:00
Kirill Zhukov d737bae353 added build task stats 2023-03-06 19:21:56 +01:00
45 changed files with 11600 additions and 733 deletions


@ -1,94 +0,0 @@
from datetime import datetime
import logging
from urllib.parse import urljoin
from typing import Dict, List
from .models.build import Build
from .models.build_task import BuildTask
import requests
TZ_OFFSET = '+00:00'
class APIclient():
"""
client for working with ALBS API
"""
def __init__(self, api_root: str, jwt: str, timeout: int):
self.api_root = api_root
self.jwt = jwt
self.timeout = timeout
def get_builds(self, page_num: int = 1) -> List[Build]:
ep = '/api/v1/builds'
url = urljoin(self.api_root, ep)
params = {'pageNumber': page_num}
headers = {'accept': 'application/json'}
response = requests.get(
url, params=params, headers=headers, timeout=self.timeout)
response.raise_for_status()
result = []
for b in response.json()['builds']:
try:
result.append(self._parse_build(b))
except Exception as err: # pylint: disable=broad-except
logging.error("Cant convert build JSON %s to Buildmodel: %s",
b, err, exc_info=True)
return result
def get_build(self, build_id: int) -> Build:
ep = f'/api/v1/builds/{build_id}'
url = urljoin(self.api_root, ep)
headers = {'accept': 'application/json'}
response = requests.get(url, headers=headers, timeout=self.timeout)
response.raise_for_status()
return self._parse_build(response.json())
def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]:
result = []
for task in tasks_json:
try:
started_at = datetime.fromisoformat(
task['started_at']+TZ_OFFSET) \
if task['started_at'] else None
finished_at = datetime.fromisoformat(task['finished_at']+TZ_OFFSET) \
if task['finished_at'] else None
name = task['ref']['url'].split('/')[-1].replace('.git', '')
params = {'id': task['id'],
'name': name,
'build_id': build_id,
'started_at': started_at,
'finished_at': finished_at,
'arch': task['arch'],
'status_id': task['status']}
result.append(BuildTask(**params))
except Exception as err: # pylint: disable=broad-except
logging.error("Cant convert build_task JSON %s (build_id %s) to BuildTask model: %s",
task, build_id, err, exc_info=True)
result.sort(key=lambda x: x.id, reverse=True)
return result
def _parse_build(self, build_json: Dict) -> Build:
url = f"https://build.almalinux.org/build/{build_json['id']}"
created_at = datetime.fromisoformat(build_json['created_at']+TZ_OFFSET)
finished_at = datetime.fromisoformat(build_json['finished_at']+TZ_OFFSET) \
if build_json['finished_at'] else None
build_tasks = self._parse_build_tasks(
build_json['tasks'], build_json['id'])
params = {
'id': build_json['id'],
'url': url,
'created_at': created_at,
'finished_at': finished_at,
'build_tasks': build_tasks}
return Build(**params)


@ -1,115 +0,0 @@
from datetime import datetime
from typing import Union, Dict
import psycopg2
from .models.build_db import BuildDB
from .models.build_task_db import BuildTaskDB
from .models.db_config import DbConfig
class DB():
def __init__(self, config: DbConfig):
self.__conn = psycopg2.connect(database=config.name,
host=config.host,
user=config.username,
password=config.password,
port=config.port)
def close_conn(self):
self.__conn.close()
def __del__(self):
self.close_conn()
def insert_build(self, build: BuildDB):
sql = '''
INSERT INTO builds(id, url, created_at, finished_at)
VALUES (%s, %s, %s, %s);
'''
cur = self.__conn.cursor()
cur.execute(sql, (build.id, build.url,
build.created_at, build.finished_at))
self.__conn.commit()
def insert_buildtask(self, build_task: BuildTaskDB):
sql = '''
INSERT INTO build_tasks(id, name, build_id, arch_id, started_at, finished_at, status_id)
VALUES (%s, %s, %s, %s, %s, %s, %s);
'''
cur = self.__conn.cursor()
cur.execute(sql, (build_task.id, build_task.name, build_task.build_id, build_task.arch_id,
build_task.started_at, build_task.finished_at, build_task.status_id))
self.__conn.commit()
def get_latest_build_id(self) -> Union[int, None]:
sql = "SELECT id from builds ORDER BY id DESC LIMIT 1;"
cur = self.__conn.cursor()
cur.execute(sql)
val = cur.fetchone()
if not val:
return None
return int(val[0])
def cleanup_builds(self, oldest_to_keep: datetime) -> int:
params = (int(oldest_to_keep.timestamp()),)
sql = "DELETE FROM builds WHERE created_at < %s;"
cur = self.__conn.cursor()
cur.execute(sql, params)
self.__conn.commit()
return cur.rowcount
def get_unfinished_builds(self) -> Dict[int, Dict[int, int]]:
"""
Getting list of unfinished builds and build_tasks
Dict[build_id, Dict[build_task_id, task_status_id]]
"""
res: Dict[int, Dict[int, int]] = {}
# getting unfinished builds
sql = 'SELECT id FROM builds where finished_at is NULL;'
cur = self.__conn.cursor()
cur.execute(sql)
for row in cur.fetchall():
res[row[0]] = {}
# getting list of unfinished tasks
sql = 'SELECT id, build_id, status_id FROM build_tasks WHERE status_id < 2;'
cur = self.__conn.cursor()
cur.execute(sql)
for row in cur.fetchall():
build_task_id: int = row[0]
build_id: int = row[1]
status_id: int = row[2]
try:
res[build_id][build_task_id] = status_id
except KeyError:
res[build_id] = {build_task_id: status_id}
return res
def update_build(self, build: BuildDB):
sql = '''
UPDATE builds
SET finished_at = %s
WHERE id = %s;
'''
cur = self.__conn.cursor()
cur.execute(sql, (build.finished_at, build.id))
self.__conn.commit()
def update_build_task(self, build: BuildTaskDB):
sql = '''
UPDATE build_tasks
SET status_id = %s,
started_at = %s,
finished_at = %s
WHERE id = %s;
'''
cur = self.__conn.cursor()
cur.execute(sql, (build.status_id, build.started_at,
build.finished_at, build.id))
self.__conn.commit()


@ -1,95 +0,0 @@
# pylint: disable=relative-beyond-top-level
import logging
from typing import List, Dict
from ..models.extractor_config import ExtractorConfig
from ..models.enums import BuildTaskEnum
from ..models.build import BuildTask
from ..db import DB
from ..api_client import APIclient
class Extractor:
def __init__(self, config: ExtractorConfig, api: APIclient, db: DB):
self.oldest_build_age = config.oldest_build_age
self.api = api
self.db = db
def extract_and_store(self) -> int:
build_count = 0
page_num = 1
last_build_id = self.db.get_latest_build_id()
if not last_build_id:
last_build_id = 0
logging.info("last_build_id: %s", last_build_id)
stop = False
while not stop:
logging.info("page: %s", page_num)
for build in self.api.get_builds(page_num):
# check if we should stop processing the build
if build.id <= last_build_id or \
build.created_at <= self.oldest_build_age:
stop = True
break
# inserting build and build tasks
logging.info("inserting %s", build.id)
self.db.insert_build(build.as_db_model())
for build_task in build.build_tasks:
self.db.insert_buildtask(build_task.as_db_model())
build_count += 1
page_num += 1
return build_count
def build_cleanup(self):
logging.info('Removing all builds older than %s',
self.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
removed_count = self.db.cleanup_builds(self.oldest_build_age)
logging.info('removed %d entries', removed_count)
def __update_build_tasks_statuses(self, build_tasks: List[BuildTask],
build_tasks_status_db: Dict[int, int]):
for b in build_tasks:
if b.status_id != build_tasks_status_db[b.id]:
logging.info('build task %d status has changed %s -> %s. Updating DB',
b.id, BuildTaskEnum(
build_tasks_status_db[b.id]).name,
BuildTaskEnum(b.status_id).name)
try:
self.db.update_build_task(b.as_db_model())
except Exception as err: # pylint: disable=broad-except
logging.error(
'failed to update build task %d: %s',
b.id, err, exc_info=True)
else:
logging.info('build task %d was updated', b.id)
else:
logging.info(
"build_task %d is still %s. Skipping", b.id, BuildTaskEnum(b.status_id).name)
def update_builds(self):
logging.info('Getting list of tasks from DB')
unfinished_tasks = self.db.get_unfinished_builds()
for build_id, build_tasks_db in unfinished_tasks.items():
try:
logging.info('Getting status of build %d', build_id)
build = self.api.get_build(build_id)
logging.info('Updating build tasks')
build_tasks_to_check = [
b for b in build.build_tasks if b.id in build_tasks_db]
self.__update_build_tasks_statuses(
build_tasks_to_check, build_tasks_db)
if build.finished_at:
logging.info(
"build is finished, we need to update finished_at attribute")
self.db.update_build(build.as_db_model())
logging.info('finished processing build %d', build_id)
except Exception as err: # pylint: disable=broad-except
logging.error("Cant process build %d: %s",
build_id, err, exc_info=True)


@ -1,17 +0,0 @@
from enum import IntEnum
class ArchEnum(IntEnum):
i686 = 0
x86_64 = 1
aarch64 = 2
ppc64le = 3
s390x = 4
class BuildTaskEnum(IntEnum):
idle = 0
started = 1
completed = 2
failed = 3
excluded = 4


@ -1,92 +0,0 @@
-- builds
DROP TABLE IF EXISTS builds CASCADE;
CREATE TABLE builds (
id INTEGER PRIMARY KEY,
url VARCHAR(50) NOT NULL,
created_at REAL NOT NULL,
finished_at REAL
);
CREATE INDEX IF NOT EXISTS builds_created_at
ON builds(created_at);
CREATE INDEX IF NOT EXISTS builds_finished_at
ON builds(finished_at);
-- build_tasks_enum
DROP TABLE IF EXISTS build_task_status_enum CASCADE;
CREATE TABLE IF NOT EXISTS build_task_status_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(15)
);
INSERT INTO build_task_status_enum (id, value)
VALUES
(0, 'idle'),
(1, 'started'),
(2, 'completed'),
(3, 'failed'),
(4, 'excluded');
-- arch_enum
DROP TABLE IF EXISTS arch_enum CASCADE;
CREATE TABLE arch_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(15)
);
INSERT INTO arch_enum(id, value)
VALUES
(0, 'i686'),
(1, 'x86_64'),
(2, 'aarch64'),
(3, 'ppc64le'),
(4, 's390x');
-- build_tasks
DROP TABLE IF EXISTS build_tasks CASCADE;
CREATE TABLE build_tasks (
id INTEGER PRIMARY KEY,
name VARCHAR(50) NOT NULL,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL,
status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL,
started_at REAL,
finished_at REAL
);
CREATE INDEX build_tasks_build_id
ON build_tasks(build_id);
CREATE INDEX build_tasks_started_at
ON build_tasks(started_at);
CREATE INDEX build_tasks_finished_at
ON build_tasks(finished_at);
-- sign_tasks
DROP TABLE IF EXISTS sign_tasks CASCADE;
CREATE TABLE sign_tasks (
id INTEGER PRIMARY KEY,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
started_at REAL,
finished_at REAL
);
CREATE INDEX sign_tasks_build_id
ON sign_tasks(build_id);
CREATE INDEX sign_tasks_buildtask_id
ON sign_tasks(buildtask_id);
CREATE INDEX sing_tasks_started_at
ON sign_tasks(started_at);
CREATE INDEX sign_tasks_finished_at
ON sign_tasks(finished_at);


@ -1,2 +0,0 @@
0.1.0 (2023-03-01)
First version


@ -0,0 +1,259 @@
from datetime import datetime
import logging
from urllib.parse import urljoin
from typing import Dict, List, Any, Optional
import requests
from .models.build import Build
from .models.build_task import BuildTask
from .models.build_node_stats import BuildNodeStats
from .models.build_stat import BuildStat
from .models.web_node_stats import WebNodeStats
from .models.test_task import TestTask
from .models.test_steps_stats import TestStepsStats
from .models.test_step_stat import TestStepStat
TZ_OFFSET = '+00:00'
class APIclient():
"""
client for working with ALBS API
"""
def __init__(self, api_root: str, jwt: str, timeout: int):
self.api_root = api_root
self.jwt = jwt
self.timeout = timeout
# will be set at first call of __send_request
self.session: Optional[requests.Session] = None
def get_builds(self, page_num: int = 1) -> List[Build]:
ep = '/api/v1/builds'
url = urljoin(self.api_root, ep)
params = {'pageNumber': page_num}
headers = {'accept': 'application/json'}
response = self.__send_request(url, 'get', params, headers)
response.raise_for_status()
result = []
for b in response.json()['builds']:
try:
result.append(self._parse_build(b))
except Exception as err: # pylint: disable=broad-except
logging.error("Cant convert build JSON %s to Buildmodel: %s",
b, err, exc_info=True)
return result
def get_build(self, build_id: int) -> Optional[Build]:
'''
method returns None if build was deleted from ALBS
'''
ep = f'/api/v1/builds/{build_id}'
url = urljoin(self.api_root, ep)
headers = {'accept': 'application/json'}
response = self.__send_request(url, 'get', headers=headers)
if response.status_code == 404:
return None
response.raise_for_status()
return self._parse_build(response.json())
def __parse_build_node_stats(self, stats: Dict) -> BuildNodeStats:
logging.debug('raw json: %s', stats)
keys = ['build_all', 'build_binaries', 'build_packages', 'build_srpm', 'build_node_task',
'cas_notarize_artifacts', 'cas_source_authenticate', 'git_checkout', 'upload']
params = {}
for k in keys:
try:
params[k] = BuildStat(
start_ts=datetime.fromisoformat(
stats[k]['start_ts']+TZ_OFFSET) if stats[k]['start_ts'] else None,
end_ts=datetime.fromisoformat(
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError:
params[k] = BuildStat()
build_node_stats = BuildNodeStats(**params)
logging.debug('BuildNodeStats: %s', build_node_stats)
return build_node_stats
def __parse_web_node_stats(self, stats: Dict) -> WebNodeStats:
keys = ['build_done', 'logs_processing',
'packages_processing', 'multilib_processing']
params = {}
logging.debug('raw json: %s', stats)
for k in keys:
try:
params[k] = BuildStat(
start_ts=datetime.fromisoformat(
stats[k]['start_ts']+TZ_OFFSET) if stats[k]['start_ts'] else None,
end_ts=datetime.fromisoformat(
stats[k]['end_ts']+TZ_OFFSET) if stats[k]['end_ts'] else None)
except KeyError:
params[k] = BuildStat()
web_node_stats = WebNodeStats(**params)
logging.debug('WebNodeStats %s', web_node_stats)
return web_node_stats
def _parse_build_tasks(self, tasks_json: Dict, build_id: int) -> List[BuildTask]:
result = []
for task in tasks_json:
try:
started_at = datetime.fromisoformat(
task['started_at']+TZ_OFFSET) if task['started_at'] else None
finished_at = datetime.fromisoformat(
task['finished_at']+TZ_OFFSET) if task['finished_at'] else None
name = task['ref']['url'].split('/')[-1].replace('.git', '')
if not task['performance_stats']:
logging.warning(
"no perfomance_stats for build_id: %s, build_task_id: %s", build_id, task['id'])
stats: dict[str, Any] = {
'build_node_stats': {}, 'build_done_stats': {}}
else:
stats = task['performance_stats'][0]['statistics']
params = {'id': task['id'],
'name': name,
'build_id': build_id,
'started_at': started_at,
'finished_at': finished_at,
'arch': task['arch'],
'status_id': task['status'],
'build_node_stats': self.__parse_build_node_stats(stats['build_node_stats']),
'web_node_stats': self.__parse_web_node_stats(stats['build_done_stats'])}
result.append(BuildTask(**params))
except Exception as err: # pylint: disable=broad-except
logging.error("Cant convert build_task JSON %s (build_id %s) to BuildTask model: %s",
task, build_id, err, exc_info=True)
result.sort(key=lambda x: x.id, reverse=True)
return result
def _parse_build(self, build_json: Dict) -> Build:
url = f"https://build.almalinux.org/build/{build_json['id']}"
created_at = datetime.fromisoformat(build_json['created_at']+TZ_OFFSET)
finished_at = datetime.fromisoformat(
build_json['finished_at']+TZ_OFFSET) if build_json['finished_at'] else None
build_tasks = self._parse_build_tasks(
build_json['tasks'], build_json['id'])
params = {
'id': build_json['id'],
'url': url,
'created_at': created_at,
'finished_at': finished_at,
'build_tasks': build_tasks}
return Build(**params)
def get_test_tasks(self, build_task_id: int) -> List[TestTask]:
result: List[TestTask] = []
revision = 1
while True:
ep = f'/api/v1/tests/{build_task_id}/{revision}'
url = urljoin(self.api_root, ep)
headers = {'accept': 'application/json'}
response = requests.get(
url, headers=headers, timeout=self.timeout)
response.raise_for_status()
raw_tasks = response.json()
if len(raw_tasks) == 0:
break
result = result + self.__parse_test_tasks(raw_tasks, build_task_id)
revision += 1
return result
def __parse_test_tasks(self, raw_tasks: List[Dict[str, Any]],
build_task_id: int) -> List[TestTask]:
result: List[TestTask] = []
for task in raw_tasks:
if task['alts_response']:
try:
stats_raw = task['alts_response']['stats']
except KeyError:
steps_stats = None
else:
steps_stats = self.__parse_test_steps_stats(stats_raw)
else:
steps_stats = None
params = {
'id': task['id'],
'build_task_id': build_task_id,
'revision': task['revision'],
'status': task['status'],
'package_fullname': '_'.join([task['package_name'],
task['package_version'],
task['package_release']]),
'started_at': self.__get_test_task_started_at(
steps_stats) if steps_stats else None,
'steps_stats': steps_stats
}
result.append(TestTask(**params))
return result
def __parse_test_steps_stats(self, stats_raw: Dict[str, Any]) -> TestStepsStats:
test_steps_params = {}
for field_name in TestStepsStats.__fields__.keys():
try:
p = stats_raw[field_name]
except KeyError:
continue
# there must be a better way to do this
for k in ['start_ts', 'finish_ts']:
if k in p:
p[k] = datetime.fromisoformat(p[k]+TZ_OFFSET)
test_steps_params[field_name] = TestStepStat(**p)
return TestStepsStats(**test_steps_params)
def __get_test_task_started_at(self, stats: TestStepsStats) -> Optional[datetime]:
"""
Get the started_at attribute for a test by using the oldest start_ts timestamp
among all test task steps
"""
if not stats:
return None
start_ts = None
for field_name in stats.__fields__.keys():
stat: TestStepStat = getattr(stats, field_name)
if not stat:
continue
if not start_ts or start_ts > stat.start_ts:
start_ts = stat.start_ts
return start_ts
def __send_request(self,
url: str,
method: str,
params: Optional[Dict[str, Any]] = None,
headers: Optional[Dict[str, Any]] = None,
) -> requests.Response:
"""
Simple wrapper around requests.get/post/... methods
so we can reuse the same session between API calls
"""
if not self.session:
self.session = requests.Session()
m = getattr(self.session, method, None)
if not m:
raise ValueError(f"method {method} is not supported")
# pylint: disable=not-callable
return m(url, params=params, headers=headers, timeout=self.timeout)
def close_session(self):
if self.session:
self.session.close()
self.session = None
def __del__(self):
self.close_session()
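
A brief, hypothetical usage sketch of the APIclient defined above (token and ids are placeholders, imports omitted); it shows how the shared requests.Session introduced for ALBS-1103 reuses one HTTP connection across calls:

    client = APIclient(api_root='https://build.almalinux.org', jwt='<jwt token>', timeout=30)
    try:
        # both calls go through the same underlying Session created on first use
        builds = client.get_builds(page_num=1)
        if builds:
            build = client.get_build(builds[0].id)  # None if the build was deleted from ALBS
    finally:
        client.close_session()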


@ -0,0 +1,61 @@
# pylint: disable=invalid-name
from enum import IntEnum
# supported schema version
DB_SCHEMA_VER = 3
# ENUMS
class ArchEnum(IntEnum):
i686 = 0
x86_64 = 1
aarch64 = 2
ppc64le = 3
s390x = 4
class BuildTaskEnum(IntEnum):
idle = 0
started = 1
completed = 2
failed = 3
excluded = 4
canceled = 5
class WebNodeStatsEnum(IntEnum):
build_done = 0
logs_processing = 1
packages_processing = 2
multilib_processing = 3
class BuildNodeStatsEnum(IntEnum):
upload = 0
build_all = 1
build_srpm = 2
git_checkout = 3
build_binaries = 4
build_packages = 5
build_node_task = 6
cas_notarize_artifacts = 7
cas_source_authenticate = 8
class TestTaskStatusEnum(IntEnum):
created = 1
started = 2
completed = 3
failed = 4
class TestStepEnum(IntEnum):
install_package = 0
stop_enviroment = 1
initial_provision = 2
start_environment = 3
uninstall_package = 4
initialize_terraform = 5
package_integrity_tests = 6
stop_environment = 7
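
A tiny, hypothetical illustration (values cross-checked against the enums above and the SQL schema later in this diff) of how these enums translate stat and status names into the integer ids stored in the database:

    stat_name_id = BuildNodeStatsEnum['git_checkout'].value  # 3, matches build_node_stats_enum in the schema
    status_name = BuildTaskEnum(2).name                      # 'completed'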


@ -0,0 +1,331 @@
from datetime import datetime
from typing import Union, Dict, List, Optional
import logging
import psycopg2
from .models.build_db import BuildDB
from .models.build_task_db import BuildTaskDB
from .models.build_node_stat_db import BuildNodeStatDB
from .models.db_config import DbConfig
from .models.web_node_stat_db import WebNodeStatDB
from .models.test_task_db import TestTaskDB
class DB():
def __init__(self, config: DbConfig):
self.__conn = psycopg2.connect(database=config.name,
host=config.host,
user=config.username,
password=config.password,
port=config.port)
def close_conn(self):
self.__conn.close()
def __del__(self):
self.close_conn()
def row_exists(self, pk: int, table: str) -> bool:
assert table in ['builds', 'test_tasks']
sql = f'''
SELECT COUNT(id)
FROM {table}
WHERE id = %s;
'''
cur = self.__conn.cursor()
cur.execute(sql, (pk,))
val = int(cur.fetchone()[0])
return val == 1
def insert_build(self, build: BuildDB):
sql = '''
INSERT INTO builds(id, url, created_at, finished_at)
VALUES (%s, %s, %s, %s);
'''
cur = self.__conn.cursor()
cur.execute(sql, (build.id, build.url,
build.created_at, build.finished_at))
self.__conn.commit()
def insert_buildtask(self, build_task: BuildTaskDB, web_node_stats: List[WebNodeStatDB],
build_node_stats: List[BuildNodeStatDB]):
cur = self.__conn.cursor()
# inserting build_task
sql = '''
INSERT INTO build_tasks(id, name, build_id, arch_id, started_at, finished_at, status_id)
VALUES (%s, %s, %s, %s, %s, %s, %s);
'''
cur.execute(sql, (build_task.id, build_task.name, build_task.build_id, build_task.arch_id,
build_task.started_at, build_task.finished_at, build_task.status_id))
# inserting web node stats
for wn_stat in web_node_stats:
# do not insert empty stats
if wn_stat.start_ts is None:
continue
sql = '''
INSERT INTO web_node_stats (build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%s, %s, %s, %s);
'''
cur.execute(sql, (wn_stat.build_task_id, wn_stat.stat_name_id,
wn_stat.start_ts, wn_stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
self.__conn.commit()
# inserting build node stats
for bn_stat in build_node_stats:
# do not insert empty stats
if bn_stat.start_ts is None:
continue
sql = '''
INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%s, %s, %s, %s);
'''
cur.execute(sql, (bn_stat.build_task_id, bn_stat.stat_name_id,
bn_stat.start_ts, bn_stat.end_ts))
logging.debug('raw SQL query: %s', cur.query)
# committing changes
self.__conn.commit()
def get_latest_build_id(self) -> Union[int, None]:
sql = "SELECT id from builds ORDER BY id DESC LIMIT 1;"
cur = self.__conn.cursor()
cur.execute(sql)
val = cur.fetchone()
if not val:
return None
return int(val[0])
def cleanup_builds(self, oldest_to_keep: datetime) -> int:
params = (int(oldest_to_keep.timestamp()),)
sql = "DELETE FROM builds WHERE created_at < %s;"
cur = self.__conn.cursor()
cur.execute(sql, params)
self.__conn.commit()
return cur.rowcount
def get_unfinished_builds(self, not_before: datetime) -> Dict[int, Dict[int, int]]:
"""
Getting list of unfinished builds and build_tasks
Dict[build_id, Dict[build_task_id, task_status_id]]
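e.g. {1234: {5678: 0, 5679: 1}} (ids here are purely illustrative)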
"""
res: Dict[int, Dict[int, int]] = {}
# getting unfinished builds
sql = 'SELECT id FROM builds where finished_at is NULL AND created_at > %s;'
builds_to_check: Dict[int, bool] = {}
cur = self.__conn.cursor()
cur.execute(sql, (not_before.timestamp(),))
logging.debug('raw SQL query: %s', cur.query)
for row in cur.fetchall():
builds_to_check[row[0]] = True
# getting list of unfinished tasks
sql = 'SELECT id, build_id, status_id FROM build_tasks WHERE status_id < 2;'
cur = self.__conn.cursor()
cur.execute(sql)
for row in cur.fetchall():
build_task_id: int = row[0]
build_id: int = row[1]
status_id: int = row[2]
if build_id not in builds_to_check:
continue
try:
res[build_id][build_task_id] = status_id
except KeyError:
res[build_id] = {build_task_id: status_id}
return res
def update_build(self, build: BuildDB):
sql = '''
UPDATE builds
SET finished_at = %s
WHERE id = %s;
'''
cur = self.__conn.cursor()
cur.execute(sql, (build.finished_at, build.id))
self.__conn.commit()
def update_build_task(self, build_task: BuildTaskDB,
web_node_stats: List[WebNodeStatDB],
build_node_stats: List[BuildNodeStatDB]):
cur = self.__conn.cursor()
sql = '''
UPDATE build_tasks
SET status_id = %s,
started_at = %s,
finished_at = %s
WHERE id = %s;
'''
cur.execute(sql, (build_task.status_id, build_task.started_at,
build_task.finished_at, build_task.id))
logging.debug('raw SQL query: %s', cur.query)
# updating web_node_stats
for stat in web_node_stats:
logging.debug(
'updating web_node_stats %s build_task %s', stat.stat_name_id, build_task.id)
if self.stat_exists(task_id=stat.build_task_id,
stat_name_id=stat.stat_name_id,
table_name='web_node_stats',
column_name='build_task_id'):
sql = '''
UPDATE web_node_stats
SET start_ts = %(start_ts)s, end_ts = %(end_ts)s
WHERE build_task_id = %(build_task_id)s AND stat_name_id = %(stat_name_id)s
'''
else:
sql = '''
INSERT INTO web_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%(build_task_id)s, %(stat_name_id)s, %(start_ts)s, %(end_ts)s);
'''
params = {'build_task_id': build_task.id,
'stat_name_id': stat.stat_name_id,
'start_ts': stat.start_ts,
'end_ts': stat.end_ts}
cur.execute(sql, params)
logging.debug('raw SQL query: %s', cur.query)
# updating build_node_stats
for bn_stat in build_node_stats:
logging.debug(
'updating build_node_stats %s build_task %s', bn_stat.stat_name_id, build_task.id)
if self.stat_exists(task_id=bn_stat.build_task_id,
stat_name_id=bn_stat.stat_name_id,
table_name='build_node_stats',
column_name='build_task_id'):
sql = '''
UPDATE build_node_stats
SET start_ts = %(start_ts)s, end_ts = %(end_ts)s
WHERE build_task_id = %(build_task_id)s AND stat_name_id = %(stat_name_id)s
'''
else:
sql = '''
INSERT INTO build_node_stats(build_task_id, stat_name_id, start_ts, end_ts)
VALUES (%(build_task_id)s, %(stat_name_id)s, %(start_ts)s, %(end_ts)s);
'''
params = {'build_task_id': build_task.id,
'stat_name_id': bn_stat.stat_name_id,
'start_ts': bn_stat.start_ts,
'end_ts': bn_stat.end_ts}
cur.execute(sql, params)
logging.debug('raw SQL query: %s', cur.query)
# committing changes
self.__conn.commit()
def get_db_schema_version(self) -> Optional[int]:
sql = '''
SELECT *
FROM schema_version
LIMIT 1;
'''
cur = self.__conn.cursor()
cur.execute(sql)
val = cur.fetchone()
if not val:
return None
return int(val[0])
def stat_exists(self, task_id: int, stat_name_id: int, table_name: str, column_name: str) -> bool:
sql = f'''
SELECT COUNT({column_name})
FROM {table_name}
WHERE {column_name} = %s AND stat_name_id = %s;
'''
cur = self.__conn.cursor()
cur.execute(sql, (task_id, stat_name_id))
val = int(cur.fetchone()[0])
return val == 1
def get_build_tasks_for_tests_update(self, not_before: datetime) -> List[int]:
'''
Get build task ids for the test tasks that we need to update
https://cloudlinux.atlassian.net/browse/ALBS-1060
'''
cur = self.__conn.cursor()
sql = '''
SELECT bt.id
FROM build_tasks AS bt
INNER JOIN builds AS b
ON b.id = bt.build_id
WHERE b.created_at > %s;
'''
cur.execute(sql, (not_before.timestamp(),))
logging.debug('raw SQL query: %s', cur.query)
result = [int(row[0]) for row in cur.fetchall()]
return result
def insert_update_test_tasks(self, test_tasks: List[TestTaskDB]):
cur = self.__conn.cursor()
# test tasks
for task in test_tasks:
if self.row_exists(pk=task.id, table='test_tasks'):
sql = '''
UPDATE test_tasks
SET revision = %s,
status_id = %s,
started_at = %s
WHERE id = %s;
'''
cur.execute(sql, (task.revision, task.status_id,
task.started_at, task.id))
assert cur.rowcount == 1
else:
sql = '''
INSERT INTO test_tasks(
id, build_task_id, revision, status_id, package_fullname, started_at)
VALUES
(%s, %s, %s, %s, %s, %s);
'''
cur.execute(sql, (task.id, task.build_task_id, task.revision, task.status_id,
task.package_fullname, task.started_at))
# test step
if not task.steps_stats:
continue
for s in task.steps_stats:
logging.debug('test_task_id %s, stat_name_id %s',
s.test_task_id, s.stat_name_id)
if self.stat_exists(s.test_task_id,
s.stat_name_id,
'test_steps_stats',
'test_task_id'):
sql = '''
UPDATE test_steps_stats
SET start_ts = %s,
finish_ts = %s
WHERE test_task_id = %s AND stat_name_id = %s;
'''
cur.execute(sql, (s.start_ts, s.finish_ts,
s.test_task_id, s.stat_name_id))
assert cur.rowcount == 1
else:
sql = '''
INSERT INTO test_steps_stats (
test_task_id, stat_name_id, start_ts, finish_ts)
VALUES (%s, %s, %s, %s);
'''
cur.execute(sql, (s.test_task_id, s.stat_name_id,
s.start_ts, s.finish_ts))
# committing changes
self.__conn.commit()
def delete_build(self, build_id: int):
params = (build_id,)
sql = "DELETE FROM builds WHERE id = %s;"
cur = self.__conn.cursor()
cur.execute(sql, params)
self.__conn.commit()
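
A minimal, hypothetical sketch of how the DB class above is typically driven (connection values are placeholders; DbConfig and DB are the classes shown in this diff, other imports omitted):

    from datetime import datetime, timedelta

    config = DbConfig(name='albs_analytics', username='albs_analytics', password='secret')
    # db host/port fall back to the defaults declared in DbConfig (localhost:5432)
    db = DB(config)
    not_before = datetime.now().astimezone() - timedelta(days=7)
    unfinished = db.get_unfinished_builds(not_before)  # {build_id: {build_task_id: status_id}}
    db.close_conn()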


@ -0,0 +1,162 @@
# pylint: disable=relative-beyond-top-level
from datetime import datetime, timedelta
import logging
from typing import Dict, List
from ..api_client import APIclient
from ..const import BuildTaskEnum
from ..db import DB
from ..models.build import BuildTask
from ..models.extractor_config import ExtractorConfig
class Extractor:
def __init__(self, config: ExtractorConfig, api: APIclient, db: DB):
self.config = config
self.api = api
self.db = db
def extract_and_store(self) -> int:
build_count = 0
page_num = 1
last_build_id = self.db.get_latest_build_id()
if not last_build_id:
last_build_id = self.config.start_from - 1
logging.info("last_build_id: %s", last_build_id)
stop = False
while not stop:
oldest_build_age = datetime.now().astimezone() - \
timedelta(days=self.config.data_store_days)
logging.info("page: %s", page_num)
for build in self.api.get_builds(page_num):
# check if we should stop processing the build
if build.id <= last_build_id or \
build.created_at <= oldest_build_age:
stop = True
break
# some builds could move from one page to another
if self.db.row_exists(pk=build.id, table='builds'):
continue
# inserting build, build tasks and build task statistics
logging.info('inserting %s', build.id)
try:
self.db.insert_build(build.as_db_model())
except Exception as error: # pylint: disable=broad-except
logging.error('failed to insert build %d: %s',
build.id, error, exc_info=True)
continue
for build_task in build.build_tasks:
logging.info('build %s: inserting build task %s',
build.id, build_task.id)
try:
self.db.insert_buildtask(build_task.as_db_model(),
build_task.web_node_stats.as_db_model(
build_task.id),
build_task.build_node_stats.as_db_model(
build_task.id))
except Exception as error: # pylint: disable=broad-except
logging.error('build %s: failed to insert build task %d: %s',
build.id, build_task.id, error, exc_info=True)
logging.info(
'getting test tasks for build task %s', build_task.id)
test_tasks = self.api.get_test_tasks(build_task.id)
logging.info('received %d test tasks', len(test_tasks))
if len(test_tasks) > 0:
logging.info('inserting test tasks')
as_db = [t.as_db_model() for t in test_tasks]
self.db.insert_update_test_tasks(as_db)
build_count += 1
page_num += 1
return build_count
def build_cleanup(self):
oldest_to_keep = datetime.now().astimezone() - \
timedelta(days=self.config.data_store_days)
logging.info('Removing all builds older than %s', oldest_to_keep)
removed_count = self.db.cleanup_builds(oldest_to_keep)
logging.info('removed %d entries', removed_count)
def __update_build_tasks(self, build_tasks: List[BuildTask],
build_tasks_status_db: Dict[int, int]):
for b in build_tasks:
if b.status_id != build_tasks_status_db[b.id]:
logging.info('build: %s, build task %d status has changed %s -> %s. Updating DB',
b.build_id,
b.id, BuildTaskEnum(
build_tasks_status_db[b.id]).name,
BuildTaskEnum(b.status_id).name)
try:
self.db.update_build_task(b.as_db_model(),
b.web_node_stats.as_db_model(
b.id),
b.build_node_stats.as_db_model(b.id))
except Exception as err: # pylint: disable=broad-except
logging.error(
'build: %d, failed to update build task %d: %s',
b.build_id, b.id, err, exc_info=True)
else:
logging.info(
'build: %d, build task %d was updated', b.build_id, b.id)
else:
logging.info(
"build: %d, build_task %d is still %s. Skipping",
b.build_id, b.id, BuildTaskEnum(b.status_id).name)
def update_builds(self):
not_before = datetime.now().astimezone() - \
timedelta(days=self.config.oldest_to_update_days)
logging.info('Getting unfinished builds that were created after %s ',
not_before)
unfinished_tasks = self.db.get_unfinished_builds(not_before)
for build_id, build_tasks_db in unfinished_tasks.items():
try:
logging.info('Getting status of build %d', build_id)
build = self.api.get_build(build_id)
if not build:
logging.warning(
"build %s was deleted from albs, removing it", build_id)
self.db.delete_build(build_id)
continue
logging.info('Updating build tasks')
build_tasks_to_check = [
b for b in build.build_tasks if b.id in build_tasks_db]
self.__update_build_tasks(
build_tasks_to_check, build_tasks_db)
if build.finished_at:
logging.info(
"build is finished, we need to update finished_at attribute")
self.db.update_build(build.as_db_model())
logging.info('finished processing build %d', build_id)
except Exception as err: # pylint: disable=broad-except
logging.error("Cant process build %d: %s",
build_id, err, exc_info=True)
def updating_test_tasks(self):
not_before = datetime.now().astimezone() - \
timedelta(days=self.config.oldest_to_update_days)
logging.info('getting build tasks for builds created after %s',
not_before)
build_task_ids = self.db.get_build_tasks_for_tests_update(
not_before)
for build_task_id in build_task_ids:
try:
logging.info('getting tests for build task %s', build_task_id)
tasks_api = self.api.get_test_tasks(build_task_id)
logging.info('updating test tasks')
tasks_db = [t.as_db_model() for t in tasks_api]
self.db.insert_update_test_tasks(tasks_db)
except Exception as err: # pylint: disable=broad-except
logging.error(
'failed to update tests for build task %d: %s',
build_task_id, err, exc_info=True)


@ -1,32 +1,20 @@
from datetime import datetime, timedelta
import logging
from logging.handlers import RotatingFileHandler
import sys
import time
from typing import Dict, Any
import yaml
# pylint: disable=relative-beyond-top-level
from ..api_client import APIclient
from ..db import DB
from ..const import DB_SCHEMA_VER
from .extractor import Extractor
from ..models.extractor_config import ExtractorConfig
from ..models.db_config import DbConfig
def __get_oldest_build_age(config: dict) -> datetime:
oldest_build_age = datetime.now().astimezone() \
- timedelta(days=config['data_store_days'])
return oldest_build_age
def __get_db_config(config: dict) -> DbConfig:
return DbConfig(name=config['db_name'],
port=int(config['db_port']),
host=config['db_host'],
username=config['db_username'],
password=config['db_password'])
def __get_config(yml_path: str) -> ExtractorConfig:
"""
get_config loads yml file and generates instance
@ -34,9 +22,15 @@ def __get_config(yml_path: str) -> ExtractorConfig:
with open(yml_path, 'r', encoding='utf-8') as flr:
raw = yaml.safe_load(flr)
# adding new attrs
raw['oldest_build_age'] = __get_oldest_build_age(raw)
raw['db_config'] = __get_db_config(raw)
# Dbconfig
db_params: Dict[str, Any] = {'name': raw['db_name'],
'username': raw['db_username'],
'password': raw['db_password'], }
if 'db_port' in raw:
db_params['port'] = raw['db_port']
if 'db_host' in raw:
db_params['host'] = raw['db_host']
raw['db_config'] = DbConfig(**db_params)
return ExtractorConfig(**raw)
@ -46,11 +40,24 @@ def start(yml_path: str):
# configuring logging
logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s <%(funcName)s> %(message)s',
format='%(asctime)s %(levelname)s %(funcName)s() %(message)s',
handlers=[RotatingFileHandler(config.log_file,
maxBytes=10000000,
backupCount=3)])
# some pre-flight checks
db = DB(config.db_config)
cur_version = db.get_db_schema_version()
if not cur_version:
logging.error(
'Cannot get db schema version. Make sure that schema_version exists')
sys.exit(1)
if cur_version != DB_SCHEMA_VER:
logging.error('unsupported DB schema: want %s, have %s',
DB_SCHEMA_VER, cur_version)
sys.exit(1)
db.close_conn()
while True:
logging.info('Starting extraction process')
api = APIclient(api_root=config.albs_url,
@ -68,23 +75,34 @@ def start(yml_path: str):
logging.info(
'Build extraction was finished. %d builds were inserted', inserted_count)
logging.info('Starting old builds removal')
logging.info('starting old builds removal')
try:
extractor.build_cleanup()
except Exception as err: # pylint: disable=broad-except
logging.critical("Unhandled exception %s", err, exc_info=True)
logging.critical("unhandled exception %s", err, exc_info=True)
else:
logging.info('Cleanup finished')
logging.info('cleanup finished')
logging.info('Updating statuses of unfinished build tasks')
logging.info('updating statuses of unfinished build tasks')
try:
extractor.update_builds()
except Exception as err: # pylint: disable=broad-except
logging.critical("Unhandled exception %s", err, exc_info=True)
logging.critical("unhandled exception %s", err, exc_info=True)
else:
logging.info('Update finished')
logging.info('update finished')
logging.info('updating/inserting test tasks')
try:
extractor.updating_test_tasks()
except Exception as err: # pylint: disable=broad-except
logging.critical("unhandled exception %s", err, exc_info=True)
else:
logging.info('test tasks were updated')
# freeing up resources
extractor.db.close_conn()
extractor.api.close_session()
logging.info("Extraction was finished")
logging.info("Sleeping for %d seconds", config.scrape_interval)
time.sleep(config.scrape_interval)


@ -0,0 +1,12 @@
from pydantic import BaseModel # pylint: disable=no-name-in-module
from typing import Optional
class BuildNodeStatDB(BaseModel):
"""
Build node stat as it is sent to/received from the database
"""
build_task_id: int
stat_name_id: int
start_ts: Optional[float] = None
end_ts: Optional[float] = None


@ -0,0 +1,41 @@
from typing import List
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat
from .build_node_stat_db import BuildNodeStatDB
from ..const import BuildNodeStatsEnum
class BuildNodeStats(BaseModel):
"""
Represents build statistics for build node
"""
build_all: BuildStat
build_binaries: BuildStat
build_packages: BuildStat
build_srpm: BuildStat
build_node_task: BuildStat
cas_notarize_artifacts: BuildStat
cas_source_authenticate: BuildStat
git_checkout: BuildStat
upload: BuildStat
def as_db_model(self, build_task_id: int) -> List[BuildNodeStatDB]:
result = []
for field_name in self.__fields__.keys():
stats: BuildStat = getattr(self, field_name)
start_ts = stats.start_ts.timestamp() \
if stats.start_ts else None
end_ts = stats.end_ts.timestamp() \
if stats.end_ts else None
stat_name_id = BuildNodeStatsEnum[field_name].value
build_node_stat_db = BuildNodeStatDB(build_task_id=build_task_id,
stat_name_id=stat_name_id,
start_ts=start_ts,
end_ts=end_ts)
result.append(build_node_stat_db)
return result


@ -0,0 +1,15 @@
"""
Module for BuildStat model
"""
from datetime import datetime
from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
class BuildStat(BaseModel):
"""
BuildStat represents particular build statistic
"""
start_ts: Optional[datetime] = None
end_ts: Optional[datetime] = None


@ -0,0 +1,16 @@
"""
Module for BuildStatDB model
"""
from pydantic import BaseModel # pylint: disable=no-name-in-module
class BuildStatDB(BaseModel):
"""
Represents build stat as it is sent to/received from the database
"""
build_task_id: int
stat_name_id: int
start_ts: float
end_ts: float


@ -4,7 +4,9 @@ from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_task_db import BuildTaskDB
from .enums import ArchEnum
from .build_node_stats import BuildNodeStats
from ..const import ArchEnum
from .web_node_stats import WebNodeStats
class BuildTask(BaseModel):
@ -15,6 +17,8 @@ class BuildTask(BaseModel):
started_at: Optional[datetime] = None
finished_at: Optional[datetime] = None
status_id: int
build_node_stats: BuildNodeStats
web_node_stats: WebNodeStats
def as_db_model(self) -> BuildTaskDB:
started_at = self.started_at.timestamp() \


@ -1,9 +1,13 @@
from pydantic import BaseModel, Field
DB_PORT = 5432
DB_HOST = "localhost"
class DbConfig(BaseModel):
name: str = Field(description="db name")
port: int = Field(description="db server port")
host: str = Field(description="db server ip/hostname")
port: int = Field(description="db server port", default=DB_PORT)
host: str = Field(description="db server ip/hostname", default=DB_HOST)
username: str = Field(description="username to connect with")
password: str = Field(description="password to connect with")


@ -1,4 +1,3 @@
from datetime import datetime
from pathlib import Path
from pydantic import HttpUrl, Field, BaseModel # pylint: disable=no-name-in-module
@ -8,8 +7,10 @@ from .db_config import DbConfig
# DEFAULTS
ALBS_URL_DEFAULT = 'https://build.almalinux.org'
LOG_FILE_DEFAULT = '/tmp/extractor.log'
API_DEFAULT = 30
API_TIMEOUT_DEFAULT = 30
SCRAPE_INTERVAL_DEFAULT = 3600
START_FROM_DEFAULT = 5808
OLDEST_TO_UPDATE_DAYS_DEFAULT = 7
class ExtractorConfig(BaseModel):
@ -20,12 +21,17 @@ class ExtractorConfig(BaseModel):
default=LOG_FILE_DEFAULT)
albs_url: HttpUrl = Field(description='ALBS root URL',
default=ALBS_URL_DEFAULT)
oldest_build_age: datetime = \
Field(description='oldest build age to extract and store')
data_store_days: int = \
Field(description='oldest build (in days) to keep in DB')
jwt: str = Field(description='ALBS JWT token')
db_config: DbConfig = Field(description="database configuration")
api_timeout: int = Field(
description="max time in seconds to wait for API response",
default=API_DEFAULT)
default=API_TIMEOUT_DEFAULT)
scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
default=SCRAPE_INTERVAL_DEFAULT)
start_from: int = Field(description='build id to start populating empty db with',
default=START_FROM_DEFAULT)
oldest_to_update_days: int = \
Field(description='oldest (in days) unfinished object (build/task/step...) that we will try to update',
default=OLDEST_TO_UPDATE_DAYS_DEFAULT)


@ -1,4 +1,4 @@
from pydantic import BaseModel
from pydantic import BaseModel # pylint: disable=no-name-in-module
class SignTaskDB(BaseModel):


@ -0,0 +1,9 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
class TestStepStat(BaseModel):
start_ts: Optional[datetime] = None
finish_ts: Optional[datetime] = None


@ -0,0 +1,9 @@
from pydantic import BaseModel # pylint: disable=no-name-in-module
from typing import Optional
class TestStepStatDB(BaseModel):
test_task_id: int
stat_name_id: int
start_ts: Optional[float] = None
finish_ts: Optional[float] = None


@ -0,0 +1,36 @@
from typing import List, Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
from ..const import TestStepEnum
from .test_step_stat import TestStepStat
from .test_step_stat_db import TestStepStatDB
class TestStepsStats(BaseModel):
install_package: Optional[TestStepStat] = None
stop_environment: Optional[TestStepStat] = None
initial_provision: Optional[TestStepStat] = None
start_environment: Optional[TestStepStat] = None
uninstall_package: Optional[TestStepStat] = None
initialize_terraform: Optional[TestStepStat] = None
package_integrity_tests: Optional[TestStepStat] = None
def as_db(self, test_task_id: int) -> List[TestStepStatDB]:
result = []
for field_name in self.__fields__.keys():
stats: TestStepStat = getattr(self, field_name)
if not stats:
continue
start_ts = stats.start_ts.timestamp() \
if stats.start_ts else None
finish_ts = stats.finish_ts.timestamp() \
if stats.finish_ts else None
stat_name_id = TestStepEnum[field_name].value
test_step_stat_db = TestStepStatDB(test_task_id=test_task_id,
stat_name_id=stat_name_id,
start_ts=start_ts,
finish_ts=finish_ts)
result.append(test_step_stat_db)
return result


@ -0,0 +1,31 @@
from datetime import datetime
from typing import Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .test_task_db import TestTaskDB
from .test_steps_stats import TestStepsStats
class TestTask(BaseModel):
id: int
build_task_id: int
revision: int
status: int
package_fullname: str
started_at: Optional[datetime] = None
steps_stats: Optional[TestStepsStats] = None
def as_db_model(self) -> TestTaskDB:
started_at = self.started_at.timestamp() \
if self.started_at else None
params = {
'id': self.id,
'build_task_id': self.build_task_id,
'revision': self.revision,
'status_id': self.status,
'package_fullname': self.package_fullname,
'started_at': started_at,
'steps_stats': self.steps_stats.as_db(self.id) if self.steps_stats else None
}
return TestTaskDB(**params)


@ -0,0 +1,17 @@
from typing import List, Optional
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .test_step_stat_db import TestStepStatDB
class TestTaskDB(BaseModel):
"""
Test task as it is received from/sent to the database
"""
id: int
build_task_id: int
revision: int
status_id: int
package_fullname: str
started_at: Optional[float] = None
steps_stats: Optional[List[TestStepStatDB]] = None


@ -0,0 +1,13 @@
from pydantic import BaseModel # pylint: disable=no-name-in-module
from typing import Optional
class WebNodeStatDB(BaseModel):
"""
Represents WebNodeStat as it is sent to/received from the database
"""
build_task_id: int
stat_name_id: int
start_ts: Optional[float] = None
end_ts: Optional[float] = None


@ -0,0 +1,36 @@
from typing import List
from pydantic import BaseModel # pylint: disable=no-name-in-module
from .build_stat import BuildStat
from .web_node_stat_db import WebNodeStatDB
from ..const import WebNodeStatsEnum
class WebNodeStats(BaseModel):
"""
Represents build statistics for web node
"""
build_done: BuildStat
logs_processing: BuildStat
packages_processing: BuildStat
multilib_processing: BuildStat
def as_db_model(self, build_task_id: int) -> List[WebNodeStatDB]:
result = []
for field_name in self.__fields__.keys():
stats: BuildStat = getattr(self, field_name)
start_ts = stats.start_ts.timestamp() \
if stats.start_ts else None
end_ts = stats.end_ts.timestamp() \
if stats.end_ts else None
stat_name_id = WebNodeStatsEnum[field_name].value
web_node_stat_db = WebNodeStatDB(build_task_id=build_task_id,
stat_name_id=stat_name_id,
start_ts=start_ts,
end_ts=end_ts)
result.append(web_node_stat_db)
return result
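
A small, hypothetical example of the conversion above (timestamps are invented): empty stats become rows whose start_ts is None, which DB.insert_buildtask() later skips:

    from datetime import datetime, timezone

    stats = WebNodeStats(
        build_done=BuildStat(start_ts=datetime(2023, 3, 15, 10, 0, tzinfo=timezone.utc),
                             end_ts=datetime(2023, 3, 15, 10, 2, tzinfo=timezone.utc)),
        logs_processing=BuildStat(),
        packages_processing=BuildStat(),
        multilib_processing=BuildStat())
    rows = stats.as_db_model(build_task_id=1)  # four WebNodeStatDB rows, one per field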


@ -10,7 +10,6 @@ albs_url: https://build.almalinux.org
# required: yes
jwt: ""
# db_host
# IP/hostname of database server
# required: no
@ -28,7 +27,6 @@ db_port: 5432
# required: yes
db_username: albs_analytics
# db_password
# password to connect with
# required: yes
@ -39,7 +37,6 @@ db_password: super_secret_password
# required: yes
db_name: albs_analytics
# log_file
# file to write logs to
# required: no
@ -55,4 +52,21 @@ data_store_days: 30
# sleep time in seconds between data extraction
# required: no
# default: 3600
scrape_interval: 3600
scrape_interval: 3600
# build_id to start populating empty db with
# required: false
# default: 5808 (first build with correct metrics)
start_from: 5808
# oldest_to_update_days
# oldest (in days) unfinished object (build/task/step...) that we will try to update
# required: false
# default: 7
oldest_to_update_days: 7
# api_timeout
# how long (in seconds) we will wait for API response
# required: false
# default: 30
api_timeout: 30

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,516 @@
{
"__inputs": [
{
"name": "DS_ALBS_ANALYTICS",
"label": "albs_analytics",
"description": "",
"type": "datasource",
"pluginId": "postgres",
"pluginName": "PostgreSQL"
}
],
"__elements": {},
"__requires": [
{
"type": "panel",
"id": "bargauge",
"name": "Bar gauge",
"version": ""
},
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.6"
},
{
"type": "datasource",
"id": "postgres",
"name": "PostgreSQL",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "left",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "duration"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "package"
},
"properties": [
{
"id": "custom.width",
"value": 253
}
]
},
{
"matcher": {
"id": "byName",
"options": "id"
},
"properties": [
{
"id": "custom.width",
"value": 80
}
]
}
]
},
"gridPos": {
"h": 3,
"w": 24,
"x": 0,
"y": 0
},
"id": 4,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": []
},
"pluginVersion": "9.3.6",
"targets": [
{
"cacheDurationSeconds": 300,
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"editorMode": "code",
"fields": [
{
"jsonPath": ""
}
],
"format": "table",
"method": "GET",
"queryParams": "",
"rawQuery": true,
"rawSql": "SELECT \n DISTINCT tt.id,\n tt.package_fullname AS package,\n tt.revision,\n enum.value AS \"status\",\n tt.started_at * 1000 AS \"started at\",\n tf.finished_at * 1000 AS \"finished at\", \n tf.finished_at - tt.started_at AS duration\nFROM test_tasks AS tt\nINNER JOIN test_steps_stats AS tss\n ON tt.id = tss.test_task_id\nINNER JOIN test_tasks_status_enum AS enum\n ON tt.status_id = enum.id\nINNER JOIN \n (SELECT \n tss.test_task_id, \n MAX(tss.finish_ts) AS finished_at \n FROM test_steps_stats AS tss\n INNER JOIN test_tasks AS tt\n ON tss.test_task_id = tt.id\n WHERE tt.id = $id\n GROUP BY tss.test_task_id) AS tf\n ON tf.test_task_id = tt.id\nWHERE tt.id = $id;",
"refId": "A",
"sql": {
"columns": [
{
"parameters": [],
"type": "function"
}
],
"groupBy": [
{
"property": {
"type": "string"
},
"type": "groupBy"
}
],
"limit": 50
},
"urlPath": ""
}
],
"title": "Task info",
"transformations": [
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "time",
"targetField": "started at"
},
{
"destinationType": "time",
"targetField": "finished at"
}
],
"fields": {}
}
}
],
"type": "table"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 3
},
"id": 8,
"options": {
"displayMode": "basic",
"minVizHeight": 10,
"minVizWidth": 0,
"orientation": "vertical",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"showUnfilled": false
},
"pluginVersion": "9.3.6",
"targets": [
{
"cacheDurationSeconds": 300,
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"editorMode": "code",
"fields": [
{
"jsonPath": ""
}
],
"format": "table",
"method": "GET",
"queryParams": "",
"rawQuery": true,
"rawSql": "SELECT \n enum.value AS \"step name\",\n SUM(tss.finish_ts - tss.start_ts) AS duration\nFROM test_tasks AS tt\nINNER JOIN test_steps_stats AS tss\n ON tt.id = tss.test_task_id\nINNER JOIN test_steps_enum AS enum\n ON tss.stat_name_id = enum.id\nWHERE tt.id = $id\nGROUP BY (enum.value); ",
"refId": "A",
"sql": {
"columns": [
{
"parameters": [],
"type": "function"
}
],
"groupBy": [
{
"property": {
"type": "string"
},
"type": "groupBy"
}
],
"limit": 50
},
"urlPath": ""
}
],
"title": "Test duration (group by test step)",
"transformations": [
{
"id": "rowsToFields",
"options": {}
}
],
"type": "bargauge"
},
{
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "left",
"displayMode": "auto",
"filterable": true,
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "test task id"
},
"properties": [
{
"id": "custom.width",
"value": 159
},
{
"id": "links",
"value": [
{
"title": "",
"url": "/d/8nFXlkB4z/test-task-details?orgId=1&var-id=${__value.raw}"
}
]
}
]
},
{
"matcher": {
"id": "byName",
"options": "duration"
},
"properties": [
{
"id": "unit",
"value": "s"
}
]
},
{
"matcher": {
"id": "byName",
"options": "package name"
},
"properties": [
{
"id": "custom.width",
"value": 408
}
]
}
]
},
"gridPos": {
"h": 6,
"w": 24,
"x": 0,
"y": 11
},
"id": 6,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": false,
"displayName": "started"
}
]
},
"pluginVersion": "9.3.6",
"targets": [
{
"cacheDurationSeconds": 300,
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"editorMode": "code",
"fields": [
{
"jsonPath": ""
}
],
"format": "table",
"method": "GET",
"queryParams": "",
"rawQuery": true,
"rawSql": "SELECT \n tt.package_fullname AS \"package name\",\n tss.test_task_id AS \"test task id\", \n enum.value AS \"step name\",\n tss.start_ts * 1000 AS started,\n tss.finish_ts * 1000 AS finished,\n tss.finish_ts - tss.start_ts AS duration\nFROM test_tasks AS tt\nINNER JOIN test_steps_stats AS tss\n ON tt.id = tss.test_task_id\nINNER JOIN test_steps_enum AS enum\n ON tss.stat_name_id = enum.id\nWHERE tt.id = $id",
"refId": "A",
"sql": {
"columns": [
{
"parameters": [],
"type": "function"
}
],
"groupBy": [
{
"property": {
"type": "string"
},
"type": "groupBy"
}
],
"limit": 50
},
"urlPath": ""
}
],
"title": "Test steps",
"transformations": [
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "time",
"targetField": "started"
},
{
"destinationType": "time",
"targetField": "finished"
}
],
"fields": {}
}
}
],
"type": "table"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"current": {},
"datasource": {
"type": "postgres",
"uid": "${DS_ALBS_ANALYTICS}"
},
"definition": "SELECT id\nFROM test_tasks\nORDER BY id DESC\nLIMIT 1000;",
"hide": 0,
"includeAll": false,
"label": "Test task id",
"multi": false,
"name": "id",
"options": [],
"query": "SELECT id\nFROM test_tasks\nORDER BY id DESC\nLIMIT 1000;",
"refresh": 1,
"regex": "",
"skipUrlSync": false,
"sort": 0,
"type": "query"
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "Test task details",
"uid": "8nFXlkB4z",
"version": 3,
"weekStart": ""
}
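For readability, the escaped rawSql carried by the "Test duration (group by test step)" panel above is equivalent to roughly the following plain SQL (a sketch only; the Grafana variable $id is replaced here with a made-up test task id):

SELECT
  enum.value AS "step name",
  SUM(tss.finish_ts - tss.start_ts) AS duration
FROM test_tasks AS tt
INNER JOIN test_steps_stats AS tss
  ON tt.id = tss.test_task_id
INNER JOIN test_steps_enum AS enum
  ON tss.stat_name_id = enum.id
WHERE tt.id = 12345  -- placeholder for the $id dashboard variable
GROUP BY enum.value;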

View File

@@ -0,0 +1,167 @@
BEGIN;
-- builds
CREATE TABLE builds (
id INTEGER PRIMARY KEY,
url VARCHAR(50) NOT NULL,
created_at DOUBLE PRECISION NOT NULL,
finished_at DOUBLE PRECISION
);
CREATE INDEX IF NOT EXISTS builds_created_at
ON builds(created_at);
CREATE INDEX IF NOT EXISTS builds_finished_at
ON builds(finished_at);
-- build_task_status_enum
CREATE TABLE IF NOT EXISTS build_task_status_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(15)
);
INSERT INTO build_task_status_enum (id, value)
VALUES
(0, 'idle'),
(1, 'started'),
(2, 'completed'),
(3, 'failed'),
(4, 'excluded');
-- arch_enum
CREATE TABLE arch_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(15)
);
INSERT INTO arch_enum(id, value)
VALUES
(0, 'i686'),
(1, 'x86_64'),
(2, 'aarch64'),
(3, 'ppc64le'),
(4, 's390x');
-- web_node_stats_enum
CREATE TABLE web_node_stats_enum (
id INTEGER PRIMARY KEY,
value VARCHAR(50)
);
INSERT INTO web_node_stats_enum (id, value)
VALUES
(0, 'build_done'),
(1, 'logs_processing'),
(2, 'packages_processing');
-- build_node_stats_enum
CREATE TABLE build_node_stats_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(50)
);
INSERT INTO build_node_stats_enum (id, value)
VALUES
(0, 'upload'),
(1, 'build_all'),
(2, 'build_srpm'),
(3, 'git_checkout'),
(4, 'build_binaries'),
(5, 'build_packages'),
(6, 'build_node_task'),
(7, 'cas_notarize_artifacts'),
(8, 'cas_source_authenticate');
-- build_tasks
CREATE TABLE build_tasks (
id INTEGER PRIMARY KEY,
name VARCHAR(50) NOT NULL,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL,
status_id INTEGER REFERENCES build_task_status_enum(id) ON DELETE SET NULL,
started_at DOUBLE PRECISION,
finished_at DOUBLE PRECISION
);
CREATE INDEX build_tasks_build_id
ON build_tasks(build_id);
CREATE INDEX build_tasks_started_at
ON build_tasks(started_at);
CREATE INDEX build_tasks_finished_at
ON build_tasks(finished_at);
-- web_node_stats
CREATE TABLE web_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES web_node_stats_enum(id) ON DELETE SET NULL,
start_ts DOUBLE PRECISION,
end_ts DOUBLE PRECISION
);
CREATE INDEX web_node_stats_build_task_id
ON web_node_stats(build_task_id);
CREATE INDEX web_node_stats_start_ts
ON web_node_stats(start_ts);
CREATE INDEX web_node_stats_end_ts
ON web_node_stats(end_ts);
-- build_node_stats
CREATE TABLE build_node_stats (
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES build_node_stats_enum(id) ON DELETE SET NULL,
start_ts DOUBLE PRECISION,
end_ts DOUBLE PRECISION
);
CREATE INDEX build_node_stats_build_task_id
ON build_node_stats(build_task_id);
CREATE INDEX build_node_stats_build_start_ts
ON build_node_stats(start_ts);
CREATE INDEX build_node_stats_build_end_ts
ON build_node_stats(end_ts);
-- sign_tasks
CREATE TABLE sign_tasks (
id INTEGER PRIMARY KEY,
build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
started_at DOUBLE PRECISION,
finished_at DOUBLE PRECISION
);
CREATE INDEX sign_tasks_build_id
ON sign_tasks(build_id);
CREATE INDEX sign_tasks_buildtask_id
ON sign_tasks(buildtask_id);
CREATE INDEX sign_tasks_started_at
ON sign_tasks(started_at);
CREATE INDEX sign_tasks_finished_at
ON sign_tasks(finished_at);
-- schema_version
CREATE TABLE schema_version (
version INTEGER
);
INSERT INTO schema_version (version)
VALUES (1);
COMMIT;
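As a rough illustration of how this schema is populated (a sketch, not taken from the extractor code): timestamps are stored as epoch seconds in DOUBLE PRECISION columns, and build_tasks rows reference builds, arch_enum and build_task_status_enum. All ids, names and the URL below are made up:

-- hypothetical build with one x86_64 task that completed
INSERT INTO builds (id, url, created_at, finished_at)
VALUES (100, 'https://example.org/build/100', 1678886400.0, 1678890000.0);

INSERT INTO build_tasks (id, name, build_id, arch_id, status_id, started_at, finished_at)
VALUES (1001, 'bash-5.1.8-6.el9', 100, 1, 2, 1678886500.0, 1678889000.0);
-- arch_id 1 = 'x86_64', status_id 2 = 'completed' (see the enum inserts above)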

View File

@@ -0,0 +1,21 @@
BEGIN;
INSERT INTO web_node_stats_enum (id, value)
VALUES
(3, 'multilib_processing');
ALTER TABLE web_node_stats
ADD CONSTRAINT web_node_stats_unique UNIQUE (build_task_id, stat_name_id);
ALTER TABLE build_node_stats
ADD CONSTRAINT build_node_stats_unique UNIQUE (build_task_id, stat_name_id);
INSERT INTO build_task_status_enum (id, value)
VALUES
(5, 'canceled');
UPDATE schema_version
SET version = 2;
COMMIT;
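The unique constraints added here allow a re-scrape of the same build task to be written idempotently. A minimal sketch of the upsert pattern they enable (values are placeholders; whether the extractor actually uses ON CONFLICT is not shown in this diff):

INSERT INTO web_node_stats (build_task_id, stat_name_id, start_ts, end_ts)
VALUES (1001, 0, 1678886600.0, 1678886700.0)
ON CONFLICT (build_task_id, stat_name_id)
DO UPDATE SET start_ts = EXCLUDED.start_ts,
              end_ts   = EXCLUDED.end_ts;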

View File

@@ -0,0 +1,80 @@
BEGIN;
-- test_tasks_status_enum
CREATE TABLE test_tasks_status_enum(
id INTEGER PRIMARY KEY,
value VARCHAR(15)
);
INSERT INTO test_tasks_status_enum (id, value)
VALUES
(1, 'created'),
(2, 'started'),
(3, 'completed'),
(4, 'failed');
-- test_tasks
CREATE TABLE test_tasks (
id INTEGER PRIMARY KEY,
build_task_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
revision INTEGER,
status_id INTEGER REFERENCES test_tasks_status_enum(id) ON DELETE SET NULL,
package_fullname VARCHAR(100),
started_at DOUBLE PRECISION
);
CREATE INDEX test_tasks_build_task_id
ON test_tasks(build_task_id);
CREATE INDEX test_tasks_build_status_id
ON test_tasks(status_id);
CREATE INDEX test_tasks_package_fullname
ON test_tasks(package_fullname);
-- test_steps_enum
CREATE TABLE test_steps_enum (
id INTEGER PRIMARY KEY,
value VARCHAR(50)
);
INSERT INTO test_steps_enum (id, value)
VALUES
(0, 'install_package'),
(1, 'stop_environment'),
(2, 'initial_provision'),
(3, 'start_environment'),
(4, 'uninstall_package'),
(5, 'initialize_terraform'),
(6, 'package_integrity_tests'),
(7, 'stop_environment');
-- test_steps_stats
CREATE TABLE test_steps_stats(
test_task_id INTEGER REFERENCES test_tasks(id) ON DELETE CASCADE,
stat_name_id INTEGER REFERENCES test_steps_enum(id) ON DELETE SET NULL,
start_ts DOUBLE PRECISION,
finish_ts DOUBLE PRECISION
);
ALTER TABLE test_steps_stats
ADD CONSTRAINT test_steps_stats_unique UNIQUE (test_task_id, stat_name_id);
CREATE INDEX test_steps_stats_start_ts
ON test_steps_stats(start_ts);
CREATE INDEX test_steps_stats_end_ts
ON test_steps_stats(finish_ts);
-- increasing size of name field
ALTER TABLE build_tasks ALTER COLUMN name TYPE varchar(150);
UPDATE schema_version
SET version = 3;
COMMIT;
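With the test tables in place, step timings can be aggregated directly from test_steps_stats; for example, a sketch of an average-duration-per-step query (durations are in seconds, like the raw start_ts/finish_ts values):

SELECT enum.value AS step_name,
       AVG(tss.finish_ts - tss.start_ts) AS avg_duration_seconds
FROM test_steps_stats AS tss
INNER JOIN test_steps_enum AS enum
  ON enum.id = tss.stat_name_id
GROUP BY enum.value
ORDER BY avg_duration_seconds DESC;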

View File

@@ -1,280 +0,0 @@
{
"__inputs": [
{
"name": "DS_POSTGRESQL",
"label": "PostgreSQL",
"description": "",
"type": "datasource",
"pluginId": "postgres",
"pluginName": "PostgreSQL"
}
],
"__elements": {},
"__requires": [
{
"type": "grafana",
"id": "grafana",
"name": "Grafana",
"version": "9.3.2"
},
{
"type": "datasource",
"id": "postgres",
"name": "PostgreSQL",
"version": "1.0.0"
},
{
"type": "panel",
"id": "table",
"name": "Table",
"version": ""
}
],
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "postgres",
"uid": "${DS_POSTGRESQL}"
},
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"custom": {
"align": "auto",
"displayMode": "auto",
"inspect": false
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": [
{
"matcher": {
"id": "byName",
"options": "id"
},
"properties": [
{
"id": "custom.width",
"value": 54
}
]
},
{
"matcher": {
"id": "byName",
"options": "created_at"
},
"properties": [
{
"id": "custom.width",
"value": 226
}
]
},
{
"matcher": {
"id": "byName",
"options": "finished_at"
},
"properties": [
{
"id": "custom.width",
"value": 209
}
]
},
{
"matcher": {
"id": "byName",
"options": "finished"
},
"properties": [
{
"id": "custom.width",
"value": 187
}
]
},
{
"matcher": {
"id": "byName",
"options": "created"
},
"properties": [
{
"id": "custom.width",
"value": 213
}
]
},
{
"matcher": {
"id": "byName",
"options": "url"
},
"properties": [
{
"id": "custom.width",
"value": 279
}
]
}
]
},
"gridPos": {
"h": 12,
"w": 24,
"x": 0,
"y": 0
},
"id": 2,
"options": {
"footer": {
"fields": "",
"reducer": [
"sum"
],
"show": false
},
"showHeader": true,
"sortBy": [
{
"desc": true,
"displayName": "duration (h)"
}
]
},
"pluginVersion": "9.3.2",
"targets": [
{
"cacheDurationSeconds": 300,
"datasource": {
"type": "postgres",
"uid": "${DS_POSTGRESQL}"
},
"editorMode": "code",
"fields": [
{
"jsonPath": ""
}
],
"format": "table",
"hide": false,
"method": "GET",
"queryParams": "",
"rawQuery": true,
"rawSql": "SELECT id, url, created_at * 1000 as created, finished_at * 1000 as finished, (finished_at - created_at) / (60*60) as duration\nFROM builds\nWHERE $__unixEpochFilter(created_at) AND finished_at IS NOT NULL",
"refId": "A",
"sql": {
"columns": [
{
"parameters": [],
"type": "function"
}
],
"groupBy": [
{
"property": {
"type": "string"
},
"type": "groupBy"
}
],
"limit": 50
},
"urlPath": ""
}
],
"title": "Finished builds",
"transformations": [
{
"id": "convertFieldType",
"options": {
"conversions": [
{
"destinationType": "time",
"targetField": "created"
},
{
"destinationType": "time",
"targetField": "finished"
}
],
"fields": {}
}
},
{
"id": "organize",
"options": {
"excludeByName": {},
"indexByName": {},
"renameByName": {
"duration": "duration (h)"
}
}
}
],
"type": "table"
}
],
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-3h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "albs_analytics",
"uid": "02mg4oxVk",
"version": 1,
"weekStart": ""
}
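The rawSql in this removed dashboard relies on Grafana's $__unixEpochFilter macro, which expands to a time-range condition on an epoch-seconds column; conceptually (the numeric bounds are placeholders for the selected dashboard time window):

-- $__unixEpochFilter(created_at) expands to roughly:
created_at >= 1679486400 AND created_at <= 1679490000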

40
releases.txt Normal file
View File

@@ -0,0 +1,40 @@
0.1.0 (2023-03-01)
First version
0.2.0 (2023-03-15)
- New parameter start_from
- Moved to double precision for timestamps
- Added metrics for build steps
0.2.1 (2023-03-15)
- Added canceled Build task status
0.3.0 (2023-03-22)
- Added test tasks stats
- New config parameter: oldest_to_update_days
0.3.1 (2023-03-22)
- db: bugfix with migration to version 3
- added info about api_timeout config parameter
- bugfix with processing of test tasks with new revision
0.3.2 (2023-03-23)
- Bugfix ALBS-1060
0.3.3 (2023-04-24)
build-analytics
Improvements
- [ALBS-1077] start deleting builds that were removed from ALBS
Bugfixes
- 'Key error' when db_port/db_host is not set
- update_builds() ignoring oldest_to_update attribute
- [ALBS-1099] Test task started_at attribute is NULL
- Max recursion error in 'Test task details.json'
0.3.4 (2023-05-12)
build_analytics
- Bugfix ALBS-1111
0.3.5 (2023-06-01)
build_analytics:
ALBS-1103 start using persistent HTTP connections