init commit
Commit 913a998265
7  .gitignore  vendored  Normal file
@@ -0,0 +1,7 @@
venv
logs
*.pyc
__pycache__
.vscode
private*
debug*
0  build_analitycs/__init__.py  Normal file

0  build_analitycs/build_analytics/__init__.py  Normal file
82  build_analitycs/build_analytics/api_client.py  Normal file
@@ -0,0 +1,82 @@
import logging
from datetime import datetime
from typing import Dict, List
from urllib.parse import urljoin

import requests

from .models.build import Build
from .models.build_task import BuildTask

# appended to the API's naive ISO timestamps so they parse as UTC
TZ_OFFSET = '+00:00'


class APIclient():
    """
    Client for working with the ALBS API.
    """

    def __init__(self, api_root: str, jwt: str):
        self.api_root = api_root
        # the token is stored but not yet attached to the requests below
        self.jwt = jwt

    def get_builds(self, page_num: int = 1) -> List[Build]:
        ep = '/api/v1/builds'
        url = urljoin(self.api_root, ep)
        params = {'pageNumber': page_num}
        headers = {'accept': 'application/json'}

        response = requests.get(url, params=params, headers=headers)
        response.raise_for_status()

        result = []
        for b in response.json()['builds']:
            try:
                result.append(self._parse_build(b))
            except Exception as err:  # pylint: disable=broad-except
                logging.error("Can't convert build JSON %s to Build model: %s",
                              b, err, exc_info=True)
        return result

    def _parse_build_tasks(self, tasks_json: List[Dict], build_id: int) -> List[BuildTask]:
        result = []
        for task in tasks_json:
            try:
                started_at = datetime.fromisoformat(
                    task['started_at'] + TZ_OFFSET) \
                    if task['started_at'] else None
                finished_at = datetime.fromisoformat(task['finished_at'] + TZ_OFFSET) \
                    if task['finished_at'] else None
                params = {'id': task['id'],
                          'build_id': build_id,
                          'started_at': started_at,
                          'finished_at': finished_at,
                          'arch': task['arch'],
                          'status_id': task['status']}
                result.append(BuildTask(**params))
            except Exception as err:  # pylint: disable=broad-except
                logging.error("Can't convert build_task JSON %s (build_id %s) to BuildTask model: %s",
                              task, build_id, err, exc_info=True)

        # newest tasks first
        result.sort(key=lambda x: x.id, reverse=True)
        return result

    def _parse_build(self, build_json: Dict) -> Build:
        url = f"https://build.almalinux.org/build/{build_json['id']}"
        created_at = datetime.fromisoformat(build_json['created_at'] + TZ_OFFSET)
        finished_at = datetime.fromisoformat(build_json['finished_at'] + TZ_OFFSET) \
            if build_json['finished_at'] else None
        build_tasks = self._parse_build_tasks(
            build_json['tasks'], build_json['id'])

        params = {
            'id': build_json['id'],
            'url': url,
            'created_at': created_at,
            'finished_at': finished_at,
            'build_tasks': build_tasks}

        return Build(**params)
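A minimal usage sketch for the client above; the token value is a placeholder, and the import path assumes the `build_analitycs` directory is on `sys.path`, as in run_extractor.py:

```python
# Hedged usage sketch for APIclient; the jwt value is a placeholder.
from build_analytics.api_client import APIclient

client = APIclient(api_root='https://build.almalinux.org',
                   jwt='<albs-jwt-token>')
builds = client.get_builds(page_num=1)
for build in builds:
    print(build.id, build.created_at, len(build.build_tasks))
```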
66  build_analitycs/build_analytics/db.py  Normal file
@@ -0,0 +1,66 @@
from datetime import datetime
from typing import Union

import psycopg2

from .models.build_db import BuildDB
from .models.build_task_db import BuildTaskDB
from .models.db_config import DbConfig


class DB():
    def __init__(self, config: DbConfig):
        self.conf = config

    def _get_conn(self):
        conn = psycopg2.connect(database=self.conf.name,
                                host=self.conf.host,
                                user=self.conf.username,
                                password=self.conf.password,
                                port=self.conf.port)
        return conn

    def insert_update_build(self, build: BuildDB):
        sql = '''
            INSERT INTO builds(id, url, created_at, finished_at)
            VALUES (%s, %s, %s, %s)
            ON CONFLICT (id) DO UPDATE SET
                (url, created_at, finished_at) = (EXCLUDED.url, EXCLUDED.created_at, EXCLUDED.finished_at);
        '''
        # psycopg2's connection context manager scopes a transaction;
        # it commits/rolls back on exit but does not close the connection
        with self._get_conn() as conn:
            cur = conn.cursor()
            cur.execute(sql, (build.id, build.url,
                        build.created_at, build.finished_at))
            conn.commit()

    def insert_update_buildtask(self, build_task: BuildTaskDB):
        sql = """
            INSERT INTO build_tasks(id, build_id, arch_id, started_at, finished_at, status_id)
            VALUES (%s, %s, %s, %s, %s, %s)
            ON CONFLICT (id) DO UPDATE SET
                (id, build_id, arch_id, started_at, finished_at, status_id) = (EXCLUDED.id, EXCLUDED.build_id, EXCLUDED.arch_id, EXCLUDED.started_at, EXCLUDED.finished_at, EXCLUDED.status_id)
        """
        with self._get_conn() as conn:
            cur = conn.cursor()
            cur.execute(sql, (build_task.id, build_task.build_id, build_task.arch_id,
                              build_task.started_at, build_task.finished_at, build_task.status_id))
            conn.commit()

    def get_latest_build_id(self) -> Union[int, None]:
        sql = "SELECT id FROM builds ORDER BY id DESC LIMIT 1;"
        with self._get_conn() as conn:
            cur = conn.cursor()
            cur.execute(sql)
            val = cur.fetchone()
            if not val:
                return None
            return int(val[0])

    def cleanup_builds(self, oldest_to_keep: datetime) -> int:
        params = (int(oldest_to_keep.timestamp()),)
        sql = "DELETE FROM builds WHERE created_at < %s;"
        with self._get_conn() as conn:
            cur = conn.cursor()
            cur.execute(sql, params)
            conn.commit()
        return cur.rowcount
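One caveat on the `with self._get_conn() as conn` pattern above: psycopg2's connection context manager only scopes a transaction, so each call leaves its connection open. A sketch of an explicitly-closing variant of one query, assuming a DbConfig-like object:

```python
# Sketch: same query as DB.get_latest_build_id, but the connection is
# closed explicitly via contextlib.closing instead of being left open.
from contextlib import closing
from typing import Optional

import psycopg2


def get_latest_build_id(conf) -> Optional[int]:
    # conf is assumed to be a DbConfig-like object
    with closing(psycopg2.connect(database=conf.name, host=conf.host,
                                  user=conf.username, password=conf.password,
                                  port=conf.port)) as conn:
        with conn.cursor() as cur:
            cur.execute("SELECT id FROM builds ORDER BY id DESC LIMIT 1;")
            val = cur.fetchone()
    return int(val[0]) if val else None
```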
45  build_analitycs/build_analytics/extractor/extractor.py  Normal file
@@ -0,0 +1,45 @@
import logging

from ..models.extractor_config import ExtractorConfig
from ..api_client import APIclient
from ..db import DB


class Extractor:
    def __init__(self, config: ExtractorConfig, api: APIclient, db: DB):
        self.oldest_build_age = config.oldest_build_age
        self.api = api
        self.db = db

    def extract_and_store(self) -> int:
        build_count = 0
        page_num = 1
        last_build_id = self.db.get_latest_build_id()
        if not last_build_id:
            last_build_id = 0
        logging.info("last_build_id: %s", last_build_id)
        stop = False

        # relies on the API returning builds newest first: we walk pages
        # until we hit a build that is already stored or too old to keep
        while not stop:
            logging.info("page: %s", page_num)
            for build in self.api.get_builds(page_num):
                # check if we should stop processing builds
                if build.id <= last_build_id or \
                        build.created_at <= self.oldest_build_age:
                    stop = True
                    break

                # inserting the build and its build tasks
                logging.info("inserting %s", build.id)
                self.db.insert_update_build(build.as_db_model())
                for build_task in build.build_tasks:
                    self.db.insert_update_buildtask(build_task.as_db_model())
                build_count += 1
            page_num += 1
        return build_count

    def build_cleanup(self):
        logging.info('Removing all builds older than %s',
                     self.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
        removed_count = self.db.cleanup_builds(self.oldest_build_age)
        logging.info('removed %d entries', removed_count)
71  build_analitycs/build_analytics/extractor/start.py  Normal file
@@ -0,0 +1,71 @@
from datetime import datetime, timedelta
import logging
from logging.handlers import RotatingFileHandler

import yaml

from ..api_client import APIclient
from ..db import DB
from .extractor import Extractor
from ..models.extractor_config import ExtractorConfig
from ..models.db_config import DbConfig


def __get_oldest_build_age(config: dict) -> datetime:
    oldest_build_age = datetime.now().astimezone() \
        - timedelta(days=config['data_store_days'])
    return oldest_build_age


def __get_db_config(config: dict) -> DbConfig:
    return DbConfig(name=config['db_name'],
                    port=int(config['db_port']),
                    host=config['db_host'],
                    username=config['db_username'],
                    password=config['db_password'])


def __get_config(yml_path: str) -> ExtractorConfig:
    """
    Loads the yml file and generates an ExtractorConfig instance.
    """
    with open(yml_path, 'r', encoding='utf-8') as flr:
        raw = yaml.safe_load(flr)

    # adding derived attributes
    raw['oldest_build_age'] = __get_oldest_build_age(raw)
    raw['db_config'] = __get_db_config(raw)

    return ExtractorConfig(**raw)


def start(yml_path: str):
    config = __get_config(yml_path)

    # configuring logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s <%(funcName)s> %(message)s',
                        handlers=[RotatingFileHandler(config.log_file,
                                                      maxBytes=10000000,
                                                      backupCount=3)])

    api = APIclient(api_root=config.albs_url, jwt=config.jwt)
    db = DB(config.db_config)
    extractor = Extractor(config, api, db)
    logging.info('Starting builds insertion')
    inserted_count = -1
    try:
        inserted_count = extractor.extract_and_store()
    except Exception as err:  # pylint: disable=broad-except
        logging.critical("Unhandled exception %s", err, exc_info=True)
    else:
        logging.info(
            'Build extraction finished. %d builds were inserted', inserted_count)

    logging.info('Starting old builds removal')
    try:
        extractor.build_cleanup()
    except Exception as err:  # pylint: disable=broad-except
        logging.critical("Unhandled exception %s", err, exc_info=True)
    else:
        logging.info('Cleanup finished')
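start() expects a flat YAML mapping: the ExtractorConfig fields plus the db_* and data_store_days keys consumed by the two private helpers; with pydantic v1 defaults, ExtractorConfig(**raw) ignores the leftover raw keys. A sketch of such a config, every value a placeholder:

```python
# Sketch: the YAML layout start() appears to expect; all values below
# are placeholders, not real endpoints or credentials.
import yaml

SAMPLE_CONFIG = """
albs_url: https://build.almalinux.org
jwt: <albs-jwt-token>
log_file: /tmp/extractor.log
data_store_days: 30
db_name: albs_analytics
db_host: localhost
db_port: 5432
db_username: extractor
db_password: <password>
"""

raw = yaml.safe_load(SAMPLE_CONFIG)
# keys consumed by __get_db_config / __get_oldest_build_age
assert {'data_store_days', 'db_name', 'db_port'} <= raw.keys()
```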
0  build_analitycs/build_analytics/models/__init__.py  Normal file
27  build_analitycs/build_analytics/models/build.py  Normal file
@@ -0,0 +1,27 @@
from datetime import datetime
from typing import List, Optional

from pydantic import BaseModel, HttpUrl

from .build_task import BuildTask
from .build_db import BuildDB


class Build(BaseModel):
    id: int
    url: HttpUrl
    build_tasks: List[BuildTask]
    created_at: datetime
    finished_at: Optional[datetime] = None

    def as_db_model(self) -> BuildDB:
        # datetimes are stored as Unix timestamps
        created_at = self.created_at.timestamp()
        finished_at = self.finished_at.timestamp() \
            if self.finished_at else None
        params = {
            'id': self.id,
            'url': self.url,
            'created_at': created_at,
            'finished_at': finished_at
        }
        return BuildDB(**params)
12  build_analitycs/build_analytics/models/build_db.py  Normal file
@@ -0,0 +1,12 @@
from typing import Optional
from pydantic import BaseModel, HttpUrl


class BuildDB(BaseModel):
    """
    Build as received from / sent to the database.
    """
    id: int
    url: HttpUrl
    # Unix timestamps (REAL columns in the schema)
    created_at: float
    finished_at: Optional[float] = None
31  build_analitycs/build_analytics/models/build_task.py  Normal file
@@ -0,0 +1,31 @@
from datetime import datetime
from typing import Optional

from pydantic import BaseModel

from .build_task_db import BuildTaskDB
from .enums import ArchEnum


class BuildTask(BaseModel):
    id: int
    build_id: int
    arch: str
    started_at: Optional[datetime] = None
    finished_at: Optional[datetime] = None
    status_id: int

    def as_db_model(self) -> BuildTaskDB:
        # datetimes become Unix timestamps; the arch name is mapped
        # to its numeric id via ArchEnum
        started_at = self.started_at.timestamp() \
            if self.started_at else None
        finished_at = self.finished_at.timestamp() \
            if self.finished_at else None
        params = {
            'id': self.id,
            'build_id': self.build_id,
            'arch_id': ArchEnum[self.arch].value,
            'started_at': started_at,
            'finished_at': finished_at,
            'status_id': self.status_id
        }
        return BuildTaskDB(**params)
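A short sketch of the conversion this model performs, with illustrative field values:

```python
# Sketch: BuildTask -> BuildTaskDB conversion; values are illustrative.
from datetime import datetime, timezone

from build_analytics.models.build_task import BuildTask

task = BuildTask(id=1, build_id=42, arch='x86_64',
                 started_at=datetime(2023, 1, 1, tzinfo=timezone.utc),
                 finished_at=None, status_id=2)
db_task = task.as_db_model()
print(db_task.arch_id)     # 1, i.e. ArchEnum['x86_64'].value
print(db_task.started_at)  # float Unix timestamp
```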
14  build_analitycs/build_analytics/models/build_task_db.py  Normal file
@@ -0,0 +1,14 @@
from typing import Optional
from pydantic import BaseModel


class BuildTaskDB(BaseModel):
    """
    BuildTask as received from / sent to the database.
    """
    id: int
    build_id: int
    arch_id: int
    started_at: Optional[float] = None
    finished_at: Optional[float] = None
    status_id: int
9  build_analitycs/build_analytics/models/db_config.py  Normal file
@@ -0,0 +1,9 @@
from pydantic import BaseModel, Field


class DbConfig(BaseModel):
    name: str = Field(description="db name")
    port: int = Field(description="db server port")
    host: str = Field(description="db server ip/hostname")
    username: str = Field(description="username to connect with")
    password: str = Field(description="password to connect with")
17  build_analitycs/build_analytics/models/enums.py  Normal file
@@ -0,0 +1,17 @@
from enum import IntEnum


# these values mirror the arch_enum / build_task_enum lookup tables
# seeded in db_schema/postgres.sql
class ArchEnum(IntEnum):
    i686 = 0
    x86_64 = 1
    aarch64 = 2
    ppc64le = 3
    s390x = 4


class BuildTaskEnum(IntEnum):
    idle = 0
    started = 1
    completed = 2
    failed = 3
    excluded = 4
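Since these values must stay in lockstep with the rows seeded by db_schema/postgres.sql, a quick consistency check, as a sketch:

```python
# Sketch: sanity-check that the enums match the seeded lookup tables.
from build_analytics.models.enums import ArchEnum, BuildTaskEnum

assert ArchEnum['x86_64'].value == 1
assert BuildTaskEnum['completed'].value == 2
assert [a.name for a in ArchEnum] == ['i686', 'x86_64', 'aarch64',
                                      'ppc64le', 's390x']
```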
24  build_analitycs/build_analytics/models/extractor_config.py  Normal file
@@ -0,0 +1,24 @@
from datetime import datetime
from pathlib import Path

from pydantic import HttpUrl, Field, BaseModel

from .db_config import DbConfig

# DEFAULTS
ALBS_URL_DEFAULT = 'https://build.almalinux.org'
LOG_FILE_DEFAULT = '/tmp/extractor.log'


class ExtractorConfig(BaseModel):
    """
    Config model for the Extractor service.
    """
    log_file: Path = Field(description='logfile path',
                           default=LOG_FILE_DEFAULT)
    albs_url: HttpUrl = Field(description='ALBS root URL',
                              default=ALBS_URL_DEFAULT)
    oldest_build_age: datetime = \
        Field(description='oldest build age to extract and store')
    jwt: str = Field(description='ALBS JWT token')
    db_config: DbConfig = Field(description="database configuration")
12  build_analitycs/build_analytics/models/sign_task_db.py  Normal file
@@ -0,0 +1,12 @@
from pydantic import BaseModel


class SignTaskDB(BaseModel):
    """
    SignTask as received from / sent to the database.
    """
    id: int
    build_id: int
    build_task_id: int
    started_at: int
    finished_at: int
91  build_analitycs/db_schema/postgres.sql  Normal file
@@ -0,0 +1,91 @@
-- builds
DROP TABLE IF EXISTS builds CASCADE;
CREATE TABLE builds (
    id INTEGER PRIMARY KEY,
    url VARCHAR(50) NOT NULL,
    created_at REAL NOT NULL,
    finished_at REAL
);


CREATE INDEX IF NOT EXISTS builds_created_at
ON builds(created_at);

CREATE INDEX IF NOT EXISTS builds_finished_at
ON builds(finished_at);


-- build_task_enum
DROP TABLE IF EXISTS build_task_enum CASCADE;
CREATE TABLE IF NOT EXISTS build_task_enum(
    id INTEGER PRIMARY KEY,
    value VARCHAR(15)
);

INSERT INTO build_task_enum (id, value)
VALUES
    (0, 'idle'),
    (1, 'started'),
    (2, 'completed'),
    (3, 'failed'),
    (4, 'excluded');


-- arch_enum
DROP TABLE IF EXISTS arch_enum CASCADE;
CREATE TABLE arch_enum(
    id INTEGER PRIMARY KEY,
    value VARCHAR(15)
);

INSERT INTO arch_enum(id, value)
VALUES
    (0, 'i686'),
    (1, 'x86_64'),
    (2, 'aarch64'),
    (3, 'ppc64le'),
    (4, 's390x');


-- build_tasks
DROP TABLE IF EXISTS build_tasks CASCADE;
CREATE TABLE build_tasks (
    id INTEGER PRIMARY KEY,
    build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
    arch_id INTEGER REFERENCES arch_enum(id) ON DELETE SET NULL,
    status_id INTEGER REFERENCES build_task_enum(id) ON DELETE SET NULL,
    started_at REAL,
    finished_at REAL
);

CREATE INDEX build_tasks_build_id
ON build_tasks(build_id);

CREATE INDEX build_tasks_started_at
ON build_tasks(started_at);

CREATE INDEX build_tasks_finished_at
ON build_tasks(finished_at);


-- sign_tasks
DROP TABLE IF EXISTS sign_tasks CASCADE;
CREATE TABLE sign_tasks (
    id INTEGER PRIMARY KEY,
    build_id INTEGER REFERENCES builds(id) ON DELETE CASCADE,
    buildtask_id INTEGER REFERENCES build_tasks(id) ON DELETE CASCADE,
    started_at REAL,
    finished_at REAL
);

CREATE INDEX sign_tasks_build_id
ON sign_tasks(build_id);

CREATE INDEX sign_tasks_buildtask_id
ON sign_tasks(buildtask_id);

CREATE INDEX sign_tasks_started_at
ON sign_tasks(started_at);

CREATE INDEX sign_tasks_finished_at
ON sign_tasks(finished_at);
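The schema drops and recreates every table, so it is applied once before the first extractor run. A sketch of applying it with psycopg2 (connection values are placeholders):

```python
# Sketch: apply db_schema/postgres.sql; connection values are placeholders.
# Warning: the script DROPs and recreates every table.
import psycopg2

with open('build_analitycs/db_schema/postgres.sql', encoding='utf-8') as f:
    schema_sql = f.read()

conn = psycopg2.connect(database='albs_analytics', host='localhost',
                        user='extractor', password='<password>', port=5432)
with conn, conn.cursor() as cur:
    cur.execute(schema_sql)  # psycopg2 runs multi-statement SQL strings
conn.close()
```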
12  build_analitycs/run_extractor.py  Normal file
@@ -0,0 +1,12 @@
"""
extractor startup script
"""
import sys
from build_analytics.extractor.start import start

try:
    YAML_PATH = sys.argv[1]
except IndexError:
    print(f"Usage: {sys.argv[0]} config.yml")
    sys.exit(1)
start(YAML_PATH)