Compare commits
3 Commits
Author | SHA1 | Date | |
---|---|---|---|
c731cba102 | |||
7c05bbacb6 | |||
d47fe3b4cd |
@ -27,6 +27,8 @@ class APIclient():
|
|||||||
self.api_root = api_root
|
self.api_root = api_root
|
||||||
self.jwt = jwt
|
self.jwt = jwt
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
# will be set at first call of __send_request
|
||||||
|
self.session: Optional[requests.Session] = None
|
||||||
|
|
||||||
def get_builds(self, page_num: int = 1) -> List[Build]:
|
def get_builds(self, page_num: int = 1) -> List[Build]:
|
||||||
ep = '/api/v1/builds'
|
ep = '/api/v1/builds'
|
||||||
@ -34,8 +36,7 @@ class APIclient():
|
|||||||
params = {'pageNumber': page_num}
|
params = {'pageNumber': page_num}
|
||||||
headers = {'accept': 'appilication/json'}
|
headers = {'accept': 'appilication/json'}
|
||||||
|
|
||||||
response = requests.get(
|
response = self.__send_request(url, 'get', params, headers)
|
||||||
url, params=params, headers=headers, timeout=self.timeout)
|
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
result = []
|
result = []
|
||||||
@ -54,7 +55,7 @@ class APIclient():
|
|||||||
ep = f'/api/v1/builds/{build_id}'
|
ep = f'/api/v1/builds/{build_id}'
|
||||||
url = urljoin(self.api_root, ep)
|
url = urljoin(self.api_root, ep)
|
||||||
headers = {'accept': 'application/json'}
|
headers = {'accept': 'application/json'}
|
||||||
response = requests.get(url, headers=headers, timeout=self.timeout)
|
response = self.__send_request(url, 'get', headers=headers)
|
||||||
|
|
||||||
if response.status_code == 404:
|
if response.status_code == 404:
|
||||||
return None
|
return None
|
||||||
@ -228,3 +229,31 @@ class APIclient():
|
|||||||
start_ts = stat.start_ts
|
start_ts = stat.start_ts
|
||||||
|
|
||||||
return start_ts
|
return start_ts
|
||||||
|
|
||||||
|
def __send_request(self,
                   url: str,
                   method: str,
                   params: Optional[Dict[str, Any]] = None,
                   headers: Optional[Dict[str, Any]] = None,
                   ) -> requests.Response:
    """
    Send an HTTP request through the client's shared requests session.

    The session is created on the first call and stored on the instance,
    so that later API calls go through the same session object.

    Args:
        url: full URL to request.
        method: HTTP verb name, case-insensitive ('get', 'post', ...).
        params: optional query string parameters.
        headers: optional request headers.

    Returns:
        The response object returned by the session.

    Raises:
        ValueError: if method is not one of the supported HTTP verbs.
    """
    # Only real HTTP verbs are accepted. A plain getattr() on the session
    # would also resolve unrelated attributes such as 'close' or 'headers'.
    supported = ('get', 'options', 'head', 'post', 'put', 'patch', 'delete')
    verb = method.lower()
    if verb not in supported:
        raise ValueError(f"method {method} is not supported")

    if not self.session:
        self.session = requests.Session()

    # Dispatch through the per-verb session method (session.get, ...).
    sender = getattr(self.session, verb)
    # pylint: disable=not-callable
    return sender(url, params=params, headers=headers, timeout=self.timeout)
|
||||||
|
|
||||||
|
def close_session(self):
    """
    Close the shared HTTP session, if one is open.

    Calling this when no session is set does nothing. The session
    reference is reset to None even when closing raises, so the next
    request starts a fresh session instead of reusing a broken one.
    """
    current = self.session
    if not current:
        return
    try:
        current.close()
    finally:
        # Drop the reference whether or not close() succeeded.
        self.session = None
|
||||||
|
|
||||||
|
def __del__(self):
    """
    Close the HTTP session when the client object is being destroyed.

    Callers can also release the session earlier by calling
    close_session() directly.
    """
    # __del__ also runs for instances whose __init__ did not complete;
    # in that case the 'session' attribute may not exist yet.
    if getattr(self, 'session', None) is not None:
        self.close_session()
|
||||||
|
@ -3,7 +3,7 @@
|
|||||||
from enum import IntEnum
|
from enum import IntEnum
|
||||||
|
|
||||||
# supported schema version
|
# supported schema version
|
||||||
DB_SCHEMA_VER = 3
|
DB_SCHEMA_VER = 4
|
||||||
|
|
||||||
|
|
||||||
# ENUMS
|
# ENUMS
|
||||||
@ -13,6 +13,8 @@ class ArchEnum(IntEnum):
|
|||||||
aarch64 = 2
|
aarch64 = 2
|
||||||
ppc64le = 3
|
ppc64le = 3
|
||||||
s390x = 4
|
s390x = 4
|
||||||
|
src = 5
|
||||||
|
x86_64_v2 = 6
|
||||||
|
|
||||||
|
|
||||||
class BuildTaskEnum(IntEnum):
|
class BuildTaskEnum(IntEnum):
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
# pylint: disable=relative-beyond-top-level
|
# pylint: disable=relative-beyond-top-level
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
|
|
||||||
from ..api_client import APIclient
|
from ..api_client import APIclient
|
||||||
from ..const import BuildTaskEnum
|
from ..const import BuildTaskEnum
|
||||||
from ..db import DB
|
from ..db import DB
|
||||||
@ -26,11 +28,13 @@ class Extractor:
|
|||||||
stop = False
|
stop = False
|
||||||
|
|
||||||
while not stop:
|
while not stop:
|
||||||
|
oldest_build_age = datetime.now().astimezone() - \
|
||||||
|
timedelta(days=self.config.data_store_days)
|
||||||
logging.info("page: %s", page_num)
|
logging.info("page: %s", page_num)
|
||||||
for build in self.api.get_builds(page_num):
|
for build in self.api.get_builds(page_num):
|
||||||
# check if we shoud stop processing build
|
# check if we shoud stop processing build
|
||||||
if build.id <= last_build_id or \
|
if build.id <= last_build_id or \
|
||||||
build.created_at <= self.config.oldest_build_age:
|
build.created_at <= oldest_build_age:
|
||||||
stop = True
|
stop = True
|
||||||
break
|
break
|
||||||
|
|
||||||
@ -73,9 +77,10 @@ class Extractor:
|
|||||||
return build_count
|
return build_count
|
||||||
|
|
||||||
def build_cleanup(self):
|
def build_cleanup(self):
|
||||||
logging.info('Removing all buidls older then %s',
|
oldest_to_keep = datetime.now().astimezone() - \
|
||||||
self.config.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
|
timedelta(days=self.config.data_store_days)
|
||||||
removed_count = self.db.cleanup_builds(self.config.oldest_build_age)
|
logging.info('Removing all buidls older then %s', oldest_to_keep)
|
||||||
|
removed_count = self.db.cleanup_builds(oldest_to_keep)
|
||||||
logging.info('removed %d entries', removed_count)
|
logging.info('removed %d entries', removed_count)
|
||||||
|
|
||||||
def __update_build_tasks(self, build_tasks: List[BuildTask],
|
def __update_build_tasks(self, build_tasks: List[BuildTask],
|
||||||
@ -105,10 +110,11 @@ class Extractor:
|
|||||||
b.build_id, b.id, BuildTaskEnum(b.status_id).name)
|
b.build_id, b.id, BuildTaskEnum(b.status_id).name)
|
||||||
|
|
||||||
def update_builds(self):
|
def update_builds(self):
|
||||||
|
not_before = datetime.now().astimezone() - \
|
||||||
|
timedelta(days=self.config.oldest_to_update_days)
|
||||||
logging.info('Getting unfinished builds that were created after %s ',
|
logging.info('Getting unfinished builds that were created after %s ',
|
||||||
self.config.oldest_to_update)
|
not_before)
|
||||||
unfinished_tasks = self.db.get_unfinished_builds(
|
unfinished_tasks = self.db.get_unfinished_builds(not_before)
|
||||||
self.config.oldest_to_update)
|
|
||||||
for build_id, build_tasks_db in unfinished_tasks.items():
|
for build_id, build_tasks_db in unfinished_tasks.items():
|
||||||
try:
|
try:
|
||||||
logging.info('Getting status of build %d', build_id)
|
logging.info('Getting status of build %d', build_id)
|
||||||
@ -137,10 +143,12 @@ class Extractor:
|
|||||||
build_id, err, exc_info=True)
|
build_id, err, exc_info=True)
|
||||||
|
|
||||||
def updating_test_tasks(self):
|
def updating_test_tasks(self):
|
||||||
|
not_before = datetime.now().astimezone() - \
|
||||||
|
timedelta(days=self.config.oldest_to_update_days)
|
||||||
logging.info('getting build tasks for builds created after %s',
|
logging.info('getting build tasks for builds created after %s',
|
||||||
self.config.oldest_to_update)
|
not_before)
|
||||||
build_task_ids = self.db.get_build_tasks_for_tests_update(
|
build_task_ids = self.db.get_build_tasks_for_tests_update(
|
||||||
self.config.oldest_to_update)
|
not_before)
|
||||||
for build_task_id in build_task_ids:
|
for build_task_id in build_task_ids:
|
||||||
try:
|
try:
|
||||||
logging.info('getting tests for build task %s', build_task_id)
|
logging.info('getting tests for build task %s', build_task_id)
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
import logging
|
import logging
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
import sys
|
import sys
|
||||||
@ -23,10 +22,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
|
|||||||
with open(yml_path, 'r', encoding='utf-8') as flr:
|
with open(yml_path, 'r', encoding='utf-8') as flr:
|
||||||
raw = yaml.safe_load(flr)
|
raw = yaml.safe_load(flr)
|
||||||
|
|
||||||
# adding new attrs
|
|
||||||
raw['oldest_build_age'] = datetime.now().astimezone() \
|
|
||||||
- timedelta(days=raw['data_store_days'])
|
|
||||||
|
|
||||||
# Dbconfig
|
# Dbconfig
|
||||||
db_params: Dict[str, Any] = {'name': raw['db_name'],
|
db_params: Dict[str, Any] = {'name': raw['db_name'],
|
||||||
'username': raw['db_username'],
|
'username': raw['db_username'],
|
||||||
@ -37,10 +32,6 @@ def __get_config(yml_path: str) -> ExtractorConfig:
|
|||||||
db_params['host'] = raw['db_host']
|
db_params['host'] = raw['db_host']
|
||||||
raw['db_config'] = DbConfig(**db_params)
|
raw['db_config'] = DbConfig(**db_params)
|
||||||
|
|
||||||
if 'oldest_to_update_days' in raw:
|
|
||||||
raw['oldest_to_update_days'] = datetime.now().astimezone() \
|
|
||||||
- timedelta(days=raw['oldest_to_update_days'])
|
|
||||||
|
|
||||||
return ExtractorConfig(**raw)
|
return ExtractorConfig(**raw)
|
||||||
|
|
||||||
|
|
||||||
@ -108,7 +99,10 @@ def start(yml_path: str):
|
|||||||
else:
|
else:
|
||||||
logging.info('test tasks were updated')
|
logging.info('test tasks were updated')
|
||||||
|
|
||||||
|
# freeing up resources
|
||||||
extractor.db.close_conn()
|
extractor.db.close_conn()
|
||||||
|
extractor.api.close_session()
|
||||||
|
|
||||||
logging.info("Extraction was finished")
|
logging.info("Extraction was finished")
|
||||||
logging.info("Sleeping for %d seconds", config.scrape_interval)
|
logging.info("Sleeping for %d seconds", config.scrape_interval)
|
||||||
time.sleep(config.scrape_interval)
|
time.sleep(config.scrape_interval)
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
from datetime import datetime, timedelta
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from pydantic import HttpUrl, Field, BaseModel # pylint: disable=no-name-in-module
|
from pydantic import HttpUrl, Field, BaseModel # pylint: disable=no-name-in-module
|
||||||
@ -8,10 +7,10 @@ from .db_config import DbConfig
|
|||||||
# DEFAULTS
|
# DEFAULTS
|
||||||
ALBS_URL_DEFAULT = 'https://build.almalinux.org'
|
ALBS_URL_DEFAULT = 'https://build.almalinux.org'
|
||||||
LOG_FILE_DEFAULT = '/tmp/extractor.log'
|
LOG_FILE_DEFAULT = '/tmp/extractor.log'
|
||||||
API_DEFAULT = 30
|
API_TIMEOUT_DEFAULT = 30
|
||||||
SCRAPE_INTERVAL_DEFAULT = 3600
|
SCRAPE_INTERVAL_DEFAULT = 3600
|
||||||
START_FROM_DEFAULT = 5808
|
START_FROM_DEFAULT = 5808
|
||||||
OLDEST_TO_UPDATE_DEFAULT = datetime.now().astimezone() - timedelta(days=7)
|
OLDEST_TO_UPDATE_DAYS_DEFAULT = 7
|
||||||
|
|
||||||
|
|
||||||
class ExtractorConfig(BaseModel):
|
class ExtractorConfig(BaseModel):
|
||||||
@ -22,17 +21,17 @@ class ExtractorConfig(BaseModel):
|
|||||||
default=LOG_FILE_DEFAULT)
|
default=LOG_FILE_DEFAULT)
|
||||||
albs_url: HttpUrl = Field(description='ALBS root URL',
|
albs_url: HttpUrl = Field(description='ALBS root URL',
|
||||||
default=ALBS_URL_DEFAULT)
|
default=ALBS_URL_DEFAULT)
|
||||||
oldest_build_age: datetime = \
|
data_store_days: int = \
|
||||||
Field(description='oldest build age to store')
|
Field(description='oldest build (in days) to keep in DB')
|
||||||
jwt: str = Field(description='ALBS JWT token')
|
jwt: str = Field(description='ALBS JWT token')
|
||||||
db_config: DbConfig = Field(description="database configuration")
|
db_config: DbConfig = Field(description="database configuration")
|
||||||
api_timeout: int = Field(
|
api_timeout: int = Field(
|
||||||
description="max time in seconds to wait for API response",
|
description="max time in seconds to wait for API response",
|
||||||
default=API_DEFAULT)
|
default=API_TIMEOUT_DEFAULT)
|
||||||
scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
|
scrape_interval: int = Field(description='how often (in seconds) we will extract data from ALBS',
|
||||||
default=SCRAPE_INTERVAL_DEFAULT)
|
default=SCRAPE_INTERVAL_DEFAULT)
|
||||||
start_from: int = Field(description='build id to start populating empty db with',
|
start_from: int = Field(description='build id to start populating empty db with',
|
||||||
default=START_FROM_DEFAULT)
|
default=START_FROM_DEFAULT)
|
||||||
oldest_to_update: datetime = \
|
oldest_to_update_days: int = \
|
||||||
Field(description='oldest unfinished object (build/task/step...) that we will try to update',
|
Field(description='oldest (in days) unfinished object (build/task/step...) that we will try to update',
|
||||||
default=OLDEST_TO_UPDATE_DEFAULT)
|
default=OLDEST_TO_UPDATE_DAYS_DEFAULT)
|
||||||
|
@ -10,7 +10,6 @@ albs_url: https://build.almalinux.org
|
|||||||
# required: yes
|
# required: yes
|
||||||
jwt: ""
|
jwt: ""
|
||||||
|
|
||||||
|
|
||||||
# db_host
|
# db_host
|
||||||
# IP/hostname of database server
|
# IP/hostname of database server
|
||||||
# required: no
|
# required: no
|
||||||
@ -28,7 +27,6 @@ db_port: 5432
|
|||||||
# required: yes
|
# required: yes
|
||||||
db_username: albs_analytics
|
db_username: albs_analytics
|
||||||
|
|
||||||
|
|
||||||
# db_password
|
# db_password
|
||||||
# password to connect with
|
# password to connect with
|
||||||
# required: yes
|
# required: yes
|
||||||
@ -39,7 +37,6 @@ db_password: super_secret_password
|
|||||||
# required: yes
|
# required: yes
|
||||||
db_name: albs_analytics
|
db_name: albs_analytics
|
||||||
|
|
||||||
|
|
||||||
# log_file
|
# log_file
|
||||||
# file to write logs to
|
# file to write logs to
|
||||||
# required: no
|
# required: no
|
||||||
@ -62,7 +59,7 @@ scrape_interval: 3600
|
|||||||
# default: 5808 (first build with correct metrics)
|
# default: 5808 (first build with correct metrics)
|
||||||
start_from: 5808
|
start_from: 5808
|
||||||
|
|
||||||
# oldest_to_update
|
# oldest_to_update_days
|
||||||
# oldest (in days) unfinished object (build/task/step...) that we will try to update
|
# oldest (in days) unfinished object (build/task/step...) that we will try to update
|
||||||
# required: false
|
# required: false
|
||||||
# default: 7
|
# default: 7
|
||||||
|
11
build_analytics/migrations/4.sql
Normal file
11
build_analytics/migrations/4.sql
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
-- Migration to schema version 4: registers the 'src' and 'x86_64_v2'
-- architectures. Both statements run in a single transaction.
BEGIN;
|
||||||
|
|
||||||
|
-- New arch_enum rows; ids 5 and 6 match the ArchEnum members
-- (src = 5, x86_64_v2 = 6) added in the same change set.
INSERT INTO arch_enum (id, value)
|
||||||
|
VALUES
|
||||||
|
(5, 'src'),
|
||||||
|
(6, 'x86_64_v2');
|
||||||
|
|
||||||
|
-- Record the new schema version (DB_SCHEMA_VER is set to 4 in the code).
UPDATE schema_version
|
||||||
|
SET version = 4;
|
||||||
|
|
||||||
|
COMMIT;
|
14
releases.txt
14
releases.txt
@ -27,6 +27,18 @@ build-analytics
|
|||||||
- [ALBS-1077] start deleting builds that were removed from ALBS
|
- [ALBS-1077] start deleting builds that were removed from ALBS
|
||||||
Bugfixes
|
Bugfixes
|
||||||
- 'Key error' when db_port/db_host is not set
|
- 'Key error' when db_port/db_host is not set
|
||||||
- update_builds() ignoring opldest_to_update attribute
|
- update_builds() ignoring oldest_to_update attribute
|
||||||
- [ALBS-1099] Test task started_at attribute is NULL
|
- [ALBS-1099] Test task started_at attribute is NULL
|
||||||
- Max recursion error in 'Test task details.json'
|
- Max recursion error in 'Test task details.json'
|
||||||
|
|
||||||
|
0.3.4 (2023-05-12)
|
||||||
|
build_analytics
|
||||||
|
- Bugfix ALBS-1111
|
||||||
|
|
||||||
|
0.3.5 (2023-06-01)
|
||||||
|
build_analytics:
|
||||||
|
ALBS-1103 start using persistent HTTP connections
|
||||||
|
|
||||||
|
0.3.6 (2024-10-08)
|
||||||
|
build_analytics:
|
||||||
|
buildsystem#360 Added src and x86_64_v2 arches
|
Loading…
Reference in New Issue
Block a user