2023-02-28 17:28:48 +00:00
|
|
|
# pylint: disable=relative-beyond-top-level
|
|
|
|
|
2023-02-27 19:51:53 +00:00
|
|
|
import logging
|
2023-02-28 17:28:48 +00:00
|
|
|
from typing import List, Dict
|
2023-02-27 19:51:53 +00:00
|
|
|
|
|
|
|
from ..models.extractor_config import ExtractorConfig
|
2023-02-28 17:28:48 +00:00
|
|
|
from ..models.enums import BuildTaskEnum
|
|
|
|
from ..models.build import BuildTask
|
2023-02-27 19:51:53 +00:00
|
|
|
from ..db import DB
|
2023-02-28 17:28:48 +00:00
|
|
|
from ..api_client import APIclient
|
2023-02-27 19:51:53 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Extractor:
    """Copy builds from the API client into the DB and keep them up to date.

    All remote access goes through ``self.api`` and all persistence through
    ``self.db``; this class only orchestrates the two and logs progress.
    """

    def __init__(self, config: ExtractorConfig, api: APIclient, db: DB):
        """Remember the collaborators and the extraction limits.

        Args:
            config: supplies ``start_from`` (build id used as the lower bound
                when the DB reports no latest build) and ``oldest_build_age``
                (builds created at or before it are not extracted).
            api: client used to fetch builds (``get_builds``, ``get_build``).
            db: storage wrapper used to insert, update and clean up builds.
        """
        self.start_from = config.start_from
        self.oldest_build_age = config.oldest_build_age
        self.api = api
        self.db = db

    def extract_and_store(self) -> int:
        """Fetch new builds page by page and insert them into the DB.

        Paging starts at page 1 and stops at the first build whose id is not
        greater than the latest stored build id (or ``start_from`` when the DB
        has none), or whose ``created_at`` is not newer than
        ``oldest_build_age``.  It also stops on the first empty page, so the
        loop terminates when the API runs out of builds before either limit
        is reached.

        NOTE(review): stopping on the first "old" build presumes the API
        returns builds newest-first -- confirm against the API client.

        Insert failures are logged and do not abort the run: a build that
        cannot be inserted is skipped together with its tasks, a failed task
        insert only skips that task.

        Returns:
            Number of builds whose ``insert_build`` call succeeded.
        """
        build_count = 0
        page_num = 1
        last_build_id = self.db.get_latest_build_id()
        if not last_build_id:
            last_build_id = self.start_from
        logging.info("last_build_id: %s", last_build_id)
        stop = False

        while not stop:
            logging.info("page: %s", page_num)
            page_is_empty = True
            for build in self.api.get_builds(page_num):
                page_is_empty = False

                # check if we should stop processing builds
                if build.id <= last_build_id or \
                        build.created_at <= self.oldest_build_age:
                    stop = True
                    break

                # insert the build, then its build tasks with their statistics
                logging.info("inserting %s", build.id)
                try:
                    self.db.insert_build(build.as_db_model())
                except Exception as error:  # pylint: disable=broad-except
                    logging.error('failed to insert build %d: %s',
                                  build.id, error, exc_info=True)
                    # the build was not stored, so its tasks are skipped too
                    continue

                for build_task in build.build_tasks:
                    try:
                        # web node stats and build node stats are two
                        # separate models; pass one of each (same argument
                        # order as update_build_task)
                        self.db.insert_buildtask(
                            build_task.as_db_model(),
                            build_task.web_node_stats.as_db_model(
                                build_task.id),
                            build_task.build_node_stats.as_db_model(
                                build_task.id))
                    except Exception as error:  # pylint: disable=broad-except
                        logging.error('failed to insert build task %d: %s',
                                      build_task.id, error, exc_info=True)
                build_count += 1

            if page_is_empty:
                # no builds on this page: nothing left to fetch, and without
                # this check the loop would keep requesting pages forever
                logging.info("page %s is empty, stopping", page_num)
                break
            page_num += 1
        return build_count

    def build_cleanup(self):
        """Ask the DB to remove builds older than ``oldest_build_age``.

        Logs the cut-off timestamp before the call and the count returned by
        ``db.cleanup_builds`` after it.
        """
        logging.info('Removing all buidls older then %s',
                     self.oldest_build_age.strftime("%m/%d/%Y, %H:%M:%S"))
        removed_count = self.db.cleanup_builds(self.oldest_build_age)
        logging.info('removed %d entries', removed_count)

    def __update_build_tasks(self, build_tasks: List[BuildTask],
                             build_tasks_status_db: Dict[int, int]):
        """Update in the DB every build task whose status has changed.

        Args:
            build_tasks: build tasks as currently reported by the API.
            build_tasks_status_db: build task id -> status id as stored in
                the DB.  Every task in ``build_tasks`` must have an entry
                (a missing id raises ``KeyError``).

        A failed update is logged and does not stop processing of the
        remaining tasks.  Tasks with an unchanged status are only logged.
        """
        for b in build_tasks:
            if b.status_id != build_tasks_status_db[b.id]:
                logging.info('build: %s, build task %d status have changed %s -> %s. Updating DB',
                             b.build_id,
                             b.id, BuildTaskEnum(
                                 build_tasks_status_db[b.id]).name,
                             BuildTaskEnum(b.status_id).name)
                try:
                    self.db.update_build_task(
                        b.as_db_model(),
                        b.web_node_stats.as_db_model(b.id),
                        b.build_node_stats.as_db_model(b.id))
                except Exception as err:  # pylint: disable=broad-except
                    logging.error(
                        'build: %d, failed to update build task %d: %s',
                        b.build_id, b.id, err, exc_info=True)
                else:
                    logging.info(
                        'build: %d, build task %d was updated',
                        b.build_id, b.id)
            else:
                logging.info(
                    "build: %d, build_task %d is still %s. Skipping",
                    b.build_id, b.id, BuildTaskEnum(b.status_id).name)

    def update_builds(self):
        """Refresh every unfinished build stored in the DB from the API.

        For each build returned by ``db.get_unfinished_builds()`` the current
        state is fetched from the API, the tasks that are tracked in the DB
        are updated if their status changed, and the build record itself is
        updated once the API reports a ``finished_at`` value.

        Any error while processing one build is logged and the loop moves on
        to the next build.
        """
        logging.info('Getting list of tasks from DB')
        unfinished_tasks = self.db.get_unfinished_builds()
        for build_id, build_tasks_db in unfinished_tasks.items():
            try:
                logging.info('Getting status of build %d', build_id)
                build = self.api.get_build(build_id)

                logging.info('Updating build tasks')
                # only tasks that the DB tracks for this build are compared
                build_tasks_to_check = [
                    b for b in build.build_tasks if b.id in build_tasks_db]
                self.__update_build_tasks(
                    build_tasks_to_check, build_tasks_db)

                if build.finished_at:
                    logging.info(
                        "build is finished, we need to update finished_at attribute")
                    self.db.update_build(build.as_db_model())

                logging.info('finished proccessing build %d', build_id)

            except Exception as err:  # pylint: disable=broad-except
                logging.error("Cant process build %d: %s",
                              build_id, err, exc_info=True)
|