diff --git a/albs_oval_errata_diff.py b/albs_oval_errata_diff.py
index d878108..206d00d 100644
--- a/albs_oval_errata_diff.py
+++ b/albs_oval_errata_diff.py
@@ -1,7 +1,12 @@
 """
 albs_oval_errata_diff.py is a service startup script
 """
+import sys
 from albs_oval_errata_diff.start import start
 
-
-start()
+try:
+    YAML_PATH = sys.argv[1]
+except IndexError:
+    print(f"Usage {sys.argv[0]} config.yml")
+    sys.exit(1)
+start(YAML_PATH)
diff --git a/albs_oval_errata_diff/comparer.py b/albs_oval_errata_diff/comparer.py
index 3d96971..18e7c29 100644
--- a/albs_oval_errata_diff/comparer.py
+++ b/albs_oval_errata_diff/comparer.py
@@ -1,80 +1,87 @@
+"""
+package comparer.py implements difference checking logic
+"""
+
 import bz2
 import datetime
+from pathlib import Path
 import re
-import requests
 from typing import Tuple, List, Dict, Any
-import xml.etree.ElementTree as ET
 import logging
 import json
+import xml.etree.ElementTree as ET
 
-from .config import DOWNLOAD_DIR, NOT_BEFORE, RELEASES, SA_EXCLUDE, PACKAGES_EXCLUDE
-from .sa import SecurityAdvisory
+import requests
+
+from .config import Config
 from .package import Package
+from .sa import SecurityAdvisory
 
 
-def download_oval(url: str) -> str:
+def download_oval(url: str, download_dir: Path) -> str:
     """
     download_oval downloads, decompreses oval file
     and returns filepath of saved file
     """
-    r = requests.get(url, stream=True, timeout=30)
+    response = requests.get(url, stream=True, timeout=30)
     decompressor = bz2.BZ2Decompressor()
     fname = url.split('/')[-1].replace('.bz2', '')
-    fpath = DOWNLOAD_DIR / fname
-    with open(fpath, 'wb') as fd:
-        for chunk in r.iter_content(chunk_size=128):
-            fd.write(decompressor.decompress(chunk))
+    fpath = download_dir / fname
+    with open(fpath, 'wb') as flw:
+        for chunk in response.iter_content(chunk_size=128):
+            flw.write(decompressor.decompress(chunk))
     return fpath
 
 
-def download_errata(url: str, release_version: int) -> str:
+def download_errata(url: str, release_version: int, download_dir: Path) -> str:
     """
     downloads errata_full.json file
     end returns file path
     """
    response = requests.get(url, stream=True, timeout=30)
     fname = f'alma-{release_version}.json'
-    fpath = DOWNLOAD_DIR / fname
+    fpath = download_dir / fname
     with open(fpath, 'wb') as errata_file:
         for chunk in response.iter_content(chunk_size=128):
             errata_file.write(chunk)
     return fpath
 
 
-def parse_oval(fpath: str) -> Dict[str, SecurityAdvisory]:
+def parse_oval(fpath: str, not_before: datetime.datetime) -> Dict[str, SecurityAdvisory]:
     """
     converting oval xml file to dict
     """
     def extract_package(title: str) -> Package:
-        r = r'(.*) is earlier than \d+:(.+?(?=-))'
-        res = re.search(r, title)
+        regexp = r'(.*) is earlier than \d+:(.+?(?=-))'
+        res = re.search(regexp, title)
         name = res.group(1)
         version = res.group(2)
         return Package(name=name, version=version)
 
     def extract_id(title: str) -> str:
-        r = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
-        res = re.search(r, title)
+        regexp = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
+        res = re.search(regexp, title)
         return res.group(1)
 
     tree = ET.parse(fpath)
     root = tree.getroot()
-    ns = {
+    namespase = {
         'n': 'http://oval.mitre.org/XMLSchema/oval-definitions-5',
     }
     res = {}
-    for definition in root.findall('n:definitions/', ns):
-        title = definition.find('n:metadata/n:title', ns).text
+    for definition in root.findall('n:definitions/', namespase):
+        title = definition.find('n:metadata/n:title', namespase).text
         issued = definition.find(
-            'n:metadata/n:advisory/n:issued', ns).attrib['date']
+            'n:metadata/n:advisory/n:issued', namespase).attrib['date']
         issued_dt = datetime.datetime.strptime(issued, "%Y-%m-%d")
 
         # we are only interesed in Security advisories after RHEL 8.3
-        if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < NOT_BEFORE:
+        if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < not_before:
             continue
 
         sa_id = extract_id(title)
-        packages = [extract_package(i.attrib['comment']) for i in definition.findall(".//n:criterion", ns)
+        packages = [extract_package(i.attrib['comment']) for
+                    i in definition.findall(".//n:criterion", namespase)
                     if 'is earlier than' in i.attrib['comment']]
         res[sa_id] = SecurityAdvisory(
             title=title, id=sa_id, packages=packages)
@@ -104,7 +111,9 @@ def parse_errata(fpath: str) -> Dict[str, SecurityAdvisory]:
 
 def compare(rhel_oval: Dict[str, SecurityAdvisory],
             alma_oval: Dict[str, SecurityAdvisory],
-            alma_errata: Dict[str, SecurityAdvisory]) -> Tuple[dict, list]:
+            alma_errata: Dict[str, SecurityAdvisory],
+            sa_exclude: List[str],
+            packages_exclude: List[str]) -> Tuple[dict, list]:
     """
     compares rhel oval with alma oval and alma errata
     """
@@ -147,18 +156,18 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
         sa_name = f'ALSA-{rhel_sa_id}'
 
         # filtering out SA
-        if sa_name in SA_EXCLUDE:
+        if sa_name in sa_exclude:
             report['excluded_sa'].append(sa_name)
             continue
 
         # filtefing out packages
         packages_to_check: List[Package] = []
-        for p in rhel_sa.packages:
-            if any(p.name == i for i in PACKAGES_EXCLUDE):
-                if str(p) not in report['excluded_pkg']:
-                    report['excluded_pkg'].append(str(p))
+        for package in rhel_sa.packages:
+            if any(package.name == i for i in packages_exclude):
+                if str(package) not in report['excluded_pkg']:
+                    report['excluded_pkg'].append(str(package))
             else:
-                packages_to_check.append(p)
+                packages_to_check.append(package)
 
         # check oval
         try:
@@ -175,13 +184,15 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
                                       if r not in alma_oval_packages]
         if alma_oval_missing_packages:
             report['diff_count'] += 1
+            diff_str = f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"
             diff.append({'sa_name': sa_name,
-                         'diff': f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"})
+                         'diff': diff_str})
             report['oval_missing_pkg_sa'].append(sa_name)
             report['oval_missing_pkg_sa_count'] += 1
-            for mp in alma_oval_missing_packages:
-                if mp not in report['missing_packages_unique']:
-                    report['missing_packages_unique'].append(mp)
+            for missing_package in alma_oval_missing_packages:
+                if missing_package not in report['missing_packages_unique']:
+                    report['missing_packages_unique'].append(
+                        missing_package)
                     report['missing_packages_unique_count'] += 1
 
         # check errata
@@ -200,13 +211,14 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
             str(r) for r in packages_to_check if r not in alma_errata_packages]
         if alma_errata_missing_packages:
             report['diff_count'] += 1
+            diff_str = f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"
             diff.append({'sa_name': sa_name,
-                         'diff': f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"})
+                         'diff': diff_str})
             report['errata_missing_pkg_sa'].append(sa_name)
             report['errata_missing_pkg_sa_count'] += 1
-            for mp in alma_errata_missing_packages:
-                if mp not in report['missing_packages_unique']:
-                    report['missing_packages_unique'].append(mp)
+            for missing_package in alma_errata_missing_packages:
+                if missing_package not in report['missing_packages_unique']:
+                    report['missing_packages_unique'].append(missing_package)
                     report['missing_packages_unique_count'] += 1
         else:
             # if we here, all checks were passed
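Note: a quick, hypothetical illustration of the criterion-parsing regex used by parse_oval() above; the sample OVAL comment string (package name, epoch, version) is made up, only the regex and the Package fields come from the code:

    import re
    from albs_oval_errata_diff.package import Package

    comment = 'kernel is earlier than 0:4.18.0-80.el8'  # hypothetical criterion comment
    match = re.search(r'(.*) is earlier than \d+:(.+?(?=-))', comment)
    pkg = Package(name=match.group(1), version=match.group(2))
    # -> Package(name='kernel', version='4.18.0')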
@@ -219,36 +231,45 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
 
 
 # starting point
-def comparer_run() -> Dict[str, Any]:
+def comparer_run(config: Config) -> Dict[str, Any]:
+    """
+    comparer_run is the starting point of the comparer component
+    """
     result = {}
-    for release, urls in RELEASES.items():
+    for release, urls in config.releases.items():
         logging.info('Processing release %i', release)
 
         logging.info('downloading rhel oval')
-        rhel_file = download_oval(urls['rhel_oval_url'])
+        rhel_file = download_oval(urls.rhel_oval_url, config.download_dir)
         logging.info('parsing rhel oval')
-        rhel_oval_dict = parse_oval(rhel_file)
+        rhel_oval_dict = parse_oval(rhel_file, config.not_before)
 
         logging.info('downloading alma oval')
-        alma_oval_file = download_oval(urls['alma_oval_url'])
+        alma_oval_file = download_oval(
+            urls.alma_oval_url, download_dir=config.download_dir)
         logging.info('parsing alma oval')
-        alma_oval_dict = parse_oval(alma_oval_file)
+        alma_oval_dict = parse_oval(alma_oval_file, config.not_before)
 
         logging.info('downloading alma errata')
-        alma_errata_file = download_errata(urls['alma_errata_url'], release)
+        alma_errata_file = download_errata(urls.alma_errata_url,
+                                           release, config.download_dir)
         logging.info('parsing alma errata')
         alma_errata_dict = parse_errata(alma_errata_file)
 
         logging.info('comparing rhel and alma')
-        report_release, diff_release = compare(
-            rhel_oval_dict, alma_oval_dict, alma_errata_dict)
+        report_release, diff_release = \
+            compare(rhel_oval_dict,
+                    alma_oval_dict,
+                    alma_errata_dict,
+                    config.sa_exclude,
+                    config.packages_exclude)
 
         result[release] = {'report': report_release,
                            'diff': diff_release,
-                           'rhel_oval_url': urls['rhel_oval_url'],
-                           'alma_oval_url': urls['alma_oval_url'],
-                           'alma_errata_url': urls['alma_errata_url']}
+                           'rhel_oval_url': urls.rhel_oval_url,
+                           'alma_oval_url': urls.alma_oval_url,
+                           'alma_errata_url': urls.alma_errata_url}
 
     result['report_generated'] = datetime.datetime.now().timestamp() * 1000
-    result['sa_not_before'] = NOT_BEFORE.timestamp() * 1000
+    result['sa_not_before'] = config.not_before.timestamp() * 1000
 
     return result
diff --git a/albs_oval_errata_diff/config.py b/albs_oval_errata_diff/config.py
index ab69d67..01c012d 100644
--- a/albs_oval_errata_diff/config.py
+++ b/albs_oval_errata_diff/config.py
@@ -2,12 +2,12 @@
 config.py used for generation service configuration
 based on input json file
 '''
-from datetime import datetime, date
+from datetime import datetime
 from pathlib import Path
 from typing import Dict, List
 from ipaddress import IPv4Address
 
-from pydantic import BaseModel, validator, Field  # pylint: disable=import-error
+from pydantic import BaseModel, validator, Field  # pylint: disable=import-error,no-name-in-module
 
 import yaml
@@ -59,7 +59,7 @@ class Config(BaseModel):
     server_ip: IPv4Address = Field(
         description="IP that will be used by webserver",
         default=SERVER_IP)
-    not_before: date = Field(
+    not_before: datetime = Field(
         description='date to start checking from (YYYY-mm-dd)',
         default=NOT_BEFORE)
     update_interval_minutes: int = Field(
@@ -88,8 +88,7 @@ class Config(BaseModel):
         """
         return datetime.strptime(
             value,
-            "%Y-%m-%d"
-        ).date()
+            "%Y-%m-%d")
 
 
 def get_config(yml_path: str) -> Config:
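For reference, a minimal sketch of how the reworked config is meant to be loaded and handed to the comparer (assuming a config.yml laid out like config.default.yml; the path is an assumption):

    from albs_oval_errata_diff.config import get_config
    from albs_oval_errata_diff.comparer import comparer_run

    config = get_config('config.yml')   # hypothetical path to the YAML config
    print(config.not_before)            # parsed into a datetime by the validator above
    result = comparer_run(config)       # runs one full RHEL/Alma comparison pass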
diff --git a/albs_oval_errata_diff/package.py b/albs_oval_errata_diff/package.py
index 03ca6f5..3da1272 100644
--- a/albs_oval_errata_diff/package.py
+++ b/albs_oval_errata_diff/package.py
@@ -1,5 +1,7 @@
+"""
+package.py contains Package dataclass definition
+"""
 from dataclasses import dataclass
-from typing import List
 
 
 @dataclass
diff --git a/albs_oval_errata_diff/sa.py b/albs_oval_errata_diff/sa.py
index 7e4dbfb..40e07da 100644
--- a/albs_oval_errata_diff/sa.py
+++ b/albs_oval_errata_diff/sa.py
@@ -1,3 +1,6 @@
+"""
+sa contains SecurityAdvisory dataclass definition
+"""
 from dataclasses import dataclass
 from typing import List
 
@@ -11,5 +14,5 @@ class SecurityAdvisory:
     from oval or errata
     """
     title: str
-    id: str
+    id: str  # pylint: disable=invalid-name
     packages: List[Package]
diff --git a/albs_oval_errata_diff/start.py b/albs_oval_errata_diff/start.py
index 15c3e35..d2b0e7b 100644
--- a/albs_oval_errata_diff/start.py
+++ b/albs_oval_errata_diff/start.py
@@ -2,15 +2,17 @@
 service compares rhel oval with alma ovals and errata ovals
 results available via API Call
 """
-from aiohttp import web
 import copy
 import logging
 from logging.handlers import RotatingFileHandler
 import threading
 from time import sleep
+from ipaddress import IPv4Address
 import json
 
-from .config import LOG_FILE, DIFF_FILE, UPDATE_INTERVAL_MINUTES, SERVER_IP, SERVER_PORT
+from aiohttp import web
+
+from .config import get_config, Config
 from .comparer import comparer_run
 
 
@@ -19,82 +21,97 @@
 diffs = {}
 diffs_lock = threading.Lock()
 
 
-async def web_handler(request):
+async def web_handler(_):
+    """
+    web_handler returns diffs as JSON file
+    """
     data = {}
     try:
         diffs_lock.acquire()
         data = copy.deepcopy(diffs)
         diffs_lock.release()
-    except Exception as e:
-        logging.critical("Unhandled exeption %s", e, exc_info=True)
+    except Exception as err:  # pylint: disable=broad-except
+        logging.critical("Unhandled exception %s", err, exc_info=True)
     return web.json_response(data=data)
 
 
-def webserver_run():
+def webserver_run(server_ip: IPv4Address, server_port: str):
+    """
+    webserver_run starts webserver component
+    """
     app = web.Application()
     app.add_routes([web.get('/', web_handler)])
-    web.run_app(app=app, host=SERVER_IP, port=SERVER_PORT)
+    web.run_app(app=app, host=str(server_ip), port=server_port)
 
 
-def diff_checker():
+def diff_checker(config: Config):
+    """
+    runs comparer component in infinite loop
+    """
     while True:
         logging.info("Start comparing")
         # generating new diff
         try:
-            result = comparer_run()
-        except Exception as e:
-            logging.critical("Unhandled exeption %s", e, exc_info=True)
+            result = comparer_run(config)
+        except Exception as err:  # pylint: disable=broad-except
+            logging.critical("Unhandled exception %s", err, exc_info=True)
         else:
             logging.info("Finished comparing, updating diff dict")
             diffs_lock.acquire()
-            global diffs
+            global diffs  # pylint: disable=invalid-name,global-statement
            diffs = result
             diffs_lock.release()
-            # dumping
-            logging.info("Saving results to disk")
-            try:
-                with open(DIFF_FILE, 'w', encoding='utf-8') as flw:
-                    json.dump(result, flw, indent=4)
-            except Exception as e:
-                logging.critical("Unhandled exeption %s", e, exc_info=True)
+
+            # dumping
+            logging.info("Saving results to disk")
+            try:
+                with open(config.diff_file, 'w', encoding='utf-8') as flw:
+                    json.dump(result, flw, indent=4)
+            except Exception as err:  # pylint: disable=broad-except
+                logging.critical("Unhandled exception %s", err, exc_info=True)
             logging.info("Done")
 
         logging.info("Finished comparing, go to sleep for %d minutes",
-                     UPDATE_INTERVAL_MINUTES)
-        sleep(UPDATE_INTERVAL_MINUTES * 60)
+                     config.update_interval_minutes)
+        sleep(config.update_interval_minutes * 60)
 
 
-def start():
-    # making sure that directory exists
-    for p in [LOG_FILE, DIFF_FILE]:
-        if not p.parent.exists():
-            p.parent.mkdir()
+def start(yaml_path: str):
+    """
+    start starts comparer and webserver components
+    each component runs in its own thread
+    """
+    config = get_config(yaml_path)
+
+    # making sure that parent directories exist
+    for path in [config.log_file, config.diff_file]:
+        if not path.parent.exists():
+            path.parent.mkdir()
+
+    # configuring logging
+    handlers = [logging.FileHandler(config.log_file, mode='a'),
+                logging.StreamHandler(),
+                RotatingFileHandler(config.log_file, maxBytes=10000, backupCount=3)]
     logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s %(levelname)s %(funcName)s %(message)s',
-                        handlers=[logging.FileHandler(LOG_FILE, mode='a'),
-                                  logging.StreamHandler(),
-                                  RotatingFileHandler(LOG_FILE, maxBytes=10000, backupCount=3)])
+                        handlers=handlers)
 
     logging.info("Trying to load diff file from disk")
     try:
-        with open(DIFF_FILE, 'r', encoding='utf-8') as flr:
+        with open(config.diff_file, 'r', encoding='utf-8') as flr:
             loaded_data = json.load(flr)
             diffs_lock.acquire()
+            global diffs  # pylint: disable=invalid-name,global-statement
             diffs = loaded_data
             diffs_lock.release()
-    except Exception as e:
-        logging.warning('cant load data from disk %s', e)
+    except Exception as err:  # pylint: disable=broad-except
+        logging.warning('cant load data from disk %s', err)
     else:
         logging.info('diff file was loaded')
 
     logging.info("Starting diff_checker in background")
-    thread = threading.Thread(target=diff_checker)
+    thread = threading.Thread(target=diff_checker, args=(config,))
     thread.daemon = True
     thread.start()
 
     logging.info("Starting webserver")
-    webserver_run()
-
-
-if __name__ == "__main__":
-    start()
+    webserver_run(config.server_ip, config.server_port)
diff --git a/config.default.yml b/config.default.yml
index 93c757a..e597784 100644
--- a/config.default.yml
+++ b/config.default.yml
@@ -33,8 +33,8 @@ releases:
     alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-8.xml.bz2
     alma_errata_url: https://errata.almalinux.org/8/errata.full.json
   9:
-    rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2'
-    alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2'
+    rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2
+    alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2
     alma_errata_url: https://errata.almalinux.org/9/errata.full.json
 
 # sa_exclude
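With these changes the service is started with an explicit config file, e.g. python albs_oval_errata_diff.py config.yml, and the accumulated diff report can then be fetched from the root endpoint served by aiohttp. A minimal client sketch, assuming the server listens on the host/port given in the config (the address and the report keys for release 8 below are illustrative):

    import requests

    # server_ip / server_port come from config.yml; 127.0.0.1:8080 is an assumption
    resp = requests.get('http://127.0.0.1:8080/', timeout=30)
    data = resp.json()
    print(data['report_generated'])              # timestamp of the last comparison run
    print(data['8']['report']['diff_count'])     # number of differences found for release 8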