diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..886279f --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +venv +logs +results +*.pyc +__pycache__ +.vscode \ No newline at end of file diff --git a/README.md b/README.md index e69de29..ac33a59 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,63 @@ +# albs-oval-errata-diff + +Service compares RHEL Oval with AlmaLinux Oval/Errata and stores differences. Differences are available via HTTP GET request in the form of JSON + +## Components +### comparer +Downloads Oval/Errata files and generates differences report + +### webserver +Publishes JSON report via aiohttp webserver + +## Configuration +check [config.default.yml](config.default.yml) for references + +## Requirements +- Python 3.9 +- pip +- virtualenv + +## Installation +1. Checkout code + ```bash + $ git clone git@git.almalinux.org:kzhukov/albs-oval-errata-diff.git + ``` +2. Create and initialize virtual environment + ```bash + $ virtualenv -p python3.9 venv && source venv/bin/activate + ``` +3. Install requirements + ```bash + $ pip install -r requirements.txt + ``` +4. 
Create config file using [config.default.yml](config.default.yml) and start service with _albs_oval_errata_diff.py_ script + ```bash + $ python albs_oval_errata_diff.py config.yml + 2022-12-29 16:20:11,139 INFO start Trying to load diff file from disk + 2022-12-29 16:20:11,142 INFO start Diff file was loaded + 2022-12-29 16:20:11,142 INFO start Starting diff_checker in background + 2022-12-29 16:20:11,143 INFO diff_checker Start comparing + 2022-12-29 16:20:11,143 INFO start Starting webserver + 2022-12-29 16:20:11,144 INFO comparer_run Processing release 8 + 2022-12-29 16:20:11,148 INFO comparer_run downloading rhel oval + ======== Running on http://127.0.0.1:3001 ======== + (Press CTRL+C to quit) + 2022-12-29 16:20:12,142 INFO comparer_run parsing rhel oval + 2022-12-29 16:20:13,154 INFO comparer_run downloading alma oval + 2022-12-29 16:20:16,516 INFO comparer_run parsing alma oval + 2022-12-29 16:20:17,695 INFO comparer_run downloading alma errata + 2022-12-29 16:20:28,894 INFO comparer_run parsing alma errata + 2022-12-29 16:20:29,143 INFO comparer_run comparing rhel and alma + 2022-12-29 16:20:29,233 INFO comparer_run Processing release 9 + 2022-12-29 16:20:29,234 INFO comparer_run downloading rhel oval + 2022-12-29 16:20:29,599 INFO comparer_run parsing rhel oval + 2022-12-29 16:20:29,716 INFO comparer_run downloading alma oval + 2022-12-29 16:20:31,033 INFO comparer_run parsing alma oval + 2022-12-29 16:20:31,165 INFO comparer_run downloading alma errata + 2022-12-29 16:20:33,542 INFO comparer_run parsing alma errata + 2022-12-29 16:20:33,601 INFO comparer_run comparing rhel and alma + 2022-12-29 16:20:33,621 INFO diff_checker Finished comparing, updating diff dict + 2022-12-29 16:20:33,622 INFO diff_checker Saving results to disk + 2022-12-29 16:20:33,630 INFO diff_checker Done + 2022-12-29 16:20:33,630 INFO diff_checker Finished comparing, go to sleep for 30 minutes + ``` \ No newline at end of file diff --git a/albs_oval_errata_diff.py 
def download_oval(url: str, download_dir: Path) -> Path:
    """
    download_oval downloads a bz2-compressed oval file, decompresses it
    on the fly and returns the path of the saved (decompressed) file

    url: location of the compressed oval file
    download_dir: directory the decompressed file is written to

    raises requests.HTTPError when the server answers with an error status
    """
    # file name is the last URL segment with the '.bz2' marker removed
    fname = url.split('/')[-1].replace('.bz2', '')
    fpath = download_dir / fname
    decompressor = bz2.BZ2Decompressor()
    # context manager releases the connection even when something fails
    with requests.get(url, stream=True, timeout=30) as response:
        # fail early instead of feeding an HTML error page to the decompressor
        response.raise_for_status()
        with open(fpath, 'wb') as flw:
            # 64 KiB chunks: far fewer iterations than 128-byte reads
            for chunk in response.iter_content(chunk_size=64 * 1024):
                flw.write(decompressor.decompress(chunk))
    return fpath
def compare(rhel_oval: Dict[str, "SecurityAdvisory"],
            alma_oval: Dict[str, "SecurityAdvisory"],
            alma_errata: Dict[str, "SecurityAdvisory"],
            sa_exclude: List[str],
            packages_exclude: List[str]) -> Tuple[dict, list]:
    """
    compares rhel oval with alma oval and alma errata

    rhel_oval: reference advisories, keyed by SA id
    alma_oval: alma oval advisories, looked up by the same SA id
    alma_errata: alma errata advisories, looked up by the same SA id
    sa_exclude: SA names in 'ALSA-<id>' form that must not be checked
    packages_exclude: package names that must not be checked

    returns (report, diff): report is a dict of counters and sorted lists,
    diff is a list of {'sa_name': ..., 'diff': ...} entries in the order
    the differences were found

    an SA is counted in 'good_sa_count' only when neither the oval check
    nor the errata check produced a difference for it
    """
    diff = []
    report = {
        # total amount of security advisories
        'total_sa_count': 0,
        # amount of SA that match with rhel
        'good_sa_count': 0,
        # total amount of differences
        'diff_count': 0,
        # list of SA excluded from diff check
        'excluded_sa': [],
        # list of packages excluded from diff check
        'excluded_pkg': [],
        # amount of SA that don't exist in oval file
        'oval_missing_sa_count': 0,
        # amount of oval SA that have missing packages
        'oval_missing_pkg_sa_count': 0,
        # list of missing oval SA
        'oval_missing_sa': [],
        # list of oval SA that have missing packages
        'oval_missing_pkg_sa': [],
        # amount of SA that don't exist in errata file
        'errata_missing_sa_count': 0,
        # amount of errata SA that have missing packages
        'errata_missing_pkg_sa_count': 0,
        # list of SA that are missing in errata file
        'errata_missing_sa': [],
        # list of errata SA with missing packages
        'errata_missing_pkg_sa': [],
        # total amount of unique missing packages across all alma SA
        'missing_packages_unique_count': 0,
        # list of unique packages that are missing across all alma SA
        'missing_packages_unique': []
    }

    # sets give O(1) membership tests and uniqueness for free
    excluded_sa_names = set(sa_exclude)
    excluded_pkg_names = set(packages_exclude)
    excluded_pkg = set()
    missing_unique = set()

    # both alma sources are checked the same way; the source name is used
    # to build the report keys ('oval_*' / 'errata_*') and the diff messages
    alma_sources = (('oval', alma_oval), ('errata', alma_errata))

    for rhel_sa_id, rhel_sa in rhel_oval.items():
        report['total_sa_count'] += 1
        sa_name = f'ALSA-{rhel_sa_id}'

        # filtering out SA
        if sa_name in excluded_sa_names:
            report['excluded_sa'].append(sa_name)
            continue

        # filtering out packages; only string forms are needed below
        packages_to_check: List[str] = []
        for package in rhel_sa.packages:
            if package.name in excluded_pkg_names:
                excluded_pkg.add(str(package))
            else:
                packages_to_check.append(str(package))

        sa_is_good = True
        for source, alma_sas in alma_sources:
            if rhel_sa_id not in alma_sas:
                sa_is_good = False
                report['diff_count'] += 1
                diff.append({'sa_name': sa_name,
                             'diff': f'SA is missing in {source}'})
                report[f'{source}_missing_sa'].append(sa_name)
                report[f'{source}_missing_sa_count'] += 1
                continue

            # check if some packages are missing from the alma SA
            present = {str(pkg) for pkg in alma_sas[rhel_sa_id].packages}
            missing = [pkg for pkg in packages_to_check
                       if pkg not in present]
            if not missing:
                continue
            sa_is_good = False
            report['diff_count'] += 1
            diff_str = f"missing packages in {source} SA: {','.join(missing)}"
            diff.append({'sa_name': sa_name, 'diff': diff_str})
            report[f'{source}_missing_pkg_sa'].append(sa_name)
            report[f'{source}_missing_pkg_sa_count'] += 1
            missing_unique.update(missing)

        # good only when BOTH the oval and the errata checks passed
        if sa_is_good:
            report['good_sa_count'] += 1

    report['excluded_pkg'] = list(excluded_pkg)
    report['missing_packages_unique'] = list(missing_unique)
    report['missing_packages_unique_count'] = len(missing_unique)

    # every list in the report is returned sorted
    for item in report.values():
        if isinstance(item, list):
            item.sort()
    return report, diff
class Config(BaseModel):
    """
    Config represents service configuration

    every attribute except 'releases' has a default, so a config file
    only has to provide the releases mapping
    """
    diff_file: Path = Field(description="file to store diff JSON in",
                            default=DIFF_FILE)
    download_dir: Path = Field(
        description='directory to download Oval/Errata files to',
        default=DOWNLOAD_DIR)
    log_file: Path = Field(
        description='file to write logs to',
        default=LOG_FILE)
    packages_exclude: List[str] = Field(
        description='list of RPM package names to exclude from checking',
        default=PACKAGES_EXCLUDE)
    releases: Dict[int, ReleaseUrls] = Field(
        description='list of OS releases with Oval/Errata URLs to check')
    sa_exclude: List[str] = Field(
        description='list of Security Advisory IDs (ALSA-2022:5219) to exclude from checking',
        default=SA_EXCLUDE)
    server_port: int = Field(
        description='port that will be used by websever',
        default=SERVER_PORT)
    server_ip: IPv4Address = Field(
        description='IP that will be used by webserver',
        default=SERVER_IP)
    not_before: datetime = Field(
        description='date to start checking from (YYYY-mm-dd)',
        default=NOT_BEFORE)
    update_interval_minutes: int = Field(
        description='how often service will be running difference checks (in minutes)',
        default=UPDATE_INTERVAL_MINUTES)

    @validator("releases", pre=True)
    @classmethod
    def parse_releases(cls, value) -> Dict[int, ReleaseUrls]:
        """
        parse_releases converts releases attribute
        Dict[int, Dict[str, str]] -> Dict[int, ReleaseUrls]

        raises KeyError if one of the three URLs is missing for a release
        """
        result: Dict[int, ReleaseUrls] = {}
        for release, urls in value.items():
            result[release] = ReleaseUrls(rhel_oval_url=urls['rhel_oval_url'],
                                          alma_oval_url=urls['alma_oval_url'],
                                          alma_errata_url=urls['alma_errata_url'])
        return result

    @validator("not_before", pre=True)
    @classmethod
    def str_to_datetime(cls, value) -> datetime:
        """
        str_to_datetime converts not_before attribute to datetime

        accepts a 'YYYY-mm-dd' string as well as date/datetime objects:
        the YAML loader turns an unquoted zero-padded date (2022-05-18)
        into a date object, which strptime can't handle
        """
        # module imports only the datetime class, date is needed just here
        from datetime import date  # pylint: disable=import-outside-toplevel

        # datetime is a subclass of date, so it has to be checked first
        if isinstance(value, datetime):
            return value
        if isinstance(value, date):
            return datetime(value.year, value.month, value.day)
        return datetime.strptime(
            value,
            "%Y-%m-%d")
async def web_handler(_):
    """
    web_handler returns diffs as JSON response

    the request argument is not used; a snapshot of the module level
    diffs dict is taken under diffs_lock and serialized
    if taking the snapshot fails, the error is logged and an empty
    JSON object is returned
    """
    data = {}
    try:
        # 'with' releases the lock even if deepcopy raises, otherwise
        # the lock would stay held and block every later reader/writer
        with diffs_lock:
            data = copy.deepcopy(diffs)
    except Exception as err:  # pylint: disable=broad-except
        logging.critical("Unhandled exeption %s", err, exc_info=True)
    return web.json_response(data=data)
def start(yaml_path: str):
    """
    start starts comparer and webserver components

    yaml_path: path to the YAML config file

    the comparer (diff_checker) runs in a daemon thread, the webserver
    runs in the calling thread and blocks until it is stopped
    """
    global diffs  # pylint: disable=invalid-name,global-statement

    config = get_config(yaml_path)

    # making sure that parent directories of both files we write exist
    # parents/exist_ok: nested paths are fine and there is no race
    # between an existence check and the creation
    for path in (config.log_file, config.diff_file):
        path.parent.mkdir(parents=True, exist_ok=True)

    # configuring logging
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(funcName)s %(message)s',
                        handlers=[RotatingFileHandler(config.log_file,
                                                      maxBytes=10000000,
                                                      backupCount=3)])

    # previous results are served until the first comparison finishes
    logging.info("Trying to load diff file from disk")
    try:
        with open(config.diff_file, 'r', encoding='utf-8') as flr:
            loaded_data = json.load(flr)
    except Exception as err:  # pylint: disable=broad-except
        # best effort: a missing or broken file must not stop the service
        logging.warning('Cant load data from disk %s', err)
    else:
        with diffs_lock:
            diffs = loaded_data
        logging.info('Diff file was loaded')

    logging.info("Starting diff_checker in background")
    # daemon thread: it must not keep the process alive after the
    # webserver has stopped
    thread = threading.Thread(target=diff_checker, args=(config,),
                              daemon=True)
    thread.start()
    logging.info("Starting webserver")
    webserver_run(config.server_ip, config.server_port)
/tmp + +# log_file +# file to write logs to +# required: no +# default: logs/albs-oval-errata-diff.log +log_file: logs/albs-oval-errata-diff.log + +# packages_exclude +# list of RPM package names to exclude from checking +# required: no +# default: [] +packages_exclude: [] + +# releases +# list of OS releases with Oval/Errata URLs to check +# required: yes +# default: N/A +releases: + 8: + rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL8/rhel-8.oval.xml.bz2 + alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-8.xml.bz2 + alma_errata_url: https://errata.almalinux.org/8/errata.full.json + 9: + rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2 + alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2 + alma_errata_url: https://errata.almalinux.org/9/errata.full.json + +# sa_exclude +# list of Security Advisory IDs (ALSA-2022:5219) to exclude from checking +# required: no +# default: [] +sa_exclude: [] + +# server_port +# port that will be used by webserver +# required: no +# default: 3001 +server_port: 3001 + +# server_ip +# IP that will be used by webserver +# required: no +# default: 127.0.0.1 +server_ip: 127.0.0.1 + +# not_before +# date to start checking from (YYYY-mm-dd) +# required: no +# default: 2022-5-18 (Release of RHEL 9.0) +not_before: 2022-5-18 + +# update_interval_minutes +# how often service will be running difference checks (in minutes) +# required: no +# default: 30 +update_interval_minutes: 30 \ No newline at end of file diff --git a/releases.txt b/releases.txt new file mode 100644 index 0000000..94b3d83 --- /dev/null +++ b/releases.txt @@ -0,0 +1,4 @@ +2022-12-30 v1.0.0 + First version of service +2023-01-04 v1.0.1 + Fixed missing packages false positives \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..37fd186 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +aiohttp==3.8.3 
+pydantic==1.10.2 +PyYAML==6.0 +requests==2.28.1