ALBS-901: New service albs-oval-errata-dif #1

Merged
kzhukov merged 11 commits from ALBS-901 into main 2023-01-11 10:43:42 +00:00
7 changed files with 146 additions and 100 deletions
Showing only changes of commit fb2b1de2c1

View File: albs_oval_errata_diff.py

@@ -1,7 +1,11 @@
 """
 albs_oval_errata_diff.py is a service startup script
 """
+import sys
 from albs_oval_errata_diff.start import start
 
-start()
+try:
+    YAML_PATH = sys.argv[1]
+except IndexError:
+    print(f"Usage {sys.argv[0]} config.yml")
+start(YAML_PATH)
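Note the new argument handling: when no path is given, the script prints the usage string but still reaches start(YAML_PATH) with the name unset. A stricter variant would exit first; a minimal sketch (hypothetical, not part of this commit):

import sys

from albs_oval_errata_diff.start import start

if len(sys.argv) < 2:
    # no config path supplied: print usage and stop
    print(f"Usage: {sys.argv[0]} config.yml")
    sys.exit(1)

start(sys.argv[1])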

View File: comparer.py

@@ -1,80 +1,87 @@
+"""
+package comparer.py implements difference checking logic
+"""
 import bz2
 import datetime
+from pathlib import Path
 import re
-import requests
 from typing import Tuple, List, Dict, Any
-import xml.etree.ElementTree as ET
 import logging
 import json
-from .config import DOWNLOAD_DIR, NOT_BEFORE, RELEASES, SA_EXCLUDE, PACKAGES_EXCLUDE
-from .sa import SecurityAdvisory
+import xml.etree.ElementTree as ET
+
+import requests
+
+from .config import Config
 from .package import Package
+from .sa import SecurityAdvisory
-def download_oval(url: str) -> str:
+def download_oval(url: str, download_dir: Path) -> str:
     """
     download_oval downloads, decompresses the oval file
     and returns filepath of saved file
     """
-    r = requests.get(url, stream=True, timeout=30)
+    response = requests.get(url, stream=True, timeout=30)
     decompressor = bz2.BZ2Decompressor()
     fname = url.split('/')[-1].replace('.bz2', '')
-    fpath = DOWNLOAD_DIR / fname
-    with open(fpath, 'wb') as fd:
-        for chunk in r.iter_content(chunk_size=128):
-            fd.write(decompressor.decompress(chunk))
+    fpath = download_dir / fname
+    with open(fpath, 'wb') as flw:
+        for chunk in response.iter_content(chunk_size=128):
+            flw.write(decompressor.decompress(chunk))
     return fpath
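With the module-level DOWNLOAD_DIR removed, callers now pass the target directory explicitly. A short usage sketch (URL and path are illustrative):

from pathlib import Path

oval_path = download_oval(
    'https://repo.almalinux.org/security/oval/org.almalinux.alsa-8.xml.bz2',
    download_dir=Path('/tmp/oval-downloads'))
# the .bz2 suffix is stripped, so this prints
# /tmp/oval-downloads/org.almalinux.alsa-8.xml
print(oval_path)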
-def download_errata(url: str, release_version: int) -> str:
+def download_errata(url: str, release_version: int, download_dir: Path) -> str:
     """
     downloads errata_full.json file and returns file path
     """
     response = requests.get(url, stream=True, timeout=30)
     fname = f'alma-{release_version}.json'
-    fpath = DOWNLOAD_DIR / fname
+    fpath = download_dir / fname
     with open(fpath, 'wb') as errata_file:
         for chunk in response.iter_content(chunk_size=128):
             errata_file.write(chunk)
     return fpath
-def parse_oval(fpath: str) -> Dict[str, SecurityAdvisory]:
+def parse_oval(fpath: str, not_before: datetime.datetime) -> Dict[str, SecurityAdvisory]:
     """
     converting oval xml file to dict
     """
     def extract_package(title: str) -> Package:
-        r = r'(.*) is earlier than \d+:(.+?(?=-))'
-        res = re.search(r, title)
+        regexp = r'(.*) is earlier than \d+:(.+?(?=-))'
+        res = re.search(regexp, title)
         name = res.group(1)
         version = res.group(2)
         return Package(name=name, version=version)
 
     def extract_id(title: str) -> str:
-        r = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
-        res = re.search(r, title)
+        regexp = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
+        res = re.search(regexp, title)
         return res.group(1)
 
     tree = ET.parse(fpath)
     root = tree.getroot()
-    ns = {
+    namespase = {
         'n': 'http://oval.mitre.org/XMLSchema/oval-definitions-5',
     }
     res = {}
-    for definition in root.findall('n:definitions/', ns):
-        title = definition.find('n:metadata/n:title', ns).text
+    for definition in root.findall('n:definitions/', namespase):
+        title = definition.find('n:metadata/n:title', namespase).text
         issued = definition.find(
-            'n:metadata/n:advisory/n:issued', ns).attrib['date']
+            'n:metadata/n:advisory/n:issued', namespase).attrib['date']
         issued_dt = datetime.datetime.strptime(issued, "%Y-%m-%d")
         # we are only interested in Security advisories after RHEL 8.3
-        if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < NOT_BEFORE:
+        if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < not_before:
kzhukov marked this conversation as resolved
Review

It's rare, but security advisories can also be RHBAs or RHEAs. Up to you if you want to add this to the logic; nothing against merging this as it is now.
Review

As a tip, [RH|AL]BAs and [RH|AL]EAs that became security advisories must have the severity field set to one of the valid values: Low, Moderate, Important or Critical. You can check them here: https://access.redhat.com/security/updates/classification
Review

Thanks for the heads up. I created a task for adding support of BA/EA in https://cloudlinux.atlassian.net/browse/ALBS-915. I will work on this feature in a separate branch; this one I will merge.
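One possible shape for the BA/EA support tracked in ALBS-915, following the tip above; this assumes the OVAL advisory element carries a severity child, as Red Hat's OVAL v2 data does (hypothetical helper, not part of this commit):

VALID_SEVERITIES = {'Low', 'Moderate', 'Important', 'Critical'}

def has_security_severity(definition, namespase) -> bool:
    # BA/EA definitions promoted to security advisories must carry
    # a severity of Low, Moderate, Important or Critical
    severity = definition.find(
        'n:metadata/n:advisory/n:severity', namespase)
    return severity is not None and severity.text in VALID_SEVERITIES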
             continue
         sa_id = extract_id(title)
-        packages = [extract_package(i.attrib['comment']) for i in definition.findall(".//n:criterion", ns)
+        packages = [extract_package(i.attrib['comment']) for
+                    i in definition.findall(".//n:criterion", namespase)
                     if 'is earlier than' in i.attrib['comment']]
         res[sa_id] = SecurityAdvisory(
             title=title, id=sa_id, packages=packages)
@@ -104,7 +111,9 @@ def parse_errata(fpath: str) -> Dict[str, SecurityAdvisory]:
 def compare(rhel_oval: Dict[str, SecurityAdvisory],
             alma_oval: Dict[str, SecurityAdvisory],
-            alma_errata: Dict[str, SecurityAdvisory]) -> Tuple[dict, list]:
+            alma_errata: Dict[str, SecurityAdvisory],
+            sa_exclude: List[str],
+            packages_exclude: List[str]) -> Tuple[dict, list]:
     """
     compares rhel oval with alma oval and alma errata
     """
@@ -147,18 +156,18 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
         sa_name = f'ALSA-{rhel_sa_id}'
         # filtering out SA
-        if sa_name in SA_EXCLUDE:
+        if sa_name in sa_exclude:
             report['excluded_sa'].append(sa_name)
             continue
         # filtering out packages
         packages_to_check: List[Package] = []
-        for p in rhel_sa.packages:
-            if any(p.name == i for i in PACKAGES_EXCLUDE):
-                if str(p) not in report['excluded_pkg']:
-                    report['excluded_pkg'].append(str(p))
+        for package in rhel_sa.packages:
+            if any(package.name == i for i in packages_exclude):
+                if str(package) not in report['excluded_pkg']:
+                    report['excluded_pkg'].append(str(package))
             else:
-                packages_to_check.append(p)
+                packages_to_check.append(package)
 
         # check oval
         try:
@@ -175,13 +184,15 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
                 if r not in alma_oval_packages]
             if alma_oval_missing_packages:
                 report['diff_count'] += 1
+                diff_str = f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"
                 diff.append({'sa_name': sa_name,
-                             'diff': f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"})
+                             'diff': diff_str})
                 report['oval_missing_pkg_sa'].append(sa_name)
                 report['oval_missing_pkg_sa_count'] += 1
-                for mp in alma_oval_missing_packages:
-                    if mp not in report['missing_packages_unique']:
-                        report['missing_packages_unique'].append(mp)
+                for missing_package in alma_oval_missing_packages:
+                    if missing_package not in report['missing_packages_unique']:
+                        report['missing_packages_unique'].append(
+                            missing_package)
                         report['missing_packages_unique_count'] += 1
 
         # check errata
@@ -200,13 +211,14 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
                 str(r) for r in packages_to_check if r not in alma_errata_packages]
             if alma_errata_missing_packages:
                 report['diff_count'] += 1
+                diff_str = f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"
                 diff.append({'sa_name': sa_name,
-                             'diff': f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"})
+                             'diff': diff_str})
                 report['errata_missing_pkg_sa'].append(sa_name)
                 report['errata_missing_pkg_sa_count'] += 1
-                for mp in alma_errata_missing_packages:
-                    if mp not in report['missing_packages_unique']:
-                        report['missing_packages_unique'].append(mp)
+                for missing_package in alma_errata_missing_packages:
+                    if missing_package not in report['missing_packages_unique']:
+                        report['missing_packages_unique'].append(missing_package)
                         report['missing_packages_unique_count'] += 1
             else:
                 # if we're here, all checks were passed
@@ -219,36 +231,45 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
 # starting point
-def comparer_run() -> Dict[str, Any]:
+def comparer_run(config: Config) -> Dict[str, Any]:
+    """
+    comparer_run is the starting point of the comparer component
+    """
     result = {}
-    for release, urls in RELEASES.items():
+    for release, urls in config.releases.items():
         logging.info('Processing release %i', release)
         logging.info('downloading rhel oval')
-        rhel_file = download_oval(urls['rhel_oval_url'])
+        rhel_file = download_oval(urls.rhel_oval_url, config.download_dir)
         logging.info('parsing rhel oval')
-        rhel_oval_dict = parse_oval(rhel_file)
+        rhel_oval_dict = parse_oval(rhel_file, config.not_before)
         logging.info('downloading alma oval')
-        alma_oval_file = download_oval(urls['alma_oval_url'])
+        alma_oval_file = download_oval(
+            urls.alma_oval_url, download_dir=config.download_dir)
         logging.info('parsing alma oval')
-        alma_oval_dict = parse_oval(alma_oval_file)
+        alma_oval_dict = parse_oval(alma_oval_file, config.not_before)
         logging.info('downloading alma errata')
-        alma_errata_file = download_errata(urls['alma_errata_url'], release)
+        alma_errata_file = download_errata(urls.alma_errata_url,
+                                           release, config.download_dir)
         logging.info('parsing alma errata')
         alma_errata_dict = parse_errata(alma_errata_file)
         logging.info('comparing rhel and alma')
-        report_release, diff_release = compare(
-            rhel_oval_dict, alma_oval_dict, alma_errata_dict)
+        report_release, diff_release = \
+            compare(rhel_oval_dict,
+                    alma_oval_dict,
+                    alma_errata_dict,
+                    config.sa_exclude,
+                    config.packages_exclude)
         result[release] = {'report': report_release,
                            'diff': diff_release,
-                           'rhel_oval_url': urls['rhel_oval_url'],
-                           'alma_oval_url': urls['alma_oval_url'],
-                           'alma_errata_url': urls['alma_errata_url']}
+                           'rhel_oval_url': urls.rhel_oval_url,
+                           'alma_oval_url': urls.alma_oval_url,
+                           'alma_errata_url': urls.alma_errata_url}
     result['report_generated'] = datetime.datetime.now().timestamp() * 1000
-    result['sa_not_before'] = NOT_BEFORE.timestamp() * 1000
+    result['sa_not_before'] = config.not_before.timestamp() * 1000
     return result
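End to end, the comparer is now driven entirely by a Config object instead of module-level constants. A hedged usage sketch (the config path is illustrative):

from albs_oval_errata_diff.config import get_config
from albs_oval_errata_diff.comparer import comparer_run

config = get_config('config.yml')  # illustrative path
result = comparer_run(config)
# per-release reports plus two timestamps in milliseconds
print(result['report_generated'], result['sa_not_before'])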

View File: config.py

@@ -2,12 +2,12 @@
 config.py is used for generating service configuration based on an input yaml file
 '''
-from datetime import datetime, date
+from datetime import datetime
 from pathlib import Path
 from typing import Dict, List
 from ipaddress import IPv4Address
-from pydantic import BaseModel, validator, Field  # pylint: disable=import-error
+from pydantic import BaseModel, validator, Field  # pylint: disable=import-error,no-name-in-module
 import yaml
@@ -59,7 +59,7 @@ class Config(BaseModel):
     server_ip: IPv4Address = Field(
         description="IP that will be used by webserver",
         default=SERVER_IP)
-    not_before: date = Field(
+    not_before: datetime = Field(
        description='date to start checking from (YYYY-mm-dd)',
        default=NOT_BEFORE)
     update_interval_minutes: int = Field(
@@ -88,8 +88,7 @@ class Config(BaseModel):
         """
         return datetime.strptime(
             value,
-            "%Y-%m-%d"
-        ).date()
+            "%Y-%m-%d")
 
 def get_config(yml_path: str) -> Config:
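The switch from date to datetime matters because parse_oval compares the parsed issued_dt, a datetime, against not_before, and Python refuses to order a datetime against a date. A quick illustration (dates are made up):

from datetime import datetime

not_before = datetime.strptime('2020-10-01', '%Y-%m-%d')
issued_dt = datetime.strptime('2020-05-12', '%Y-%m-%d')

# both sides are datetimes, so the comparison is valid; with a date
# on the right it would raise TypeError: can't compare
# datetime.datetime to datetime.date
print(issued_dt < not_before)  # True: this advisory would be skipped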

View File: package.py

@@ -1,5 +1,7 @@
+"""
+package.py contains Package dataclass definition
+"""
 from dataclasses import dataclass
-from typing import List
 
 @dataclass

View File: sa.py

@@ -1,3 +1,6 @@
+"""
+sa contains SecurityAdvisory dataclass definition
+"""
 from dataclasses import dataclass
 from typing import List

@@ -11,5 +14,5 @@ class SecurityAdvisory:
     from oval or errata
     """
     title: str
-    id: str
+    id: str  # pylint: disable=invalid-name
     packages: List[Package]
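Together the two dataclasses describe one advisory and the packages that fix it. A small construction sketch (field values are illustrative):

from albs_oval_errata_diff.package import Package
from albs_oval_errata_diff.sa import SecurityAdvisory

pkg = Package(name='openssl', version='1.1.1k')
advisory = SecurityAdvisory(
    title='ALSA-2021:4424: openssl security update (Moderate)',
    id='2021:4424',
    packages=[pkg])
print(advisory.id, advisory.packages)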

View File: start.py

@@ -2,15 +2,17 @@
 service compares rhel oval with alma ovals and errata ovals
 results available via API Call
 """
-from aiohttp import web
 import copy
 import logging
 from logging.handlers import RotatingFileHandler
 import threading
 from time import sleep
+from ipaddress import IPv4Address
 import json
 
-from .config import LOG_FILE, DIFF_FILE, UPDATE_INTERVAL_MINUTES, SERVER_IP, SERVER_PORT
+from aiohttp import web
+
+from .config import get_config, Config
 from .comparer import comparer_run
@@ -19,82 +21,97 @@ diffs = {}
 diffs_lock = threading.Lock()
 
-async def web_handler(request):
+async def web_handler(_):
+    """
+    web_handler returns diffs as JSON
+    """
     data = {}
     try:
         diffs_lock.acquire()
         data = copy.deepcopy(diffs)
         diffs_lock.release()
-    except Exception as e:
-        logging.critical("Unhandled exception %s", e, exc_info=True)
+    except Exception as err:  # pylint: disable=broad-except
+        logging.critical("Unhandled exception %s", err, exc_info=True)
     return web.json_response(data=data)
-def webserver_run():
+def webserver_run(server_ip: IPv4Address, server_port: str):
+    """
+    webserver_run starts webserver component
+    """
     app = web.Application()
     app.add_routes([web.get('/', web_handler)])
-    web.run_app(app=app, host=SERVER_IP, port=SERVER_PORT)
+    web.run_app(app=app, host=str(server_ip), port=server_port)
 
-def diff_checker():
+def diff_checker(config: Config):
+    """
+    runs comparer component in infinite loop
+    """
     while True:
         logging.info("Start comparing")
         # generating new diff
         try:
-            result = comparer_run()
-        except Exception as e:
-            logging.critical("Unhandled exception %s", e, exc_info=True)
+            result = comparer_run(config)
+        except Exception as err:  # pylint: disable=broad-except
+            logging.critical("Unhandled exception %s", err, exc_info=True)
         else:
             logging.info("Finished comparing, updating diff dict")
             diffs_lock.acquire()
-            global diffs
+            global diffs  # pylint: disable=invalid-name,global-statement
             diffs = result
             diffs_lock.release()
             # dumping
             logging.info("Saving results to disk")
             try:
-                with open(DIFF_FILE, 'w', encoding='utf-8') as flw:
+                with open(config.diff_file, 'w', encoding='utf-8') as flw:
                     json.dump(result, flw, indent=4)
-            except Exception as e:
-                logging.critical("Unhandled exception %s", e, exc_info=True)
+            except Exception as err:  # pylint: disable=broad-except
+                logging.critical("Unhandled exception %s", err, exc_info=True)
             logging.info("Done")
         logging.info("Finished comparing, go to sleep for %d minutes",
-                     UPDATE_INTERVAL_MINUTES)
-        sleep(UPDATE_INTERVAL_MINUTES * 60)
+                     config.update_interval_minutes)
+        sleep(config.update_interval_minutes * 60)
-def start():
-    # making sure that directory exists
-    for p in [LOG_FILE, DIFF_FILE]:
-        if not p.parent.exists():
-            p.parent.mkdir()
+def start(yaml_path: str):
+    """
+    start starts comparer and webserver components
+    each component runs in its own thread
+    """
+    config = get_config(yaml_path)
+    # making sure that parent directories exist
+    for path in [config.log_file, config.diff_file]:
+        if not path.parent.exists():
+            path.parent.mkdir()
 
+    # configuring logging
+    handlers = [logging.FileHandler(config.log_file, mode='a'),
+                logging.StreamHandler(),
+                RotatingFileHandler(config.log_file, maxBytes=10000, backupCount=3)]
     logging.basicConfig(level=logging.INFO,
                         format='%(asctime)s %(levelname)s %(funcName)s %(message)s',
-                        handlers=[logging.FileHandler(LOG_FILE, mode='a'),
-                                  logging.StreamHandler(),
-                                  RotatingFileHandler(LOG_FILE, maxBytes=10000, backupCount=3)])
+                        handlers=handlers)
 
     logging.info("Trying to load diff file from disk")
     try:
-        with open(DIFF_FILE, 'r', encoding='utf-8') as flr:
+        with open(config.diff_file, 'r', encoding='utf-8') as flr:
             loaded_data = json.load(flr)
         diffs_lock.acquire()
+        global diffs  # pylint: disable=invalid-name,global-statement
         diffs = loaded_data
         diffs_lock.release()
-    except Exception as e:
-        logging.warning("can't load data from disk %s", e)
+    except Exception as err:  # pylint: disable=broad-except
+        logging.warning("can't load data from disk %s", err)
     else:
         logging.info('diff file was loaded')
 
     logging.info("Starting diff_checker in background")
-    thread = threading.Thread(target=diff_checker)
+    thread = threading.Thread(target=diff_checker, args=(config,))
     thread.daemon = True
     thread.start()
 
     logging.info("Starting webserver")
-    webserver_run()
+    webserver_run(config.server_ip, config.server_port)
-
-if __name__ == "__main__":
-    start()
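Once start() is up, the latest diff report is served as JSON at the root path. A client sketch (host and port come from your config; 8080 is aiohttp's default):

import requests

resp = requests.get('http://127.0.0.1:8080/', timeout=10)
report = resp.json()
print(report.get('report_generated'))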

View File: service config (YAML)

@@ -33,8 +33,8 @@ releases:
     alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-8.xml.bz2
     alma_errata_url: https://errata.almalinux.org/8/errata.full.json
   9:
-    rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2'
-    alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2'
+    rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2
+    alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2
     alma_errata_url: https://errata.almalinux.org/9/errata.full.json
 # sa_exclude