albs-oval-errata-diff/albs_oval_errata_diff/comparer.py

276 lines
10 KiB
Python
Raw Normal View History

2022-12-29 14:29:18 +00:00
"""
package comparer.py implements difference checking logic
"""
2022-12-28 16:21:40 +00:00
import bz2
import datetime
2022-12-29 14:29:18 +00:00
from pathlib import Path
2022-12-28 16:21:40 +00:00
import re
from typing import Tuple, List, Dict, Any
import logging
import json
2022-12-29 14:29:18 +00:00
import xml.etree.ElementTree as ET
2022-12-28 16:21:40 +00:00
2022-12-29 14:29:18 +00:00
import requests
from .config import Config
2022-12-28 16:21:40 +00:00
from .package import Package
2022-12-29 14:29:18 +00:00
from .sa import SecurityAdvisory
2022-12-28 16:21:40 +00:00
2022-12-29 14:29:18 +00:00
def download_oval(url: str, download_dir: Path) -> Path:
    """
    download_oval downloads a bz2-compressed oval file, decompresses it
    while streaming and returns the path of the saved file.

    The file is saved into download_dir under the last segment of the
    url with '.bz2' removed from it.

    Raises requests.HTTPError if the server answers with an error status,
    so that an error page is never passed to the bz2 decompressor.
    """
    decompressor = bz2.BZ2Decompressor()
    fname = url.split('/')[-1].replace('.bz2', '')
    fpath = download_dir / fname
    # the context manager releases the streamed connection even if
    # decompression or writing fails half way
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(fpath, 'wb') as flw:
            for chunk in response.iter_content(chunk_size=65536):
                flw.write(decompressor.decompress(chunk))
    return fpath
2022-12-29 14:29:18 +00:00
def download_errata(url: str, release_version: int, download_dir: Path) -> Path:
    """
    download_errata downloads the errata_full.json file and returns
    the path of the saved file.

    The file is saved into download_dir as 'alma-<release_version>.json'.

    Raises requests.HTTPError if the server answers with an error status,
    so that an error page is never saved as if it was the errata json.
    """
    fname = f'alma-{release_version}.json'
    fpath = download_dir / fname
    # the context manager releases the streamed connection even if
    # writing the file fails half way
    with requests.get(url, stream=True, timeout=30) as response:
        response.raise_for_status()
        with open(fpath, 'wb') as errata_file:
            for chunk in response.iter_content(chunk_size=65536):
                errata_file.write(chunk)
    return fpath
2022-12-29 14:29:18 +00:00
def parse_oval(fpath: str, not_before: datetime.datetime) -> Dict[str, SecurityAdvisory]:
    """
    parse_oval converts an oval xml file to a dict of SecurityAdvisory
    instances keyed by advisory id (the 'YYYY:NNNN' part of the title).

    Only definitions whose title contains 'RHSA' or 'ALSA' and whose
    issue date is not earlier than not_before are returned.

    Raises ValueError if an advisory title or an 'is earlier than'
    criterion comment does not have the expected format.
    """
    # matches '<name> is earlier than <digits>:<version>-...';
    # the version group stops right before the first '-'
    package_re = re.compile(r'(.*) is earlier than \d+:(.+?(?=-))')
    # '(?:RH|AL)' is an alternation group; a character class such as
    # '[RH|AL]' would match any single one of 'R', 'H', '|', 'A', 'L'
    sa_id_re = re.compile(r'(?:RH|AL)SA-(\d{4}:\d+)')

    def extract_package(title: str) -> Package:
        res = package_re.search(title)
        if res is None:
            raise ValueError(
                f"can't extract package from criterion comment: {title!r}")
        return Package(name=res.group(1), version=res.group(2))

    def extract_id(title: str) -> str:
        res = sa_id_re.search(title)
        if res is None:
            raise ValueError(
                f"can't extract advisory id from title: {title!r}")
        return res.group(1)

    tree = ET.parse(fpath)
    root = tree.getroot()
    namespace = {
        'n': 'http://oval.mitre.org/XMLSchema/oval-definitions-5',
    }
    res = {}
    for definition in root.findall('n:definitions/', namespace):
        title = definition.find('n:metadata/n:title', namespace).text
        # cheap title check goes first, so definitions that are not
        # security advisories never need to carry an issue date
        if 'RHSA' not in title and 'ALSA' not in title:
            continue
        issued = definition.find(
            'n:metadata/n:advisory/n:issued', namespace).attrib['date']
        issued_dt = datetime.datetime.strptime(issued, "%Y-%m-%d")
        # we are only interested in security advisories issued
        # at or after not_before
        if issued_dt < not_before:
            continue
        sa_id = extract_id(title)
        packages = [extract_package(i.attrib['comment']) for
                    i in definition.findall(".//n:criterion", namespace)
                    if 'is earlier than' in i.attrib['comment']]
        res[sa_id] = SecurityAdvisory(
            title=title, id=sa_id, packages=packages)
    return res
def parse_errata(fpath: str) -> Dict[str, SecurityAdvisory]:
    """
    parse_errata loads the alma errata json file and converts every
    record of its 'data' list to a SecurityAdvisory instance.

    The result is keyed by the part of the errata id that follows the
    last '-'. Packages of every advisory are stored as a sorted list of
    unique '<name>-<version>' strings.
    """
    # NOTE(review): packages are plain strings here, while parse_oval
    # stores Package instances - confirm consumers handle both
    with open(fpath, 'r', encoding='utf-8') as errata_file:
        payload = json.load(errata_file)

    advisories = {}
    for entry in payload['data']:
        entry_title = entry['title']
        advisory_id = entry['id'].rpartition('-')[2]
        unique_names = {f"{pkg['name']}-{pkg['version']}"
                        for pkg in entry['packages']}
        advisories[advisory_id] = SecurityAdvisory(
            title=entry_title, id=advisory_id, packages=sorted(unique_names))
    return advisories
def compare(rhel_oval: Dict[str, "SecurityAdvisory"],
            alma_oval: Dict[str, "SecurityAdvisory"],
            alma_errata: Dict[str, "SecurityAdvisory"],
            sa_exclude: List[str],
            packages_exclude: List[str]) -> Tuple[dict, list]:
    """
    compare checks every rhel oval SA against alma oval and alma errata.

    SA whose 'ALSA-<id>' name is listed in sa_exclude are skipped.
    Packages whose name is listed in packages_exclude are not checked.

    Returns a tuple (report, diff):
      - report is a dict of counters and sorted lists (see keys below)
      - diff is a list of {'sa_name': ..., 'diff': ...} dicts, one per
        found difference

    An SA is counted in 'good_sa_count' only if neither the oval check
    nor the errata check found a difference for it.
    """
    diff = []
    report = {
        # total amount of security advisories
        'total_sa_count': 0,
        # amount of SA that match with rhel
        'good_sa_count': 0,
        # total amount of differences
        'diff_count': 0,
        # list of SA excluded from diff check
        'excluded_sa': [],
        # list of packages excluded from diff check
        'excluded_pkg': [],
        # amount of SA that don't exist in alma oval file
        'oval_missing_sa_count': 0,
        # amount of oval SA that have missing packages
        'oval_missing_pkg_sa_count': 0,
        # list of missing oval SA
        'oval_missing_sa': [],
        # list of oval SA that have missing packages
        'oval_missing_pkg_sa': [],
        # amount of SA that don't exist in errata file
        'errata_missing_sa_count': 0,
        # amount of errata SA that have missing packages
        'errata_missing_pkg_sa_count': 0,
        # list of SA that are missing in errata file
        'errata_missing_sa': [],
        # list of errata SA with missing packages
        'errata_missing_pkg_sa': [],
        # total amount of unique missing packages across all alma SA
        'missing_packages_unique_count': 0,
        # list of unique packages that are missing across all alma SA
        'missing_packages_unique': []
    }

    def remember_missing_packages(missing_packages: List[str]) -> None:
        # adds packages to the report, every package is counted once
        for missing_package in missing_packages:
            if missing_package not in report['missing_packages_unique']:
                report['missing_packages_unique'].append(missing_package)
                report['missing_packages_unique_count'] += 1

    for rhel_sa_id, rhel_sa in rhel_oval.items():
        report['total_sa_count'] += 1
        sa_name = f'ALSA-{rhel_sa_id}'

        # filtering out SA
        if sa_name in sa_exclude:
            report['excluded_sa'].append(sa_name)
            continue

        # filtering out packages
        packages_to_check: List["Package"] = []
        for package in rhel_sa.packages:
            if package.name in packages_exclude:
                if str(package) not in report['excluded_pkg']:
                    report['excluded_pkg'].append(str(package))
            else:
                packages_to_check.append(package)

        # becomes False as soon as any check below finds a difference
        sa_is_good = True

        # check oval
        if rhel_sa_id not in alma_oval:
            sa_is_good = False
            report['diff_count'] += 1
            diff.append({'sa_name': sa_name, 'diff': 'SA is missing in oval'})
            report['oval_missing_sa'].append(sa_name)
            report['oval_missing_sa_count'] += 1
        else:
            # check if some packages are missing from oval SA
            alma_oval_packages = alma_oval[rhel_sa_id].packages
            alma_oval_missing_packages = [str(r) for r in packages_to_check
                                          if r not in alma_oval_packages]
            if alma_oval_missing_packages:
                sa_is_good = False
                report['diff_count'] += 1
                diff_str = f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"
                diff.append({'sa_name': sa_name,
                             'diff': diff_str})
                report['oval_missing_pkg_sa'].append(sa_name)
                report['oval_missing_pkg_sa_count'] += 1
                remember_missing_packages(alma_oval_missing_packages)

        # check errata
        if rhel_sa_id not in alma_errata:
            sa_is_good = False
            report['errata_missing_sa'].append(sa_name)
            report['errata_missing_sa_count'] += 1
            report['diff_count'] += 1
            diff.append(
                {'sa_name': sa_name, 'diff': 'SA is missing in errata'})
        else:
            # check if some packages are missing from errata SA
            # NOTE(review): parse_errata stores packages as plain
            # '<name>-<version>' strings while packages_to_check holds
            # Package instances - confirm that Package equality handles
            # strings, otherwise every package is reported as missing
            alma_errata_packages = alma_errata[rhel_sa_id].packages
            alma_errata_missing_packages = [
                str(r) for r in packages_to_check
                if r not in alma_errata_packages]
            if alma_errata_missing_packages:
                sa_is_good = False
                report['diff_count'] += 1
                diff_str = f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"
                diff.append({'sa_name': sa_name,
                             'diff': diff_str})
                report['errata_missing_pkg_sa'].append(sa_name)
                report['errata_missing_pkg_sa_count'] += 1
                remember_missing_packages(alma_errata_missing_packages)

        if sa_is_good:
            # if we are here, both oval and errata checks were passed
            report['good_sa_count'] += 1

    for item in report.values():
        if isinstance(item, list):
            item.sort()
    return report, diff
# starting point
2022-12-29 14:29:18 +00:00
def comparer_run(config: Config) -> Dict[str, Any]:
    """
    comparer_run is the starting point of the comparer component.

    For every release from config.releases it downloads and parses
    rhel oval, alma oval and alma errata, compares them and stores the
    report, the diff and the source urls under the release key.

    The result also holds 'report_generated' and 'sa_not_before'
    timestamps multiplied by 1000.
    """
    result = {}
    for release, urls in config.releases.items():
        logging.info('Processing release %i', release)

        # rhel oval
        logging.info('downloading rhel oval')
        rhel_oval_path = download_oval(urls.rhel_oval_url,
                                       config.download_dir)
        logging.info('parsing rhel oval')
        rhel_advisories = parse_oval(rhel_oval_path, config.not_before)

        # alma oval
        logging.info('downloading alma oval')
        alma_oval_path = download_oval(urls.alma_oval_url,
                                       download_dir=config.download_dir)
        logging.info('parsing alma oval')
        alma_oval_advisories = parse_oval(alma_oval_path, config.not_before)

        # alma errata
        logging.info('downloading alma errata')
        alma_errata_path = download_errata(
            urls.alma_errata_url, release, config.download_dir)
        logging.info('parsing alma errata')
        alma_errata_advisories = parse_errata(alma_errata_path)

        logging.info('comparing rhel and alma')
        release_report, release_diff = compare(
            rhel_advisories,
            alma_oval_advisories,
            alma_errata_advisories,
            config.sa_exclude,
            config.packages_exclude,
        )

        release_result = {'report': release_report, 'diff': release_diff}
        release_result.update(
            rhel_oval_url=urls.rhel_oval_url,
            alma_oval_url=urls.alma_oval_url,
            alma_errata_url=urls.alma_errata_url,
        )
        result[release] = release_result

    generated_at = datetime.datetime.now().timestamp() * 1000
    result['report_generated'] = generated_at
    result['sa_not_before'] = config.not_before.timestamp() * 1000
    return result