ALBS-901: New service albs-oval-errata-dif #1

Merged
kzhukov merged 11 commits from ALBS-901 into main 2023-01-11 10:43:42 +00:00
7 changed files with 146 additions and 100 deletions
Showing only changes of commit fb2b1de2c1 - Show all commits

View File

@ -1,7 +1,11 @@
"""
albs_oval_errata_diff.py is a service startup script
"""
import sys
from albs_oval_errata_diff.start import start
start()
try:
YAML_PATH = sys.argv[1]
except IndexError:
print(f"Usage {sys.argv[0]} config.yml")
start(YAML_PATH)

View File

@ -1,80 +1,87 @@
"""
package comparer.py implements difference checking logic
"""
import bz2
import datetime
from pathlib import Path
import re
import requests
from typing import Tuple, List, Dict, Any
import xml.etree.ElementTree as ET
import logging
import json
import xml.etree.ElementTree as ET
from .config import DOWNLOAD_DIR, NOT_BEFORE, RELEASES, SA_EXCLUDE, PACKAGES_EXCLUDE
from .sa import SecurityAdvisory
import requests
from .config import Config
from .package import Package
from .sa import SecurityAdvisory
def download_oval(url: str, download_dir: Path) -> Path:
    """
    Download a bz2-compressed OVAL file, decompress it on the fly
    and return the path of the saved XML file.

    Args:
        url: location of the *.xml.bz2 OVAL file.
        download_dir: directory to store the decompressed file in.

    Returns:
        Path of the decompressed XML file (return annotation fixed:
        the function returns a Path, not a str).

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    response = requests.get(url, stream=True, timeout=30)
    # fail early on HTTP errors instead of "decompressing" an error page
    response.raise_for_status()
    decompressor = bz2.BZ2Decompressor()
    fname = url.split('/')[-1].replace('.bz2', '')
    fpath = download_dir / fname
    with open(fpath, 'wb') as flw:
        for chunk in response.iter_content(chunk_size=128):
            flw.write(decompressor.decompress(chunk))
    return fpath
def download_errata(url: str, release_version: int, download_dir: Path) -> Path:
    """
    Download the errata_full.json file and return its file path.

    Args:
        url: location of the errata JSON file.
        release_version: distro major version, used to name the saved file.
        download_dir: directory to store the downloaded file in.

    Returns:
        Path of the saved JSON file (return annotation fixed: the
        function returns a Path, not a str).

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    response = requests.get(url, stream=True, timeout=30)
    # fail early on HTTP errors instead of saving an error page as JSON
    response.raise_for_status()
    fname = f'alma-{release_version}.json'
    fpath = download_dir / fname
    with open(fpath, 'wb') as errata_file:
        for chunk in response.iter_content(chunk_size=128):
            errata_file.write(chunk)
    return fpath
def parse_oval(fpath: str) -> Dict[str, SecurityAdvisory]:
def parse_oval(fpath: str, not_before: datetime.datetime) -> Dict[str, SecurityAdvisory]:
"""
converting oval xml file to dict
"""
def extract_package(title: str) -> Package:
r = r'(.*) is earlier than \d+:(.+?(?=-))'
res = re.search(r, title)
regexp = r'(.*) is earlier than \d+:(.+?(?=-))'
res = re.search(regexp, title)
name = res.group(1)
version = res.group(2)
return Package(name=name, version=version)
def extract_id(title: str) -> str:
r = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
res = re.search(r, title)
regexp = r'[RH|AL]SA-(\d{4}:\d+)(.*)'
res = re.search(regexp, title)
return res.group(1)
tree = ET.parse(fpath)
root = tree.getroot()
ns = {
namespase = {
'n': 'http://oval.mitre.org/XMLSchema/oval-definitions-5',
}
res = {}
for definition in root.findall('n:definitions/', ns):
title = definition.find('n:metadata/n:title', ns).text
for definition in root.findall('n:definitions/', namespase):
title = definition.find('n:metadata/n:title', namespase).text
issued = definition.find(
'n:metadata/n:advisory/n:issued', ns).attrib['date']
'n:metadata/n:advisory/n:issued', namespase).attrib['date']
issued_dt = datetime.datetime.strptime(issued, "%Y-%m-%d")
# we are only interested in Security advisories after RHEL 8.3
if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < NOT_BEFORE:
if ('RHSA' not in title and 'ALSA' not in title) or issued_dt < not_before:
kzhukov marked this conversation as resolved
Review

It's rare, but security advisories can also be RHBAs or RHEAs. Up to you if want to add this to the logic, nothing against merging this as it is now.

It's rare, but security advisories can also be RHBAs or RHEAs. Up to you if want to add this to the logic, nothing against merging this as it is now.
Review

As a tip, [RH|AL]BAs and [RH|AL]EAs that became security advisories must have the severity field set to any of the valid values, these are: Low, Moderate, Important or Critical. You can check them here https://access.redhat.com/security/updates/classification

As a tip, [RH|AL]BAs and [RH|AL]EAs that became security advisories must have the severity field set to any of the valid values, these are: Low, Moderate, Important or Critical. You can check them here https://access.redhat.com/security/updates/classification
Review

Thanks for heads up
I created task for adding support of BA/EA in https://cloudlinux.atlassian.net/browse/ALBS-915

I will work on this feature in separate branch. This one I will merge

Thanks for heads up I created task for adding support of BA/EA in https://cloudlinux.atlassian.net/browse/ALBS-915 I will work on this feature in separate branch. This one I will merge
continue
sa_id = extract_id(title)
packages = [extract_package(i.attrib['comment']) for i in definition.findall(".//n:criterion", ns)
packages = [extract_package(i.attrib['comment']) for
i in definition.findall(".//n:criterion", namespase)
if 'is earlier than' in i.attrib['comment']]
res[sa_id] = SecurityAdvisory(
title=title, id=sa_id, packages=packages)
@ -104,7 +111,9 @@ def parse_errata(fpath: str) -> Dict[str, SecurityAdvisory]:
def compare(rhel_oval: Dict[str, SecurityAdvisory],
alma_oval: Dict[str, SecurityAdvisory],
alma_errata: Dict[str, SecurityAdvisory]) -> Tuple[dict, list]:
alma_errata: Dict[str, SecurityAdvisory],
sa_exclude: List[str],
packages_exclude: List[str]) -> Tuple[dict, list]:
"""
compares rhel oval with alma oval and alma errata
"""
@ -147,18 +156,18 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
sa_name = f'ALSA-{rhel_sa_id}'
# filtering out SA
if sa_name in SA_EXCLUDE:
if sa_name in sa_exclude:
report['excluded_sa'].append(sa_name)
continue
# filtering out packages
packages_to_check: List[Package] = []
for p in rhel_sa.packages:
if any(p.name == i for i in PACKAGES_EXCLUDE):
if str(p) not in report['excluded_pkg']:
report['excluded_pkg'].append(str(p))
for package in rhel_sa.packages:
if any(package.name == i for i in packages_exclude):
if str(package) not in report['excluded_pkg']:
report['excluded_pkg'].append(str(package))
else:
packages_to_check.append(p)
packages_to_check.append(package)
# check oval
try:
@ -175,13 +184,15 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
if r not in alma_oval_packages]
if alma_oval_missing_packages:
report['diff_count'] += 1
diff_str = f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"
diff.append({'sa_name': sa_name,
'diff': f"missing packages in oval SA: {','.join(alma_oval_missing_packages)}"})
'diff': diff_str})
report['oval_missing_pkg_sa'].append(sa_name)
report['oval_missing_pkg_sa_count'] += 1
for mp in alma_oval_missing_packages:
if mp not in report['missing_packages_unique']:
report['missing_packages_unique'].append(mp)
for missing_package in alma_oval_missing_packages:
if missing_package not in report['missing_packages_unique']:
report['missing_packages_unique'].append(
missing_package)
report['missing_packages_unique_count'] += 1
# check errata
@ -200,13 +211,14 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
str(r) for r in packages_to_check if r not in alma_errata_packages]
if alma_errata_missing_packages:
report['diff_count'] += 1
diff_str = f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"
diff.append({'sa_name': sa_name,
'diff': f"missing packages in errata SA: {','.join(alma_errata_missing_packages)}"})
'diff': diff_str})
report['errata_missing_pkg_sa'].append(sa_name)
report['errata_missing_pkg_sa_count'] += 1
for mp in alma_errata_missing_packages:
if mp not in report['missing_packages_unique']:
report['missing_packages_unique'].append(mp)
for missing_package in alma_errata_missing_packages:
if missing_package not in report['missing_packages_unique']:
report['missing_packages_unique'].append(missing_package)
report['missing_packages_unique_count'] += 1
else:
# if we here, all checks were passed
@ -219,36 +231,45 @@ def compare(rhel_oval: Dict[str, SecurityAdvisory],
# starting point
def comparer_run(config: Config) -> Dict[str, Any]:
    """
    comparer_run is the starting point of the comparer component.

    For every release in the config it downloads and parses the RHEL
    OVAL, the Alma OVAL and the Alma errata, compares them and collects
    the per-release reports.

    Args:
        config: service configuration (release URLs, download dir,
            cut-off date, SA/package exclude lists).

    Returns:
        Dict keyed by release with 'report'/'diff'/source-URL entries,
        plus 'report_generated' and 'sa_not_before' timestamps
        (both in milliseconds, hence the * 1000).
    """
    result = {}
    for release, urls in config.releases.items():
        logging.info('Processing release %i', release)
        logging.info('downloading rhel oval')
        rhel_file = download_oval(urls.rhel_oval_url, config.download_dir)
        logging.info('parsing rhel oval')
        rhel_oval_dict = parse_oval(rhel_file, config.not_before)
        logging.info('downloading alma oval')
        alma_oval_file = download_oval(
            urls.alma_oval_url, download_dir=config.download_dir)
        logging.info('parsing alma oval')
        alma_oval_dict = parse_oval(alma_oval_file, config.not_before)
        logging.info('downloading alma errata')
        alma_errata_file = download_errata(urls.alma_errata_url,
                                           release, config.download_dir)
        logging.info('parsing alma errata')
        alma_errata_dict = parse_errata(alma_errata_file)
        logging.info('comparing rhel and alma')
        report_release, diff_release = \
            compare(rhel_oval_dict,
                    alma_oval_dict,
                    alma_errata_dict,
                    config.sa_exclude,
                    config.packages_exclude)
        result[release] = {'report': report_release,
                           'diff': diff_release,
                           'rhel_oval_url': urls.rhel_oval_url,
                           'alma_oval_url': urls.alma_oval_url,
                           'alma_errata_url': urls.alma_errata_url}
    result['report_generated'] = datetime.datetime.now().timestamp() * 1000
    result['sa_not_before'] = config.not_before.timestamp() * 1000
    return result

View File

@ -2,12 +2,12 @@
config.py used for generation service configuration based on input json file
'''
from datetime import datetime, date
from datetime import datetime
from pathlib import Path
from typing import Dict, List
from ipaddress import IPv4Address
from pydantic import BaseModel, validator, Field # pylint: disable=import-error
from pydantic import BaseModel, validator, Field # pylint: disable=import-error,no-name-in-module
import yaml
@ -59,7 +59,7 @@ class Config(BaseModel):
server_ip: IPv4Address = Field(
description="IP that will be used by webserver",
default=SERVER_IP)
not_before: date = Field(
not_before: datetime = Field(
description='date to start checking from (YYYY-mm-dd)',
default=NOT_BEFORE)
update_interval_minutes: int = Field(
@ -88,8 +88,7 @@ class Config(BaseModel):
"""
return datetime.strptime(
value,
"%Y-%m-%d"
).date()
"%Y-%m-%d")
def get_config(yml_path: str) -> Config:

View File

@ -1,5 +1,7 @@
"""
package.py contains Package dataclass definition
"""
from dataclasses import dataclass
from typing import List
@dataclass

View File

@ -1,3 +1,6 @@
"""
sa contains SecurityAdvisory dataclass definition
"""
from dataclasses import dataclass
from typing import List
@ -11,5 +14,5 @@ class SecurityAdvisory:
from oval or errata
"""
title: str
id: str
id: str # pylint: disable=invalid-name
packages: List[Package]

View File

@ -2,15 +2,17 @@
service compares rhel oval with alma ovals and errata ovals
results available via API Call
"""
from aiohttp import web
import copy
import logging
from logging.handlers import RotatingFileHandler
import threading
from time import sleep
from ipaddress import IPv4Address
import json
from .config import LOG_FILE, DIFF_FILE, UPDATE_INTERVAL_MINUTES, SERVER_IP, SERVER_PORT
from aiohttp import web
from .config import get_config, Config
from .comparer import comparer_run
@ -19,82 +21,97 @@ diffs = {}
diffs_lock = threading.Lock()
async def web_handler(_):
    """
    web_handler returns diffs as JSON file.

    The request object is unused (hence the `_` parameter); the handler
    serves a snapshot of the module-level `diffs` dict.
    """
    data = {}
    try:
        # BUG FIX: use the lock as a context manager so it is released
        # even if deepcopy raises; the original released it manually and
        # would deadlock every subsequent request after an exception
        with diffs_lock:
            data = copy.deepcopy(diffs)
    except Exception as err:  # pylint: disable=broad-except
        logging.critical("Unhandled exception %s", err, exc_info=True)
    return web.json_response(data=data)
def webserver_run(server_ip: IPv4Address, server_port: str):
    """
    webserver_run starts the webserver component.

    Serves the diff JSON on '/' at the given address and port; blocks
    until the server is stopped.
    """
    application = web.Application()
    routes = [web.get('/', web_handler)]
    application.add_routes(routes)
    web.run_app(app=application, host=str(server_ip), port=server_port)
def diff_checker(config: Config):
    """
    Runs the comparer component in an infinite loop.

    Each iteration regenerates the diff, publishes it into the shared
    `diffs` dict under the lock, dumps it to `config.diff_file` and then
    sleeps for `config.update_interval_minutes`.

    Args:
        config: service configuration.
    """
    while True:
        logging.info("Start comparing")
        # generating new diff
        try:
            result = comparer_run(config)
        except Exception as err:  # pylint: disable=broad-except
            logging.critical("Unhandled exception %s", err, exc_info=True)
        else:
            logging.info("Finished comparing, updating diff dict")
            global diffs  # pylint: disable=invalid-name,global-statement
            # BUG FIX: context manager guarantees the lock is released
            # even on an unexpected error; manual release could deadlock
            with diffs_lock:
                diffs = result
            # dumping
            logging.info("Saving results to disk")
            try:
                with open(config.diff_file, 'w', encoding='utf-8') as flw:
                    json.dump(result, flw, indent=4)
            except Exception as err:  # pylint: disable=broad-except
                logging.critical("Unhandled exception %s", err, exc_info=True)
            logging.info("Done")
        logging.info("Finished comparing, go to sleep for %d minutes",
                     config.update_interval_minutes)
        sleep(config.update_interval_minutes * 60)
def start(yaml_path: str):
    """
    Starts the comparer and webserver components;
    each component runs in its own thread.

    Args:
        yaml_path: path to the service YAML configuration file.
    """
    config = get_config(yaml_path)
    # making sure that parent directories exist
    # BUG FIX: the original iterated [config.log_file, config.log_file]
    # (log_file twice) and never created diff_file's parent directory;
    # parents=True/exist_ok=True also makes creation race-safe
    for path in [config.log_file, config.diff_file]:
        path.parent.mkdir(parents=True, exist_ok=True)
    # configuring logging
    # NOTE(review): FileHandler and RotatingFileHandler both point at
    # config.log_file, so every record is written to that file twice and
    # rotation can clash with the plain handler — confirm whether only
    # the rotating handler is intended; kept as-is to preserve behavior
    handlers = [logging.FileHandler(config.log_file, mode='a'),
                logging.StreamHandler(),
                RotatingFileHandler(config.log_file, maxBytes=10000, backupCount=3)]
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(funcName)s %(message)s',
                        handlers=handlers)
    logging.info("Trying to load diff file from disk")
    try:
        with open(config.diff_file, 'r', encoding='utf-8') as flr:
            loaded_data = json.load(flr)
        global diffs  # pylint: disable=invalid-name,global-statement
        # context manager releases the lock even if assignment fails
        with diffs_lock:
            diffs = loaded_data
    except Exception as err:  # pylint: disable=broad-except
        logging.warning('cant load data from disk %s', err)
    else:
        logging.info('diff file was loaded')
    logging.info("Starting diff_checker in background")
    thread = threading.Thread(target=diff_checker, args=(config,))
    thread.daemon = True
    thread.start()
    logging.info("Starting webserver")
    webserver_run(config.server_ip, config.server_port)

View File

@ -33,8 +33,8 @@ releases:
alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-8.xml.bz2
alma_errata_url: https://errata.almalinux.org/8/errata.full.json
9:
rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2'
alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2'
rhel_oval_url: https://www.redhat.com/security/data/oval/v2/RHEL9/rhel-9.oval.xml.bz2
alma_oval_url: https://repo.almalinux.org/security/oval/org.almalinux.alsa-9.xml.bz2
alma_errata_url: https://errata.almalinux.org/9/errata.full.json
# sa_exclude