# coding=utf-8
"""
This tool generates packages.json, the file consumed by pungi
through its `gather_prepopulate` parameter.

The repodata parsing is based on the createrepo_c example:
https://github.com/rpm-software-management/createrepo_c/blob/master/examples/python/repodata_parsing.py
"""

import argparse
import binascii
import gzip
import json
import logging
import lzma
import os
import re
import tempfile
from collections import defaultdict
from dataclasses import dataclass, field
from itertools import tee
from pathlib import Path
from typing import (
    AnyStr,
    Dict,
    List,
    Any,
    Iterator,
    Optional,
    Tuple,
    Union,
)
from urllib.parse import urljoin

import requests
import rpm
import yaml
from createrepo_c import (
    Package,
    PackageIterator,
    Repomd,
    RepomdRecord,
)
from kobo.rpmlib import parse_nvra

logging.basicConfig(level=logging.INFO)


def _is_compressed_file(first_two_bytes: bytes, initial_bytes: bytes):
    # Compare the hex-encoded magic bytes of a file with an expected value
    return binascii.hexlify(first_two_bytes) == initial_bytes


def is_gzip_file(first_two_bytes):
    # gzip streams start with the magic bytes 0x1f 0x8b
    return _is_compressed_file(
        first_two_bytes=first_two_bytes,
        initial_bytes=b'1f8b',
    )


def is_xz_file(first_two_bytes):
    # xz streams start with 0xfd 0x37 (the first two bytes of fd 37 7a 58 5a 00)
    return _is_compressed_file(
        first_two_bytes=first_two_bytes,
        initial_bytes=b'fd37',
    )
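

# A minimal usage sketch for the compression helpers above; it is purely
# illustrative and not used by the generator itself.
def _sniff_compression(path: str) -> str:
    """Classify a file by the magic bytes checked above."""
    with open(path, 'rb') as fd:
        first_two_bytes = fd.read(2)
    if is_gzip_file(first_two_bytes):
        return 'gzip'
    if is_xz_file(first_two_bytes):
        return 'xz'
    return 'unknown'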


@dataclass
class RepoInfo:
    # Path to a directory that contains repo directories, e.g. '/var/repos'
    # with 'appstream', 'baseos', etc. inside, or a remote base URL such as
    # 'http://koji.cloudlinux.com/mirrors/rhel_mirror'
    path: str
    # Name of the folder that holds a repodata folder, e.g. 'baseos',
    # 'appstream', etc.
    folder: str
    # Whether the repo is remote or local
    is_remote: bool
    # Whether this is a reference repository (usually a RHEL repo).
    # The package layout of such a repository is taken as the example;
    # only a package that does not exist in a reference repository
    # falls back to the layout of a non-reference one.
    is_reference: bool = False
    # Packages from a 'present' repo are added to a variant;
    # packages from an 'absent' repo are removed from a variant.
    repo_type: str = 'present'


@dataclass
class VariantInfo:
    # Name of the variant, e.g. 'BaseOS', 'AppStream', etc.
    name: AnyStr
    # Architecture of the variant, e.g. 'x86_64', 'i686', etc.
    arch: AnyStr
    # Packages that will not be added to the variant
    excluded_packages: List[str] = field(default_factory=list)
    # Repos of the variant
    repos: List[RepoInfo] = field(default_factory=list)
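

# Purely illustrative: a variant described directly in code rather than via
# the YAML config read by process_config() below. All names and URLs here
# are hypothetical.
_EXAMPLE_VARIANT = VariantInfo(
    name='BaseOS',
    arch='x86_64',
    excluded_packages=['glibc-all-langpacks'],
    repos=[RepoInfo(
        path='http://example.com/mirrors/rhel_mirror',
        folder='baseos',
        is_remote=True,
        is_reference=True,
    )],
)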


class PackagesGenerator:

    # Arches whose packages may appear in a repo of a given arch;
    # 'noarch' is always allowed
    repo_arches = defaultdict(lambda: list(('noarch',)))
    # Extra (multilib) arches shipped alongside a base arch
    addon_repos = {
        'x86_64': ['i686'],
        'ppc64le': [],
        'aarch64': [],
        's390x': [],
        'i686': [],
    }

    def __init__(
        self,
        variants: List[VariantInfo],
        excluded_packages: List[AnyStr],
        included_packages: List[AnyStr],
    ):
        self.variants = variants
        self.pkgs = dict()
        self.excluded_packages = excluded_packages
        self.included_packages = included_packages
        self.tmp_files = []
        for arch, arch_list in self.addon_repos.items():
            self.repo_arches[arch].extend(arch_list)
            self.repo_arches[arch].append(arch)

    def __del__(self):
        for tmp_file in self.tmp_files:
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    @staticmethod
    def _get_full_repo_path(repo_info: RepoInfo):
        result = os.path.join(
            repo_info.path,
            repo_info.folder,
        )
        if repo_info.is_remote:
            result = urljoin(
                repo_info.path + '/',
                repo_info.folder,
            )
        return result
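
    # Illustratively (hypothetical values): a local repo with
    # path='/var/repos' and folder='baseos' resolves to '/var/repos/baseos',
    # while a remote one with path='http://host/mirror' resolves via
    # urljoin to 'http://host/mirror/baseos'.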

    @staticmethod
    def _warning_callback(warning_type, message):
        """
        Warning callback for createrepo_c parsing functions
        """
        print(f'Warning message: "{message}"; warning type: "{warning_type}"')
        return True

    @staticmethod
    def get_remote_file_content(file_url: AnyStr) -> AnyStr:
        """
        Download a remote file and write its content to a temp file
        :param file_url: url of the remote file
        :return: path to the temp file (the caller is responsible
                 for removing it)
        """
        file_request = requests.get(
            url=file_url,
        )
        file_request.raise_for_status()
        with tempfile.NamedTemporaryFile(delete=False) as file_stream:
            file_stream.write(file_request.content)
            return file_stream.name

    @staticmethod
    def _parse_repomd(repomd_file_path: AnyStr) -> Repomd:
        """
        Parse repomd.xml into a Repomd object
        :param repomd_file_path: path to a local repomd.xml
        """
        return Repomd(repomd_file_path)

    @classmethod
    def _parse_modules_file(
        cls,
        modules_file_path: AnyStr,
    ) -> Iterator[Any]:
        """
        Parse modules.yaml.gz and return the parsed data
        :param modules_file_path: path to a local modules.yaml.gz
        :return: iterator of dicts, one for each module in the repo
        """
        with open(modules_file_path, 'rb') as modules_file:
            data = modules_file.read()
            if is_gzip_file(data[:2]):
                data = gzip.decompress(data)
            elif is_xz_file(data[:2]):
                data = lzma.decompress(data)
            return yaml.load_all(
                data,
                Loader=yaml.BaseLoader,
            )
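
    # Each document yielded above is a modulemd mapping; a simplified,
    # illustrative shape:
    #   {'document': 'modulemd', 'version': ..., 'data': {
    #       'name': ..., 'stream': ..., 'arch': ...,
    #       'artifacts': {'rpms': [...]}}}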

    def _get_repomd_records(
        self,
        repo_info: RepoInfo,
    ) -> List[RepomdRecord]:
        """
        Fetch and parse repomd.xml, extracting its repomd records
        :param repo_info: structure which contains info about a current repo
        :return: list of repomd records
        """
        repomd_file_path = os.path.join(
            repo_info.path,
            repo_info.folder,
            'repodata',
            'repomd.xml',
        )
        if repo_info.is_remote:
            repomd_file_path = urljoin(
                urljoin(
                    repo_info.path + '/',
                    repo_info.folder,
                ) + '/',
                'repodata/repomd.xml',
            )
            repomd_file_path = self.get_remote_file_content(repomd_file_path)

        repomd_object = self._parse_repomd(repomd_file_path)
        if repo_info.is_remote:
            os.remove(repomd_file_path)
        return repomd_object.records

    def _download_repomd_records(
        self,
        repo_info: RepoInfo,
        repomd_records: List[RepomdRecord],
        repomd_records_dict: Dict[str, str],
    ):
        """
        Download the primary, filelists and other repodata records
        :param repo_info: structure which contains info about a current repo
        :param repomd_records: list of repomd records
        :param repomd_records_dict: dict with paths to repodata files,
               filled in by this method
        """
        for repomd_record in repomd_records:
            if repomd_record.type not in (
                'primary',
                'filelists',
                'other',
            ):
                continue
            repomd_record_file_path = os.path.join(
                repo_info.path,
                repo_info.folder,
                repomd_record.location_href,
            )
            if repo_info.is_remote:
                repomd_record_file_path = self.get_remote_file_content(
                    repomd_record_file_path)
                self.tmp_files.append(repomd_record_file_path)
            repomd_records_dict[repomd_record.type] = repomd_record_file_path

    def _parse_module_repomd_record(
        self,
        repo_info: RepoInfo,
        repomd_records: List[RepomdRecord],
    ) -> List[Dict]:
        """
        Download and parse the modules metadata of a repo
        :param repo_info: structure which contains info about a current repo
        :param repomd_records: list of repomd records
        :return: list of dicts, one for each module in the repo
        """
        for repomd_record in repomd_records:
            if repomd_record.type != 'modules':
                continue
            repomd_record_file_path = os.path.join(
                repo_info.path,
                repo_info.folder,
                repomd_record.location_href,
            )
            if repo_info.is_remote:
                repomd_record_file_path = self.get_remote_file_content(
                    repomd_record_file_path)
                self.tmp_files.append(repomd_record_file_path)
            return list(self._parse_modules_file(
                repomd_record_file_path,
            ))
        # A repo without a modules record yields no module documents
        return []

    @staticmethod
    def compare_pkgs_version(package_1: Package, package_2: Package) -> int:
        version_tuple_1 = (
            package_1.epoch,
            package_1.version,
            package_1.release,
        )
        version_tuple_2 = (
            package_2.epoch,
            package_2.version,
            package_2.release,
        )
        return rpm.labelCompare(version_tuple_1, version_tuple_2)
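
    # rpm.labelCompare follows the usual cmp convention: 1 if the first
    # (epoch, version, release) tuple is newer, 0 if both are equal, and
    # -1 if it is older; e.g.
    # rpm.labelCompare(('0', '1.1', '1'), ('0', '1.0', '2')) == 1.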

    def get_packages_iterator(
        self,
        repo_info: RepoInfo,
    ) -> Union[PackageIterator, Iterator]:
        full_repo_path = self._get_full_repo_path(repo_info)
        pkgs_iterator = self.pkgs.get(full_repo_path)
        if pkgs_iterator is None:
            repomd_records = self._get_repomd_records(
                repo_info=repo_info,
            )
            repomd_records_dict = {}  # type: Dict[str, str]
            self._download_repomd_records(
                repo_info=repo_info,
                repomd_records=repomd_records,
                repomd_records_dict=repomd_records_dict,
            )
            pkgs_iterator = PackageIterator(
                primary_path=repomd_records_dict['primary'],
                filelists_path=repomd_records_dict['filelists'],
                other_path=repomd_records_dict['other'],
                warningcb=self._warning_callback,
            )
        # tee the iterator so that a fresh copy is cached per repo and the
        # repodata does not have to be parsed again on the next call
        pkgs_iterator, self.pkgs[full_repo_path] = tee(pkgs_iterator)
        return pkgs_iterator

    def get_package_arch(
        self,
        package: Package,
        variant_arch: str,
    ) -> str:
        result = variant_arch
        if package.arch in self.repo_arches[variant_arch]:
            result = package.arch
        return result

    def is_skipped_module_package(
        self,
        package: Package,
        variant_arch: str,
    ) -> bool:
        package_key = self.get_package_key(package, variant_arch)
        # Even a module package is added to packages.json if it is present
        # in the list of included packages
        return 'module' in package.release and not any(
            re.search(
                f'^{included_pkg}$',
                package_key,
            ) or included_pkg in (package.name, package_key)
            for included_pkg in self.included_packages
        )
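
    # The modular check above relies on the convention that modular RPMs
    # carry 'module' in their release tag, e.g. a release such as
    # '3.module_el8.1.0+199+8f0a6bbd' (illustrative value).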

    def is_excluded_package(
        self,
        package: Package,
        variant_arch: str,
        excluded_packages: List[str],
    ) -> bool:
        package_key = self.get_package_key(package, variant_arch)
        return any(
            re.search(
                f'^{excluded_pkg}$',
                package_key,
            ) or excluded_pkg in (package.name, package_key)
            for excluded_pkg in excluded_packages
        )

    @staticmethod
    def get_source_rpm_name(package: Package) -> str:
        source_rpm_nvra = parse_nvra(package.rpm_sourcerpm)
        return source_rpm_nvra['name']
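
    # For example (illustrative value), parse_nvra('bash-4.4.20-1.el8.src.rpm')
    # yields {'name': 'bash', ...}, so the source RPM name here is 'bash'.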

    def get_package_key(self, package: Package, variant_arch: str) -> str:
        return (
            f'{package.name}.'
            f'{self.get_package_arch(package, variant_arch)}'
        )
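
    # A package key is '<name>.<arch>', e.g. 'bash.x86_64' for a package
    # named 'bash' whose arch resolves to x86_64 (illustrative values).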

    def generate_packages_json(
        self,
    ) -> Dict[AnyStr, Dict[AnyStr, Dict[AnyStr, List[AnyStr]]]]:
        """
        Generate the content of packages.json
        """
        packages = defaultdict(lambda: defaultdict(lambda: {
            'variants': list(),
        }))
        for variant_info in self.variants:
            for repo_info in variant_info.repos:
                is_reference = repo_info.is_reference
                for package in self.get_packages_iterator(repo_info=repo_info):
                    if self.is_skipped_module_package(
                        package=package,
                        variant_arch=variant_info.arch,
                    ):
                        continue
                    if self.is_excluded_package(
                        package=package,
                        variant_arch=variant_info.arch,
                        excluded_packages=self.excluded_packages,
                    ):
                        continue
                    if self.is_excluded_package(
                        package=package,
                        variant_arch=variant_info.arch,
                        excluded_packages=variant_info.excluded_packages,
                    ):
                        continue
                    package_key = self.get_package_key(
                        package,
                        variant_info.arch,
                    )
                    source_rpm_name = self.get_source_rpm_name(package)
                    package_info = packages[source_rpm_name][package_key]
                    if 'is_reference' not in package_info:
                        # First time this package key is seen
                        package_info['variants'].append(variant_info.name)
                        package_info['is_reference'] = is_reference
                        package_info['package'] = package
                    elif not package_info['is_reference'] or (
                        package_info['is_reference'] == is_reference and
                        self.compare_pkgs_version(
                            package_1=package,
                            package_2=package_info['package'],
                        ) > 0
                    ):
                        # A reference repo overrides a non-reference one, and
                        # a newer package of the same kind overrides an older
                        # one ('and' binds tighter than 'or', so these
                        # parentheses preserve the original semantics)
                        package_info['variants'] = [variant_info.name]
                        package_info['is_reference'] = is_reference
                        package_info['package'] = package
                    elif self.compare_pkgs_version(
                        package_1=package,
                        package_2=package_info['package'],
                    ) == 0 and repo_info.repo_type != 'absent':
                        # The same version seen in another variant's repo
                        package_info['variants'].append(variant_info.name)
        result = defaultdict(lambda: defaultdict(
            lambda: defaultdict(list),
        ))
        for variant_info in self.variants:
            for source_rpm_name, packages_info in packages.items():
                for package_key, package_info in packages_info.items():
                    variant_pkgs = result[variant_info.name][variant_info.arch]
                    if variant_info.name not in package_info['variants']:
                        continue
                    variant_pkgs[source_rpm_name].append(package_key)
        return result
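

# PackagesGenerator.generate_packages_json() returns a mapping of the shape
# {variant: {arch: {source_rpm_name: [package_keys]}}}; serialized to JSON it
# looks roughly like (illustrative values):
#
#   {"BaseOS": {"x86_64": {"bash": ["bash.x86_64"]}}}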


def create_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        '--config',
        type=Path,
        default=Path('config.yaml'),
        required=False,
        help='Path to a config',
    )
    parser.add_argument(
        '-o',
        '--json-output-path',
        type=str,
        help='Full path to the output json file',
        required=True,
    )

    return parser


def read_config(config_path: Path) -> Optional[Dict]:
    if not config_path.exists():
        logging.error('Config file "%s" does not exist', config_path)
        exit(1)
    with config_path.open('r') as config_fd:
        return yaml.safe_load(config_fd)


def process_config(config_data: Dict) -> Tuple[
    List[VariantInfo],
    List[str],
    List[str],
]:
    excluded_packages = config_data.get('excluded_packages', [])
    included_packages = config_data.get('included_packages', [])
    variants = [VariantInfo(
        name=variant_name,
        arch=variant_info['arch'],
        excluded_packages=variant_info.get('excluded_packages', []),
        repos=[RepoInfo(
            path=variant_repo['path'],
            folder=variant_repo['folder'],
            is_remote=variant_repo['remote'],
            is_reference=variant_repo['reference'],
            repo_type=variant_repo.get('repo_type', 'present'),
        ) for variant_repo in variant_info['repos']]
    ) for variant_name, variant_info in config_data['variants'].items()]
    return variants, excluded_packages, included_packages
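

# A sketch of the expected config.yaml layout, inferred from process_config()
# above; all paths and package names are hypothetical:
#
#   excluded_packages:
#     - glibc-all-langpacks
#   included_packages:
#     - kernel-rt.*
#   variants:
#     BaseOS:
#       arch: x86_64
#       excluded_packages: []
#       repos:
#         - path: http://example.com/mirrors/rhel_mirror
#           folder: baseos
#           remote: true
#           reference: true
#           repo_type: present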


def cli_main():
    args = create_parser().parse_args()
    variants, excluded_packages, included_packages = process_config(
        config_data=read_config(args.config)
    )
    pg = PackagesGenerator(
        variants=variants,
        excluded_packages=excluded_packages,
        included_packages=included_packages,
    )
    result = pg.generate_packages_json()
    with open(args.json_output_path, 'w') as packages_file:
        json.dump(
            result,
            packages_file,
            indent=4,
            sort_keys=True,
        )


if __name__ == '__main__':
    cli_main()