@ -9,22 +9,41 @@ https://github.com/rpm-software-management/createrepo_c/blob/master/examples/pyt
import argparse
import gzip
import json
import logging
import lzma
import os
import re
import tempfile
from collections import defaultdict
from typing import AnyStr , Dict , List , Any , Iterator
from itertools import tee
from pathlib import Path
from typing import (
AnyStr ,
Dict ,
List ,
Any ,
Iterator ,
Optional ,
Tuple ,
Union ,
)
import binascii
import createrepo_c as cr
import dnf . subject
import hawkey
from urllib . parse import urljoin
import requests
import rpm
import yaml
from createrepo_c import Package , PackageIterator
from dataclasses import dataclass
from createrepo_c import (
Package ,
PackageIterator ,
Repomd ,
RepomdRecord ,
)
from dataclasses import dataclass , field
from kobo . rpmlib import parse_nvra
logging . basicConfig ( level = logging . INFO )
def _is_compressed_file ( first_two_bytes : bytes , initial_bytes : bytes ) :
@ -51,23 +70,33 @@ class RepoInfo:
# 'appstream', 'baseos', etc.
# Or 'http://koji.cloudlinux.com/mirrors/rhel_mirror' if you are
# using remote repo
path : AnyS tr
path : s tr
# name of folder with a repodata folder. E.g. 'baseos', 'appstream', etc
folder : AnyStr
# name of repo. E.g. 'BaseOS', 'AppStream', etc
name : AnyStr
# architecture of repo. E.g. 'x86_64', 'i686', etc
arch : AnyStr
folder : str
# Is a repo remote or local
is_remote : bool
# Is a reference repository (usually it's a RHEL repo)
# Layout of packages from such repository will be taken as example
# Only layout of specific package (which don't exist
# Only layout of specific package (which doesn't exist
# in a reference repository) will be taken as example
is_reference : bool = False
# The packages from 'present' repo will be added to a variant.
# The packages from 'absent' repo will be removed from a variant.
repo_type : str = ' present '
@dataclass
class VariantInfo:
    """
    Description of one variant and of the repos its packages come from
    """

    # name of variant. E.g. 'BaseOS', 'AppStream', etc
    name: str
    # architecture of variant. E.g. 'x86_64', 'i686', etc
    arch: str
    # Patterns of the packages which will not be added to this variant
    excluded_packages: List[str] = field(default_factory=list)
    # Repos of a variant
    repos: List[RepoInfo] = field(default_factory=list)
class PackagesGenerator :
repo_arches = defaultdict ( lambda : list ( ( ' noarch ' , ) ) )
@ -81,11 +110,12 @@ class PackagesGenerator:
def __init__ (
self ,
repos: List [ Repo Info] ,
variants: List [ Variant Info] ,
excluded_packages : List [ AnyStr ] ,
included_packages : List [ AnyStr ] ,
) :
self . repos = repos
self . variants = variants
self . pkgs = dict ( )
self . excluded_packages = excluded_packages
self . included_packages = included_packages
self . tmp_files = [ ]
@ -98,6 +128,19 @@ class PackagesGenerator:
if os . path . exists ( tmp_file ) :
os . remove ( tmp_file )
@staticmethod
def _get_full_repo_path ( repo_info : RepoInfo ) :
result = os . path . join (
repo_info . path ,
repo_info . folder
)
if repo_info . is_remote :
result = urljoin (
repo_info . path + ' / ' ,
repo_info . folder ,
)
return result
@staticmethod
def _warning_callback ( warning_type , message ) :
"""
@ -123,12 +166,12 @@ class PackagesGenerator:
return file_stream . name
@staticmethod
def _parse_repomd(repomd_file_path: AnyStr) -> Repomd:
    """
    Parse file repomd.xml and create object Repomd
    :param repomd_file_path: path to local repomd.xml
    :return: Repomd object constructed from the given file path
    """
    return Repomd(repomd_file_path)
@classmethod
def _parse_modules_file (
@ -139,7 +182,7 @@ class PackagesGenerator:
"""
Parse modules . yaml . gz and returns parsed data
: param modules_file_path : path to local modules . yaml . gz
: return : List of dict for each module s in a repo
: return : List of dict for each module in a repo
"""
with open ( modules_file_path , ' rb ' ) as modules_file :
@ -156,7 +199,7 @@ class PackagesGenerator:
def _get_repomd_records (
self ,
repo_info : RepoInfo ,
) - > List [ cr. RepomdRecord] :
) - > List [ RepomdRecord] :
"""
Get , parse file repomd . xml and extract from it repomd records
: param repo_info : structure which contains info about a current repo
@ -169,9 +212,15 @@ class PackagesGenerator:
' repomd.xml ' ,
)
if repo_info . is_remote :
repomd_file_path = urljoin (
urljoin (
repo_info . path + ' / ' ,
repo_info . folder
) + ' / ' ,
' repodata/repomd.xml '
)
repomd_file_path = self . get_remote_file_content ( repomd_file_path )
else :
repomd_file_path = repomd_file_path
repomd_object = self . _parse_repomd ( repomd_file_path )
if repo_info . is_remote :
os . remove ( repomd_file_path )
@ -180,7 +229,7 @@ class PackagesGenerator:
def _download_repomd_records (
self ,
repo_info : RepoInfo ,
repomd_records : List [ cr. RepomdRecord] ,
repomd_records : List [ RepomdRecord] ,
repomd_records_dict : Dict [ str , str ] ,
) :
"""
@ -210,13 +259,12 @@ class PackagesGenerator:
def _parse_module_repomd_record (
self ,
repo_info : RepoInfo ,
repomd_records : List [ cr. RepomdRecord] ,
repomd_records : List [ RepomdRecord] ,
) - > List [ Dict ] :
"""
Download repomd records
: param repo_info : structure which contains info about a current repo
: param repomd_records : list with repomd records
: param repomd_records_dict : dict with paths to repodata files
"""
for repomd_record in repomd_records :
if repomd_record . type != ' modules ' :
@ -248,25 +296,13 @@ class PackagesGenerator:
)
return rpm . labelCompare ( version_tuple_1 , version_tuple_2 )
def generate_packages_json (
self
) - > Dict [ AnyStr , Dict [ AnyStr , Dict [ AnyStr , List [ AnyStr ] ] ] ] :
"""
Generate packages . json
"""
packages_json = defaultdict (
lambda : defaultdict (
lambda : defaultdict (
list ,
)
)
)
all_packages = defaultdict ( lambda : { ' variants ' : list ( ) } )
for repo_info in sorted (
self . repos ,
key = lambda i : i . repo_type ,
reverse = True ,
) :
def get_packages_iterator (
self ,
repo_info : RepoInfo ,
) - > Union [ PackageIterator , Iterator ] :
full_repo_path = self . _get_full_repo_path ( repo_info )
pkgs_iterator = self . pkgs . get ( full_repo_path )
if pkgs_iterator is None :
repomd_records = self . _get_repomd_records (
repo_info = repo_info ,
)
@ -276,167 +312,133 @@ class PackagesGenerator:
repomd_records = repomd_records ,
repomd_records_dict = repomd_records_dict ,
)
p ac ka ge s_iterator = PackageIterator (
p kgs_iterator = PackageIterator (
primary_path = repomd_records_dict [ ' primary ' ] ,
filelists_path = repomd_records_dict [ ' filelists ' ] ,
other_path = repomd_records_dict [ ' other ' ] ,
warningcb = self . _warning_callback ,
)
for package in packages_iterator :
if package . arch not in self . repo_arches [ repo_info . arch ] :
package_arch = repo_info . arch
else :
package_arch = package . arch
package_key = f ' { package . name } . { package_arch } '
if ' module ' in package . release and not any (
re . search ( included_package , package . name )
for included_package in self . included_packages
) :
# Even a module package will be added to packages.json if
# it presents in the list of included packages
continue
if package_key not in all_packages :
all_packages [ package_key ] [ ' variants ' ] . append (
( repo_info . name , repo_info . arch )
)
all_packages [ package_key ] [ ' arch ' ] = package_arch
all_packages [ package_key ] [ ' package ' ] = package
all_packages [ package_key ] [ ' type ' ] = repo_info . is_reference
elif repo_info . repo_type == ' absent ' and ( repo_info . name , repo_info . arch ) in all_packages [ package_key ] [ ' variants ' ] :
all_packages [ package_key ] [ ' variants ' ] . remove ( ( repo_info . name , repo_info . arch ) )
# replace an older package if it's not reference or
# a newer package is from reference repo
elif ( not all_packages [ package_key ] [ ' type ' ] or
all_packages [ package_key ] [ ' type ' ] ==
repo_info . is_reference ) and \
self . compare_pkgs_version (
pkgs_iterator , self . pkgs [ full_repo_path ] = tee ( pkgs_iterator )
return pkgs_iterator
def get_package_arch (
self ,
package : Package ,
variant_arch : str ,
) - > str :
result = variant_arch
if package . arch in self . repo_arches [ variant_arch ] :
result = package . arch
return result
def is_skipped_module_package ( self , package : Package ) - > bool :
# Even a module package will be added to packages.json if
# it presents in the list of included packages
return ' module ' in package . release and not any (
re . search ( included_package , package . name )
for included_package in self . included_packages
)
def is_excluded_package (
self ,
package : Package ,
variant_arch : str ,
excluded_packages : List [ str ] ,
) - > bool :
return any (
re . search (
excluded_pkg ,
self . get_package_key ( package , variant_arch ) ,
) for excluded_pkg in excluded_packages
)
@staticmethod
def get_source_rpm_name(package: Package) -> str:
    """
    Extract the name of the source rpm a package was built from
    :param package: package taken from a repo
    :return: value stored under the 'name' key of the result of
             `parse_nvra` applied to `package.rpm_sourcerpm`
    """
    return parse_nvra(package.rpm_sourcerpm)['name']
def get_package_key ( self , package : Package , variant_arch : str ) - > str :
return (
f ' { package . name } . '
f ' { self . get_package_arch ( package , variant_arch ) } '
)
def generate_packages_json(
        self
) -> Dict[AnyStr, Dict[AnyStr, Dict[AnyStr, List[AnyStr]]]]:
    """
    Generate packages.json
    :return: nested mapping
             variant name -> variant arch -> source rpm name ->
             list of package keys as built by `get_package_key`
    """
    # source rpm name -> package key -> info about the package chosen
    # for that key: the variants it goes to, whether it came from
    # a reference repo and the package object itself
    packages = defaultdict(lambda: defaultdict(lambda: {
        'variants': list(),
    }))
    for variant_info in self.variants:
        for repo_info in variant_info.repos:
            is_reference = repo_info.is_reference
            for package in self.get_packages_iterator(repo_info=repo_info):
                if self.is_skipped_module_package(package):
                    continue
                # Globally excluded packages
                if self.is_excluded_package(
                        package,
                        variant_info.arch,
                        self.excluded_packages,
                ):
                    continue
                # Packages excluded only from the current variant
                if self.is_excluded_package(
                        package,
                        variant_info.arch,
                        variant_info.excluded_packages,
                ):
                    continue
                package_key = self.get_package_key(
                    package,
                    variant_info.arch,
                )
                source_rpm_name = self.get_source_rpm_name(package)
                package_info = packages[source_rpm_name][package_key]
                if 'is_reference' not in package_info:
                    # The first occurrence of this package key
                    package_info['variants'].append(variant_info.name)
                    package_info['is_reference'] = is_reference
                    package_info['package'] = package
                    continue
                version_diff = self.compare_pkgs_version(
                    package_1=package,
                    package_2=package_info['package'],
                )
                # A stored package may be replaced if it's not from
                # a reference repo or if the new package is from
                # a reference repo too. The grouping must be explicit:
                # `and` binds tighter than `or`, so without it a stored
                # non-reference package would be replaced by any package,
                # even an older or an equal one
                replaceable = (
                    not package_info['is_reference'] or
                    package_info['is_reference'] == is_reference
                )
                if replaceable and version_diff > 0:
                    package_info['variants'] = [variant_info.name]
                    package_info['is_reference'] = is_reference
                    package_info['package'] = package
                elif version_diff == 0 and repo_info.repo_type != 'absent':
                    # The same version is shared by one more variant
                    package_info['variants'].append(variant_info.name)
    result = defaultdict(lambda: defaultdict(
        lambda: defaultdict(list),
    ))
    for variant_info in self.variants:
        for source_rpm_name, packages_info in packages.items():
            for package_key, package_info in packages_info.items():
                # The lookup goes before the check on purpose: it creates
                # the variant/arch level even if nothing is added to it
                variant_pkgs = result[variant_info.name][variant_info.arch]
                if variant_info.name not in package_info['variants']:
                    continue
                variant_pkgs[source_rpm_name].append(package_key)
    return result
def create_parser ( ) :
parser = argparse . ArgumentParser ( )
parser . add_argument (
' --repo-path ' ,
action = ' append ' ,
help = ' Path to a folder with repofolders. E.g. " /var/repos " or '
' " http://koji.cloudlinux.com/mirrors/rhel_mirror " ' ,
required = True ,
)
parser . add_argument (
' --repo-folder ' ,
action = ' append ' ,
help = ' A folder which contains folder repodata . E.g. " baseos-stream " ' ,
required = True ,
)
parser . add_argument (
' --repo-arch ' ,
action = ' append ' ,
help = ' What architecture packages a repository contains. E.g. " x86_64 " ' ,
required = True ,
)
parser . add_argument (
' --repo-name ' ,
action = ' append ' ,
help = ' Name of a repository. E.g. " AppStream " ' ,
required = True ,
)
parser . add_argument (
' --is-remote ' ,
action = ' append ' ,
type = str ,
help = ' A repository is remote or local ' ,
choices = [ ' yes ' , ' no ' ] ,
required = True ,
)
parser . add_argument (
' --is-reference ' ,
action = ' append ' ,
type = str ,
help = ' A repository is used as reference for packages layout ' ,
choices = [ ' yes ' , ' no ' ] ,
required = True ,
)
parser . add_argument (
' --repo-type ' ,
action = ' append ' ,
type = str ,
help = ' Packages from repository will be removed or added to variant ' ,
choices = [ ' present ' , ' absent ' ] ,
required = True ,
)
parser . add_argument (
' --excluded-packages ' ,
nargs = ' + ' ,
type = str ,
default = [ ] ,
help = ' A list of globally excluded packages from generated json. '
' All of list elements should be separated by space ' ,
required = False ,
)
parser . add_argument (
' --included-packages ' ,
nargs = ' + ' ,
type = str ,
default = [ ] ,
help = ' A list of globally included packages from generated json. '
' All of list elements should be separated by space ' ,
' -c ' ,
' --config ' ,
type = Path ,
default = Path ( ' config.yaml ' ) ,
required = False ,
help = ' Path to a config ' ,
)
parser . add_argument (
' -o ' ,
' --json-output-path ' ,
type = str ,
help = ' Full path to output json file ' ,
@ -446,32 +448,45 @@ def create_parser():
return parser
def read_config(config_path: Path) -> Optional[Dict]:
    """
    Read a config file and parse it as YAML
    :param config_path: path to a config
    :return: data returned by `yaml.safe_load` for the content of the file
    :raises SystemExit: with code 1 if the config does not exist
    """
    if not config_path.exists():
        logging.error('A config by path "%s" does not exist', config_path)
        # `exit` is injected into builtins by the `site` module and is
        # missing e.g. under `python -S`; raising SystemExit always works
        raise SystemExit(1)
    with config_path.open('r') as config_fd:
        return yaml.safe_load(config_fd)
def process_config(config_data: Dict) -> Tuple[
    List[VariantInfo],
    List[str],
    List[str],
]:
    """
    Convert parsed config data to the structures used by the generator
    :param config_data: dict with the mandatory key 'variants' (variant name
                        -> dict with 'arch', 'repos' and optional
                        'excluded_packages') and the optional keys
                        'excluded_packages' and 'included_packages'
    :return: tuple of the list of variants, the list of globally excluded
             packages and the list of globally included packages
    """
    excluded_packages = config_data.get('excluded_packages', [])
    included_packages = config_data.get('included_packages', [])
    variants = []
    for variant_name, variant_info in config_data['variants'].items():
        variant_arch = variant_info['arch']
        variant_excluded = variant_info.get('excluded_packages', [])
        variant_repos = []
        for variant_repo in variant_info['repos']:
            variant_repos.append(RepoInfo(
                path=variant_repo['path'],
                folder=variant_repo['folder'],
                is_remote=variant_repo['remote'],
                is_reference=variant_repo['reference'],
                repo_type=variant_repo.get('repo_type', 'present'),
            ))
        variants.append(VariantInfo(
            name=variant_name,
            arch=variant_arch,
            excluded_packages=variant_excluded,
            repos=variant_repos,
        ))
    return variants, excluded_packages, included_packages
def cli_main ( ) :
args = create_parser ( ) . parse_args ( )
repos = [ ]
for repo_path , repo_folder , repo_name , \
repo_arch , is_remote , is_reference , repo_type in zip (
args . repo_path ,
args . repo_folder ,
args . repo_name ,
args . repo_arch ,
args . is_remote ,
args . is_reference ,
args . repo_type ,
) :
repos . append ( RepoInfo (
path = repo_path ,
folder = repo_folder ,
name = repo_name ,
arch = repo_arch ,
is_remote = True if is_remote == ' yes ' else False ,
is_reference = True if is_reference == ' yes ' else False ,
repo_type = repo_type ,
) )
variants , excluded_packages , included_packages = process_config (
config_data = read_config ( args . config )
)
pg = PackagesGenerator (
repos = repos ,
excluded_packages = args . excluded_packages ,
included_packages = args . included_packages ,
variants = variants ,
excluded_packages = excluded_packages ,
included_packages = included_packages ,
)
result = pg . generate_packages_json ( )
with open ( args . json_output_path , ' w ' ) as packages_file :