From 97801e772e3fe518edf2d06748b5b22a462bc3d1 Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Fri, 29 Apr 2022 21:25:59 +0300 Subject: [PATCH 1/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/create_extra_repo.py | 2 +- pungi/scripts/create_packages_json.py | 10 +-- pungi/scripts/gather_modules.py | 123 ++++++++++++++++++++------ tests/test_create_packages_json.py | 7 +- 4 files changed, 107 insertions(+), 35 deletions(-) diff --git a/pungi/scripts/create_extra_repo.py b/pungi/scripts/create_extra_repo.py index 6cf9c186..ebee8007 100644 --- a/pungi/scripts/create_extra_repo.py +++ b/pungi/scripts/create_extra_repo.py @@ -161,7 +161,7 @@ class CreateExtraRepo(PackagesGenerator): if os.path.exists(self.default_modules_yaml_path): os.remove(self.default_modules_yaml_path) - def _get_remote_file_content( + def get_remote_file_content( self, file_url: AnyStr, ) -> AnyStr: diff --git a/pungi/scripts/create_packages_json.py b/pungi/scripts/create_packages_json.py index a95e4e1e..dc51a1eb 100644 --- a/pungi/scripts/create_packages_json.py +++ b/pungi/scripts/create_packages_json.py @@ -27,6 +27,7 @@ from dataclasses import dataclass from .gather_modules import is_gzip_file, is_xz_file + @dataclass class RepoInfo: # path to a directory with repo directories. E.g. '/var/repos' contains @@ -69,7 +70,7 @@ class PackagesGenerator: return True @staticmethod - def _get_remote_file_content(file_url: AnyStr) -> AnyStr: + def get_remote_file_content(file_url: AnyStr) -> AnyStr: """ Get content from a remote file and write it to a temp file :param file_url: url of a remote file @@ -194,7 +195,7 @@ class PackagesGenerator: 'repomd.xml', ) if repo_info.is_remote: - repomd_file_path = self._get_remote_file_content(repomd_file_path) + repomd_file_path = self.get_remote_file_content(repomd_file_path) else: repomd_file_path = repomd_file_path repomd_object = self._parse_repomd(repomd_file_path) @@ -232,9 +233,8 @@ class PackagesGenerator: repomd_record.location_href, ) if repo_info.is_remote: - repomd_record_file_path = self._get_remote_file_content( - repomd_record_file_path, - ) + repomd_record_file_path = self.get_remote_file_content( + repomd_record_file_path) if repomd_record.type == 'modules': modules_data = self._parse_modules_file( repomd_record_file_path, diff --git a/pungi/scripts/gather_modules.py b/pungi/scripts/gather_modules.py index 507d9a39..ac0053b6 100644 --- a/pungi/scripts/gather_modules.py +++ b/pungi/scripts/gather_modules.py @@ -5,12 +5,15 @@ import os from argparse import ArgumentParser, FileType from io import BytesIO from pathlib import Path -from typing import List, AnyStr +from typing import List, AnyStr, Iterable, Union import logging +from urllib.parse import urljoin + import yaml import createrepo_c as cr from typing.io import BinaryIO +from pungi.scripts.create_packages_json import PackagesGenerator EMPTY_FILE = '.empty' @@ -33,31 +36,76 @@ def is_xz_file(first_two_bytes): ) -def grep_list_of_modules_yaml_gz(repo_path: AnyStr) -> List[BytesIO]: +def read_modules_yaml(modules_yaml_path: Union[str, Path]) -> BytesIO: + with open(modules_yaml_path, 'rb') as fp: + return BytesIO(fp.read()) + + +def grep_list_of_modules_yaml(repos_path: AnyStr) -> Iterable[BytesIO]: """ Find all of valid *modules.yaml.gz in repos - :param repo_path: path to a directory which contains repodirs - :return: list of content from *modules.yaml.gz + :param repos_path: path to a directory which contains repo dirs + :return: iterable object of content from *modules.yaml.* """ - result = [] - for path in Path(repo_path).rglob('repomd.xml'): - repo_dir_path = Path(path.parent).parent - repomd_obj = cr.Repomd(str(path)) - for record in repomd_obj.records: - if record.type != 'modules': - continue - with open(os.path.join( - repo_dir_path, + return ( + read_modules_yaml(modules_yaml_path=path.parent) for path in + Path(repos_path).rglob('repodata') + ) + + +def _is_remote(path: str): + return any(path.startswith(protocol) for protocol in ('http', 'https')) + + +def read_modules_yaml_from_specific_repo(repo_path: AnyStr) -> List[BytesIO]: + """ + Read modules_yaml from a specific repo (remote or local) + :param repo_path: path/url to a specific repo + (final dir should contain dir `repodata`) + :return: iterable object of content from *modules.yaml.* + """ + + if _is_remote(repo_path): + repomd_url = urljoin( + repo_path + '/', + 'repodata/repomd.xml', + ) + repomd_file_path = PackagesGenerator.get_remote_file_content( + file_url=repomd_url + ) + else: + repomd_file_path = os.path.join( + repo_path, + 'repodata/repomd.xml', + ) + repomd_obj = cr.Repomd(str(repomd_file_path)) + for record in repomd_obj.records: + if record.type != 'modules': + continue + else: + if _is_remote(repo_path): + modules_yaml_url = urljoin( + repo_path + '/', record.location_href, - ), 'rb') as fp: - result.append( - BytesIO(fp.read()) ) - return result + modules_yaml_path = PackagesGenerator.get_remote_file_content( + file_url=modules_yaml_url + ) + else: + modules_yaml_path = os.path.join( + repo_path, + record.location_href, + ) + return [read_modules_yaml(modules_yaml_path=modules_yaml_path)] -def collect_modules(modules_paths: List[BinaryIO], target_dir: str): +def collect_modules( + modules_paths: List[BinaryIO], + target_dir: str, + grep_only_modules_data: bool = False, + grep_only_modules_defaults_data: bool = False, +): """ Read given modules.yaml.gz files and export modules and modulemd files from it. @@ -79,12 +127,15 @@ def collect_modules(modules_paths: List[BinaryIO], target_dir: str): elif is_xz_file(data[:2]): data = lzma.decompress(data) documents = yaml.load_all(data, Loader=yaml.BaseLoader) + xor_flag = grep_only_modules_defaults_data is grep_only_modules_data for doc in documents: - if doc['document'] == 'modulemd-defaults': + path = None + if doc['document'] == 'modulemd-defaults' and \ + (grep_only_modules_defaults_data or xor_flag): name = f"{doc['data']['module']}.yaml" path = os.path.join(module_defaults_path, name) logging.info('Found %s module defaults', name) - else: + elif grep_only_modules_data or xor_flag: # pungi.phases.pkgset.sources.source_koji.get_koji_modules stream = doc['data']['stream'].replace('-', '_') doc_data = doc['data'] @@ -106,13 +157,24 @@ def collect_modules(modules_paths: List[BinaryIO], target_dir: str): 'RPM %s does not have explicit list of artifacts', name ) - - with open(path, 'w') as f: - yaml.dump(doc, f, default_flow_style=False) + if path is not None: + with open(path, 'w') as f: + yaml.dump(doc, f, default_flow_style=False) def cli_main(): parser = ArgumentParser() + content_type_group = parser.add_mutually_exclusive_group(required=False) + content_type_group.add_argument( + '--get-only-modules-data', + action='store_true', + help='Parse and get only modules data', + ) + content_type_group.add_argument( + '--get-only-modules-defaults-data', + action='store_true', + help='Parse and get only modules_defaults data', + ) path_group = parser.add_mutually_exclusive_group(required=True) path_group.add_argument( '-p', '--path', @@ -127,16 +189,27 @@ def cli_main(): default=None, help='Path to a directory which contains repodirs. E.g. /var/repos' ) + path_group.add_argument( + '-rd', '--repodata-path', + required=False, + type=str, + default=None, + help='Path/url to a directory with repodata dir', + ) parser.add_argument('-t', '--target', required=True) namespace = parser.parse_args() - if namespace.repo_path is None: + if namespace.repodata_path is not None: + modules = read_modules_yaml_from_specific_repo(namespace.repodata_path) + elif namespace.path is not None: modules = namespace.path else: - modules = grep_list_of_modules_yaml_gz(namespace.repo_path) + modules = grep_list_of_modules_yaml(namespace.repo_path) collect_modules( modules, namespace.target, + namespace.get_only_modules_data, + namespace.get_only_modules_defaults_data, ) diff --git a/tests/test_create_packages_json.py b/tests/test_create_packages_json.py index 746782a8..df109e71 100644 --- a/tests/test_create_packages_json.py +++ b/tests/test_create_packages_json.py @@ -33,7 +33,7 @@ test_repo_info_2 = RepoInfo( class TestPackagesJson(TestCase): - def test_01__get_remote_file_content(self): + def test_01_get_remote_file_content(self): """ Test the getting of content from a remote file """ @@ -47,9 +47,8 @@ class TestPackagesJson(TestCase): 'pungi.scripts.create_packages_json.tempfile.NamedTemporaryFile', ) as mock_tempfile: mock_tempfile.return_value.__enter__.return_value.name = 'tmpfile' - file_name = PackagesGenerator._get_remote_file_content( - file_url='fakeurl' - ) + file_name = PackagesGenerator.get_remote_file_content( + file_url='fakeurl') mock_requests_get.assert_called_once_with(url='fakeurl') mock_tempfile.assert_called_once_with(delete=False) mock_tempfile.return_value.__enter__().\ -- 2.40.1 From 7422d1e04519d003376f29cc6695fb78e8210a6c Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Fri, 29 Apr 2022 21:33:28 +0300 Subject: [PATCH 2/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/gather_modules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pungi/scripts/gather_modules.py b/pungi/scripts/gather_modules.py index ac0053b6..adcfeccb 100644 --- a/pungi/scripts/gather_modules.py +++ b/pungi/scripts/gather_modules.py @@ -13,7 +13,7 @@ import yaml import createrepo_c as cr from typing.io import BinaryIO -from pungi.scripts.create_packages_json import PackagesGenerator +from .create_packages_json import PackagesGenerator EMPTY_FILE = '.empty' -- 2.40.1 From 34eb45c7ec03ff147bcd3342b79a1409d2ae8375 Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Fri, 29 Apr 2022 21:39:51 +0300 Subject: [PATCH 3/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/create_packages_json.py | 18 +++++++++++++++++- pungi/scripts/gather_modules.py | 21 +-------------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pungi/scripts/create_packages_json.py b/pungi/scripts/create_packages_json.py index dc51a1eb..f4143453 100644 --- a/pungi/scripts/create_packages_json.py +++ b/pungi/scripts/create_packages_json.py @@ -16,6 +16,7 @@ import tempfile from collections import defaultdict from typing import AnyStr, Dict, List, Optional +import binascii import createrepo_c as cr import dnf.subject import hawkey @@ -25,8 +26,23 @@ import yaml from createrepo_c import Package from dataclasses import dataclass -from .gather_modules import is_gzip_file, is_xz_file +def _is_compressed_file(first_two_bytes: bytes, initial_bytes: bytes): + return binascii.hexlify(first_two_bytes) == initial_bytes + + +def is_gzip_file(first_two_bytes): + return _is_compressed_file( + first_two_bytes=first_two_bytes, + initial_bytes=b'1f8b', + ) + + +def is_xz_file(first_two_bytes): + return _is_compressed_file( + first_two_bytes=first_two_bytes, + initial_bytes=b'fd37', + ) @dataclass class RepoInfo: diff --git a/pungi/scripts/gather_modules.py b/pungi/scripts/gather_modules.py index adcfeccb..11e38e42 100644 --- a/pungi/scripts/gather_modules.py +++ b/pungi/scripts/gather_modules.py @@ -1,4 +1,3 @@ -import binascii import gzip import lzma import os @@ -13,29 +12,11 @@ import yaml import createrepo_c as cr from typing.io import BinaryIO -from .create_packages_json import PackagesGenerator +from .create_packages_json import PackagesGenerator, is_gzip_file, is_xz_file EMPTY_FILE = '.empty' -def _is_compressed_file(first_two_bytes: bytes, initial_bytes: bytes): - return binascii.hexlify(first_two_bytes) == initial_bytes - - -def is_gzip_file(first_two_bytes): - return _is_compressed_file( - first_two_bytes=first_two_bytes, - initial_bytes=b'1f8b', - ) - - -def is_xz_file(first_two_bytes): - return _is_compressed_file( - first_two_bytes=first_two_bytes, - initial_bytes=b'fd37', - ) - - def read_modules_yaml(modules_yaml_path: Union[str, Path]) -> BytesIO: with open(modules_yaml_path, 'rb') as fp: return BytesIO(fp.read()) -- 2.40.1 From 38ea8222609b1d80338a7b02daff625b50003481 Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Sat, 30 Apr 2022 00:27:31 +0300 Subject: [PATCH 4/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/gather_modules.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/pungi/scripts/gather_modules.py b/pungi/scripts/gather_modules.py index 11e38e42..1f80a0a1 100644 --- a/pungi/scripts/gather_modules.py +++ b/pungi/scripts/gather_modules.py @@ -79,6 +79,8 @@ def read_modules_yaml_from_specific_repo(repo_path: AnyStr) -> List[BytesIO]: record.location_href, ) return [read_modules_yaml(modules_yaml_path=modules_yaml_path)] + else: + return [] def collect_modules( @@ -93,10 +95,13 @@ def collect_modules( Returns: object: """ + xor_flag = grep_only_modules_defaults_data is grep_only_modules_data modules_path = os.path.join(target_dir, 'modules') module_defaults_path = os.path.join(target_dir, 'module_defaults') - os.makedirs(modules_path, exist_ok=True) - os.makedirs(module_defaults_path, exist_ok=True) + if grep_only_modules_data or xor_flag: + os.makedirs(modules_path, exist_ok=True) + if grep_only_modules_defaults_data or xor_flag: + os.makedirs(module_defaults_path, exist_ok=True) # Defaults modules can be empty, but pungi detects # empty folder while copying and raises the exception in this case Path(os.path.join(module_defaults_path, EMPTY_FILE)).touch() @@ -108,7 +113,6 @@ def collect_modules( elif is_xz_file(data[:2]): data = lzma.decompress(data) documents = yaml.load_all(data, Loader=yaml.BaseLoader) - xor_flag = grep_only_modules_defaults_data is grep_only_modules_data for doc in documents: path = None if doc['document'] == 'modulemd-defaults' and \ @@ -171,17 +175,22 @@ def cli_main(): help='Path to a directory which contains repodirs. E.g. /var/repos' ) path_group.add_argument( - '-rd', '--repodata-path', + '-rd', '--repodata-paths', required=False, type=str, - default=None, - help='Path/url to a directory with repodata dir', + nargs='+', + default=[], + help='Paths/urls to the directories with directory `repodata`', ) parser.add_argument('-t', '--target', required=True) namespace = parser.parse_args() - if namespace.repodata_path is not None: - modules = read_modules_yaml_from_specific_repo(namespace.repodata_path) + if namespace.repodata_paths: + modules = [] + for repodata_path in namespace.repodata_paths: + modules.extend(read_modules_yaml_from_specific_repo( + repodata_path, + )) elif namespace.path is not None: modules = namespace.path else: -- 2.40.1 From 1e18e8995d0a6bf2ab8ccea6e7d7cfd7c92c113d Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Sun, 1 May 2022 03:32:01 +0300 Subject: [PATCH 5/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi.spec | 2 +- pungi/scripts/create_packages_json.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pungi.spec b/pungi.spec index 087e4f08..be527fc2 100644 --- a/pungi.spec +++ b/pungi.spec @@ -2,7 +2,7 @@ Name: pungi Version: 4.2.15 -Release: 1%{?dist}.cloudlinux +Release: 2%{?dist}.cloudlinux Summary: Distribution compose tool License: GPLv2 diff --git a/pungi/scripts/create_packages_json.py b/pungi/scripts/create_packages_json.py index f4143453..977b44a2 100644 --- a/pungi/scripts/create_packages_json.py +++ b/pungi/scripts/create_packages_json.py @@ -44,6 +44,7 @@ def is_xz_file(first_two_bytes): initial_bytes=b'fd37', ) + @dataclass class RepoInfo: # path to a directory with repo directories. E.g. '/var/repos' contains @@ -97,7 +98,7 @@ class PackagesGenerator: url=file_url, ) file_request.raise_for_status() - with tempfile.NamedTemporaryFile(delete=False) as file_stream: + with tempfile.NamedTemporaryFile(delete=True) as file_stream: file_stream.write(file_request.content) return file_stream.name -- 2.40.1 From 5f74175c33648ccab5451f4a20fb94e7a4951c15 Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Sun, 1 May 2022 03:41:40 +0300 Subject: [PATCH 6/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- tests/test_create_packages_json.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_create_packages_json.py b/tests/test_create_packages_json.py index df109e71..a4ab70cc 100644 --- a/tests/test_create_packages_json.py +++ b/tests/test_create_packages_json.py @@ -50,7 +50,7 @@ class TestPackagesJson(TestCase): file_name = PackagesGenerator.get_remote_file_content( file_url='fakeurl') mock_requests_get.assert_called_once_with(url='fakeurl') - mock_tempfile.assert_called_once_with(delete=False) + mock_tempfile.assert_called_once_with(delete=True) mock_tempfile.return_value.__enter__().\ write.assert_called_once_with(b'TestContent') self.assertEqual( -- 2.40.1 From 86769416554ea4feeca2e5b7b69dffead4c614d5 Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Mon, 2 May 2022 02:25:32 +0300 Subject: [PATCH 7/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/create_packages_json.py | 2 +- tests/test_create_packages_json.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pungi/scripts/create_packages_json.py b/pungi/scripts/create_packages_json.py index 977b44a2..a04cb412 100644 --- a/pungi/scripts/create_packages_json.py +++ b/pungi/scripts/create_packages_json.py @@ -98,7 +98,7 @@ class PackagesGenerator: url=file_url, ) file_request.raise_for_status() - with tempfile.NamedTemporaryFile(delete=True) as file_stream: + with tempfile.NamedTemporaryFile(delete=False) as file_stream: file_stream.write(file_request.content) return file_stream.name diff --git a/tests/test_create_packages_json.py b/tests/test_create_packages_json.py index a4ab70cc..df109e71 100644 --- a/tests/test_create_packages_json.py +++ b/tests/test_create_packages_json.py @@ -50,7 +50,7 @@ class TestPackagesJson(TestCase): file_name = PackagesGenerator.get_remote_file_content( file_url='fakeurl') mock_requests_get.assert_called_once_with(url='fakeurl') - mock_tempfile.assert_called_once_with(delete=True) + mock_tempfile.assert_called_once_with(delete=False) mock_tempfile.return_value.__enter__().\ write.assert_called_once_with(b'TestContent') self.assertEqual( -- 2.40.1 From e6c6f74176f845701022b56ee188c4c9fbb9943e Mon Sep 17 00:00:00 2001 From: soksanichenko Date: Tue, 3 May 2022 18:18:17 +0300 Subject: [PATCH 8/8] ALBS-334: Make the ability of Pungi to give module_defaults from remote sources --- pungi/scripts/gather_modules.py | 56 ++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/pungi/scripts/gather_modules.py b/pungi/scripts/gather_modules.py index 1f80a0a1..46e9d440 100644 --- a/pungi/scripts/gather_modules.py +++ b/pungi/scripts/gather_modules.py @@ -4,7 +4,7 @@ import os from argparse import ArgumentParser, FileType from io import BytesIO from pathlib import Path -from typing import List, AnyStr, Iterable, Union +from typing import List, AnyStr, Iterable, Union, Optional import logging from urllib.parse import urljoin @@ -30,16 +30,19 @@ def grep_list_of_modules_yaml(repos_path: AnyStr) -> Iterable[BytesIO]: """ return ( - read_modules_yaml(modules_yaml_path=path.parent) for path in - Path(repos_path).rglob('repodata') + read_modules_yaml_from_specific_repo(repo_path=path.parent) + for path in Path(repos_path).rglob('repodata') ) def _is_remote(path: str): - return any(path.startswith(protocol) for protocol in ('http', 'https')) + return any(str(path).startswith(protocol) + for protocol in ('http', 'https')) -def read_modules_yaml_from_specific_repo(repo_path: AnyStr) -> List[BytesIO]: +def read_modules_yaml_from_specific_repo( + repo_path: Union[str, Path] +) -> Optional[BytesIO]: """ Read modules_yaml from a specific repo (remote or local) :param repo_path: path/url to a specific repo @@ -78,9 +81,32 @@ def read_modules_yaml_from_specific_repo(repo_path: AnyStr) -> List[BytesIO]: repo_path, record.location_href, ) - return [read_modules_yaml(modules_yaml_path=modules_yaml_path)] + return read_modules_yaml(modules_yaml_path=modules_yaml_path) else: - return [] + return None + + +def _should_grep_defaults( + document_type: str, + grep_only_modules_data: bool = False, + grep_only_modules_defaults_data: bool = False, +) -> bool: + xor_flag = grep_only_modules_data == grep_only_modules_defaults_data + if document_type == 'modulemd' and (xor_flag or grep_only_modules_data): + return True + return False + + +def _should_grep_modules( + document_type: str, + grep_only_modules_data: bool = False, + grep_only_modules_defaults_data: bool = False, +) -> bool: + xor_flag = grep_only_modules_data == grep_only_modules_defaults_data + if document_type == 'modulemd-defaults' and \ + (xor_flag or grep_only_modules_defaults_data): + return True + return False def collect_modules( @@ -115,12 +141,19 @@ def collect_modules( documents = yaml.load_all(data, Loader=yaml.BaseLoader) for doc in documents: path = None - if doc['document'] == 'modulemd-defaults' and \ - (grep_only_modules_defaults_data or xor_flag): + if _should_grep_modules( + doc['document'], + grep_only_modules_data, + grep_only_modules_defaults_data, + ): name = f"{doc['data']['module']}.yaml" path = os.path.join(module_defaults_path, name) logging.info('Found %s module defaults', name) - elif grep_only_modules_data or xor_flag: + elif _should_grep_defaults( + doc['document'], + grep_only_modules_data, + grep_only_modules_defaults_data, + ): # pungi.phases.pkgset.sources.source_koji.get_koji_modules stream = doc['data']['stream'].replace('-', '_') doc_data = doc['data'] @@ -188,13 +221,14 @@ def cli_main(): if namespace.repodata_paths: modules = [] for repodata_path in namespace.repodata_paths: - modules.extend(read_modules_yaml_from_specific_repo( + modules.append(read_modules_yaml_from_specific_repo( repodata_path, )) elif namespace.path is not None: modules = namespace.path else: modules = grep_list_of_modules_yaml(namespace.repo_path) + modules = list(filter(lambda i: i is not None, modules)) collect_modules( modules, namespace.target, -- 2.40.1