From 478c0202110da39a18fc0fc16b4f2b6cb96ef7d7 Mon Sep 17 00:00:00 2001
From: Stepan Oksanichenko
Date: Tue, 11 Jun 2024 23:43:51 +0300
Subject: [PATCH] ALBS-1216: Implement support for new SHA512 external sources
 file format in almalinux-git-utils

- New key argument `--sha512` for the sources uploader if you need to upload
  a source in new format (using sha512 checksum)
- A separate recognizer of a checksum type in metadata file
- The sources downloader can automatically detect a type of checksum
---
Reviewer notes (this section is ignored by git-am):
  - the line structure and indentation of this patch were restored from a
    whitespace-collapsed copy, run `git apply --check` before using it
  - extract_checksum_and_file_path() detects the new format by the record
    prefix instead of a "SHA512" substring, so old format records with
    "sha512" in a file name are parsed correctly
  - the new `sha512` arguments default to False to keep existing callers
    working, the `file_mdoe` typo is fixed

 almalinux/gitutils/blob_upload.py | 32 ++++++++++++++++++++++++++------
 almalinux/gitutils/common.py      | 38 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/almalinux/gitutils/blob_upload.py b/almalinux/gitutils/blob_upload.py
index f378bd6..d7b7f88 100644
--- a/almalinux/gitutils/blob_upload.py
+++ b/almalinux/gitutils/blob_upload.py
@@ -40,6 +40,8 @@ def init_arg_parser() -> argparse.ArgumentParser:
                             help='output metadata file path')
     arg_parser.add_argument('-a', '--append-metadata', action='store_true',
                             help='Append to an output metadata')
+    arg_parser.add_argument('--sha512', action='store_true',
+                            help='Use a new format of hashsum')
     arg_parser.add_argument('-p', '--private', action='store_true',
                             help='set uploaded file mode to private. All '
                                  'uploaded files are public by default')
@@ -51,17 +53,21 @@ def init_arg_parser() -> argparse.ArgumentParser:
     return arg_parser
 
 
-def iter_files(files: List[str]) -> Iterator[Tuple[str, str, str]]:
+def iter_files(
+        files: List[str],
+        sha512: bool = False
+) -> Iterator[Tuple[str, str, str]]:
     """
     Iterates over a list of files and calculates checksums for them.
 
     Args:
         files: List of files.
+        sha512: True if we use new format of hashsum.
 
     Returns:
         Iterator over files and their checksums.
     """
-    checksum_type = 'sha1'
+    checksum_type = 'sha512' if sha512 else 'sha1'
     for rel_path in files:
         file_path = normalize_path(rel_path)
         checksum = get_file_checksum(file_path, checksum_type)
@@ -104,7 +110,9 @@ def upload_file(s3_client, bucket_name: str, file_path: str,
 
 
 def get_file_iterator(
-        files: List[str], metadata_path: Optional[str]
+        files: List[str],
+        metadata_path: Optional[str],
+        sha512: bool = False,
 ) -> Iterator[Tuple[str, str, str]]:
     """
     Finds a suitable file iterator for given arguments.
@@ -112,12 +120,16 @@ def get_file_iterator(
     Args:
         files: List of files.
         metadata_path: Metadata file path.
+        sha512: True if we use new format of hashsum.
 
     Returns:
         File iterator.
     """
     if files:
-        iterator = iter_files(files)
+        iterator = iter_files(
+            files=files,
+            sha512=sha512,
+        )
     else:
         if not metadata_path:
             metadata_path = find_metadata_file(os.getcwd())
@@ -130,10 +142,18 @@ def main():
     args = arg_parser.parse_args(sys.argv[1:])
     configure_logger(args.verbose)
     s3_client = boto3.client('s3')
-    iterator = get_file_iterator(args.file, args.input_metadata)
+    iterator = get_file_iterator(
+        args.file,
+        args.input_metadata,
+        args.sha512,
+    )
     out_fd = None
+    if args.append_metadata:
+        file_mode = 'a'
+    else:
+        file_mode = 'w'
     if args.output_metadata:
-        out_fd = open(args.output_metadata, 'a' if args.append_metadata else 'w')
+        out_fd = open(args.output_metadata, file_mode)
     try:
         for rel_path, checksum, checksum_type in iterator:
             file_path = normalize_path(rel_path)
diff --git a/almalinux/gitutils/common.py b/almalinux/gitutils/common.py
index b35085f..34e2fa9 100644
--- a/almalinux/gitutils/common.py
+++ b/almalinux/gitutils/common.py
@@ -90,6 +90,42 @@ def get_file_checksum(file_path: str, checksum_type: str = 'sha1',
     return hasher.hexdigest()
 
 
+def extract_checksum_and_file_path(line: str) -> Tuple[str, str]:
+    """
+    Extracts a checksum and a file path from a metadata file record.
+
+    Supported record formats:
+      - old: "<checksum> <file_path>"
+      - new: "SHA512 (<file_path>) = <checksum>"
+
+    Args:
+        line: Metadata file record.
+
+    Returns:
+        Checksum and file path.
+
+    Raises:
+        ValueError: If a record is malformed.
+    """
+    record = line.strip()
+    # the new format is recognized by a record prefix only because a file
+    # name of an old format record can contain the "sha512" substring
+    if record.upper().startswith('SHA512 ('):
+        # a checksum can't contain "=", but a file name can
+        head, separator, checksum = record.rpartition('=')
+        file_path = head.strip()[len('SHA512'):].strip()
+        if not separator or not file_path.endswith(')'):
+            raise ValueError(
+                'invalid SHA512 metadata record: {0!r}'.format(record)
+            )
+        # strip only the wrapping parentheses, keep ones of a file name
+        file_path = file_path[1:-1]
+        checksum = checksum.strip()
+    else:
+        checksum, file_path = record.split()
+    return checksum, file_path
+
+
 def iter_metadata(metadata_path: str) -> Iterator[Tuple[str, str, str]]:
     """
     Iterates over records in a CentOS git repository-compatible metadata file.
@@ -102,6 +138,6 @@ def iter_metadata(metadata_path: str) -> Iterator[Tuple[str, str, str]]:
     """
     with open(metadata_path, 'r') as fd:
         for line in fd:
-            checksum, file_path = line.split()
+            checksum, file_path = extract_checksum_and_file_path(line)
             checksum_type = detect_checksum_type(checksum)
             yield file_path, checksum, checksum_type