diff --git a/almalinux/gitutils/blob_upload.py b/almalinux/gitutils/blob_upload.py
index 8f821fb..8ca58be 100644
--- a/almalinux/gitutils/blob_upload.py
+++ b/almalinux/gitutils/blob_upload.py
@@ -1,156 +1,158 @@
-"""Uploads sources and BLOBs to the AlmaLinux sources cache"""
-
-import argparse
-import logging
-import os
-import sys
-from typing import Iterator, List, Optional, Tuple
-
-import boto3
-import botocore.exceptions
-
-from almalinux.gitutils.errors import ChecksumError
-from almalinux.gitutils.common import (
-    configure_logger, find_metadata_file, get_file_checksum, iter_metadata,
-    normalize_path
-)
-
-
-def init_arg_parser() -> argparse.ArgumentParser:
-    """
-    Initializes a command line arguments parser.
-
-    Returns:
-        Command line arguments parser.
-    """
-    arg_parser = argparse.ArgumentParser(
-        prog="alma_blob_upload",
-        description="Uploads sources and BLOBs to the AlmaLinux sources cache"
-    )
-    group = arg_parser.add_mutually_exclusive_group()
-    group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
-    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
-                       help='input metadata file list to upload. Will be '
-                            'detected automatically if omitted and no files '
-                            'provided')
-    arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
-                            help='Amazon S3 bucket name. Default is '
-                                 'sources.almalinux.org')
-    arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
-                            help='output metadata file path')
-    arg_parser.add_argument('-p', '--private', action='store_true',
-                            help='set uploaded file mode to private. All '
-                                 'uploaded files are public by default')
-    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
-                            help='AlmaLinux sources server domain name. '
-                                 'Default is sources.almalinux.org')
-    arg_parser.add_argument('-v', '--verbose', action='store_true',
-                            help='enable additional debug output')
-    return arg_parser
-
-
-def iter_files(files: List[str]) -> Iterator[Tuple[str, str, str]]:
-    """
-    Iterates over a list of files and calculates checksums for them.
-
-    Args:
-        files: List of files.
-
-    Returns:
-        Iterator over files and their checksums.
-    """
-    checksum_type = 'sha1'
-    for rel_path in files:
-        file_path = normalize_path(rel_path)
-        checksum = get_file_checksum(file_path, checksum_type)
-        yield rel_path, checksum, checksum_type
-
-
-def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool:
-    """
-    Checks is a file with a given checksum is already uploaded.
-    Args:
-        s3_client: Amazon S3 client.
-        bucket_name: S3 bucket name.
-        checksum: File checksum.
-
-    Returns:
-        True if a file is already uploaded, False otherwise.
-    """
-    try:
-        s3_client.head_object(Bucket=bucket_name, Key=checksum)
-        return True
-    except botocore.exceptions.ClientError:
-        return False
-
-
-def upload_file(s3_client, bucket_name: str, file_path: str,
-                checksum: str, private: bool):
-    """
-    Uploads a file to an Amazon S3 bucket.
-
-    Args:
-        s3_client: Amazon S3 client.
-        bucket_name: S3 bucket name.
-        file_path: File path.
-        checksum: File checksum.
-        private: False if file should be public, True otherwise.
-    """
-    acl = 'bucket-owner-full-control' if private else 'public-read'
-    s3_client.upload_file(file_path, bucket_name, checksum,
-                          ExtraArgs={'ACL': acl})
-
-
-def get_file_iterator(
-    files: List[str], metadata_path: Optional[str]
-) -> Iterator[Tuple[str, str, str]]:
-    """
-    Finds a suitable file iterator for given arguments.
-
-    Args:
-        files: List of files.
-        metadata_path: Metadata file path.
-
-    Returns:
-        File iterator.
-    """
-    if files:
-        iterator = iter_files(files)
-    else:
-        if not metadata_path:
-            metadata_path = find_metadata_file(os.getcwd())
-        iterator = iter_metadata(metadata_path)
-    return iterator
-
-
-def main():
-    arg_parser = init_arg_parser()
-    args = arg_parser.parse_args(sys.argv[1:])
-    configure_logger(args.verbose)
-    s3_client = boto3.client('s3')
-    iterator = get_file_iterator(args.file, args.input_metadata)
-    out_fd = None
-    if args.output_metadata:
-        out_fd = open(args.output_metadata, 'w')
-    try:
-        for rel_path, checksum, checksum_type in iterator:
-            file_path = normalize_path(rel_path)
-            if not args.file:
-                real_checksum = get_file_checksum(file_path, checksum_type)
-                if real_checksum != checksum:
-                    raise ChecksumError(
-                        f"{rel_path} {checksum_type} checksum {real_checksum} "
-                        f"doesn't match expected {checksum}"
-                    )
-            file_url = f'https://{args.domain_name}/{checksum}'
-            if not is_file_exist(s3_client, args.bucket, checksum):
-                upload_file(s3_client, args.bucket, file_path, checksum,
-                            args.private)
-                logging.info(f'{rel_path} successfully uploaded: {file_url}')
-            else:
-                logging.info(f'{rel_path} is already uploaded: {file_url}')
-            if out_fd:
-                out_fd.write(f'{checksum} {rel_path}\n')
-    finally:
-        if out_fd:
-            out_fd.close()
+"""Uploads sources and BLOBs to the AlmaLinux sources cache"""
+
+import argparse
+import logging
+import os
+import sys
+from typing import Iterator, List, Optional, Tuple
+
+import boto3
+import botocore.exceptions
+
+from almalinux.gitutils.errors import ChecksumError
+from almalinux.gitutils.common import (
+    configure_logger, find_metadata_file, get_file_checksum, iter_metadata,
+    normalize_path
+)
+
+
+def init_arg_parser() -> argparse.ArgumentParser:
+    """
+    Initializes a command line arguments parser.
+
+    Returns:
+        Command line arguments parser.
+    """
+    arg_parser = argparse.ArgumentParser(
+        prog="alma_blob_upload",
+        description="Uploads sources and BLOBs to the AlmaLinux sources cache"
+    )
+    group = arg_parser.add_mutually_exclusive_group()
+    group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
+    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
+                       help='metadata file listing files to upload. Will be '
+                            'detected automatically if omitted and no files '
+                            'are provided')
+    arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
+                            help='Amazon S3 bucket name. Default is '
+                                 'sources.almalinux.org')
+    arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
+                            help='output metadata file path')
+    arg_parser.add_argument('-a', '--append-metadata', action='store_true',
+                            help='append to the output metadata file')
+    arg_parser.add_argument('-p', '--private', action='store_true',
+                            help='set uploaded file mode to private. All '
+                                 'uploaded files are public by default')
+    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
+                            help='AlmaLinux sources server domain name. '
+                                 'Default is sources.almalinux.org')
+    arg_parser.add_argument('-v', '--verbose', action='store_true',
+                            help='enable additional debug output')
+    return arg_parser
+
+
+def iter_files(files: List[str]) -> Iterator[Tuple[str, str, str]]:
+    """
+    Iterates over a list of files and calculates checksums for them.
+
+    Args:
+        files: List of files.
+
+    Returns:
+        Iterator over files and their checksums.
+ """ + checksum_type = 'sha1' + for rel_path in files: + file_path = normalize_path(rel_path) + checksum = get_file_checksum(file_path, checksum_type) + yield rel_path, checksum, checksum_type + + +def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool: + """ + Checks is a file with a given checksum is already uploaded. + Args: + s3_client: Amazon S3 client. + bucket_name: S3 bucket name. + checksum: File checksum. + + Returns: + True if a file is already uploaded, False otherwise. + """ + try: + s3_client.head_object(Bucket=bucket_name, Key=checksum) + return True + except botocore.exceptions.ClientError: + return False + + +def upload_file(s3_client, bucket_name: str, file_path: str, + checksum: str, private: bool): + """ + Uploads a file to an Amazon S3 bucket. + + Args: + s3_client: Amazon S3 client. + bucket_name: S3 bucket name. + file_path: File path. + checksum: File checksum. + private: False if file should be public, True otherwise. + """ + acl = 'bucket-owner-full-control' if private else 'public-read' + s3_client.upload_file(file_path, bucket_name, checksum, + ExtraArgs={'ACL': acl}) + + +def get_file_iterator( + files: List[str], metadata_path: Optional[str] +) -> Iterator[Tuple[str, str, str]]: + """ + Finds a suitable file iterator for given arguments. + + Args: + files: List of files. + metadata_path: Metadata file path. + + Returns: + File iterator. + """ + if files: + iterator = iter_files(files) + else: + if not metadata_path: + metadata_path = find_metadata_file(os.getcwd()) + iterator = iter_metadata(metadata_path) + return iterator + + +def main(): + arg_parser = init_arg_parser() + args = arg_parser.parse_args(sys.argv[1:]) + configure_logger(args.verbose) + s3_client = boto3.client('s3') + iterator = get_file_iterator(args.file, args.input_metadata) + out_fd = None + if args.output_metadata: + out_fd = open(args.output_metadata, 'w+' if args.append_metadata else 'w') + try: + for rel_path, checksum, checksum_type in iterator: + file_path = normalize_path(rel_path) + if not args.file: + real_checksum = get_file_checksum(file_path, checksum_type) + if real_checksum != checksum: + raise ChecksumError( + f"{rel_path} {checksum_type} checksum {real_checksum} " + f"doesn't match expected {checksum}" + ) + file_url = f'https://{args.domain_name}/{checksum}' + if not is_file_exist(s3_client, args.bucket, checksum): + upload_file(s3_client, args.bucket, file_path, checksum, + args.private) + logging.info(f'{rel_path} successfully uploaded: {file_url}') + else: + logging.info(f'{rel_path} is already uploaded: {file_url}') + if out_fd: + out_fd.write(f'{checksum} {rel_path}\n') + finally: + if out_fd: + out_fd.close()