almalinux-git-utils/almalinux/gitutils/blob_upload.py

"""Uploads sources and BLOBs to the AlmaLinux sources cache"""

import argparse
import logging
import os
import sys
from typing import Iterator, List, Optional, Tuple

import boto3
import botocore.exceptions

from almalinux.gitutils.errors import ChecksumError
from almalinux.gitutils.common import (
    configure_logger, find_metadata_file, get_file_checksum, iter_metadata,
    normalize_path
)


def init_arg_parser() -> argparse.ArgumentParser:
    """
    Initializes a command-line argument parser.

    Returns:
        Command-line argument parser.
    """
arg_parser = argparse.ArgumentParser(
prog="alma_blob_upload",
description="Uploads sources and BLOBs to the AlmaLinux sources cache"
)
group = arg_parser.add_mutually_exclusive_group()
group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
                       help='input metadata file listing sources to upload. '
                            'Detected automatically if omitted and no files '
                            'are provided')
arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
help='Amazon S3 bucket name. Default is '
'sources.almalinux.org')
arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
help='output metadata file path')
    arg_parser.add_argument('-a', '--append-metadata', action='store_true',
                            help='append to the output metadata file '
                                 'instead of overwriting it')
arg_parser.add_argument('-p', '--private', action='store_true',
help='set uploaded file mode to private. All '
'uploaded files are public by default')
arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
help='AlmaLinux sources server domain name. '
'Default is sources.almalinux.org')
arg_parser.add_argument('-v', '--verbose', action='store_true',
help='enable additional debug output')
return arg_parser
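
# Illustrative sketch only (not part of the original module); the file name
# below is a made-up placeholder:
#
#   args = init_arg_parser().parse_args(['-f', 'foo-1.0.tar.gz', '-v'])
#   assert args.bucket == 'sources.almalinux.org'
#   assert args.verbose is True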


def iter_files(files: List[str]) -> Iterator[Tuple[str, str, str]]:
    """
    Iterates over a list of files and calculates checksums for them.

    Args:
        files: List of files.

    Returns:
        Iterator over files and their checksums.
    """
    # The sources cache stores files under their SHA-1 checksum, so that is
    # the only checksum type calculated here.
    checksum_type = 'sha1'
for rel_path in files:
file_path = normalize_path(rel_path)
checksum = get_file_checksum(file_path, checksum_type)
yield rel_path, checksum, checksum_type
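
# Illustrative sketch only (not part of the original module); the path is a
# made-up placeholder:
#
#   for path, digest, digest_type in iter_files(['rpms/foo-1.0.tar.gz']):
#       print(path, digest_type, digest)
#
# Each tuple pairs the original relative path with its SHA-1 digest.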


def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool:
    """
    Checks if a file with a given checksum is already uploaded.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        checksum: File checksum.

    Returns:
        True if a file is already uploaded, False otherwise.
    """
    try:
        s3_client.head_object(Bucket=bucket_name, Key=checksum)
        return True
    except botocore.exceptions.ClientError:
        # head_object raises a ClientError (e.g. a 404) when the key is
        # missing; any client error is treated as "not uploaded yet".
        return False


def upload_file(s3_client, bucket_name: str, file_path: str,
                checksum: str, private: bool):
    """
    Uploads a file to an Amazon S3 bucket.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        file_path: File path.
        checksum: File checksum.
        private: False if file should be public, True otherwise.
    """
    # Files are stored under their checksum and are world-readable unless a
    # private upload was requested.
    acl = 'bucket-owner-full-control' if private else 'public-read'
    s3_client.upload_file(file_path, bucket_name, checksum,
                          ExtraArgs={'ACL': acl})
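
# Illustrative sketch combining the two helpers above (assumes AWS
# credentials are already configured for boto3; names are placeholders):
#
#   client = boto3.client('s3')
#   digest = get_file_checksum('foo-1.0.tar.gz', 'sha1')
#   if not is_file_exist(client, 'sources.almalinux.org', digest):
#       upload_file(client, 'sources.almalinux.org', 'foo-1.0.tar.gz',
#                   digest, private=False)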


def get_file_iterator(
        files: List[str], metadata_path: Optional[str]
) -> Iterator[Tuple[str, str, str]]:
    """
    Finds a suitable file iterator for the given arguments.

    Args:
        files: List of files.
        metadata_path: Metadata file path.

    Returns:
        File iterator.
    """
    if files:
        iterator = iter_files(files)
    else:
        # Fall back to a metadata file, auto-detected in the current working
        # directory when no explicit path is given.
        if not metadata_path:
            metadata_path = find_metadata_file(os.getcwd())
        iterator = iter_metadata(metadata_path)
    return iterator
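
# Illustrative sketch of the selection order (paths are placeholders):
#
#   get_file_iterator(['foo-1.0.tar.gz'], None)  # hashes the listed files
#   get_file_iterator([], '.foo.metadata')       # reads checksums from file
#   get_file_iterator([], None)                  # auto-detects metadata file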


def main():
    """Entry point of the alma_blob_upload command-line tool."""
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args(sys.argv[1:])
    configure_logger(args.verbose)
s3_client = boto3.client('s3')
iterator = get_file_iterator(args.file, args.input_metadata)
    out_fd = None
    if args.output_metadata:
        out_fd = open(args.output_metadata,
                      'a' if args.append_metadata else 'w')
    try:
        for rel_path, checksum, checksum_type in iterator:
            file_path = normalize_path(rel_path)
            if not args.file:
                # Checksums came from a metadata file and may be stale:
                # verify that the file on disk still matches before upload.
                real_checksum = get_file_checksum(file_path, checksum_type)
                if real_checksum != checksum:
                    raise ChecksumError(
                        f"{rel_path} {checksum_type} checksum "
                        f"{real_checksum} doesn't match expected {checksum}"
                    )
file_url = f'https://{args.domain_name}/{checksum}'
if not is_file_exist(s3_client, args.bucket, checksum):
upload_file(s3_client, args.bucket, file_path, checksum,
args.private)
logging.info(f'{rel_path} successfully uploaded: {file_url}')
else:
logging.info(f'{rel_path} is already uploaded: {file_url}')
if out_fd:
out_fd.write(f'{checksum} {rel_path}\n')
finally:
if out_fd:
out_fd.close()
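

# A conventional guard so the module can also be run directly. An assumption:
# the packaged tool presumably exposes main() through a console entry point,
# which this file does not show.
if __name__ == '__main__':
    main()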