Initial release of alma_blob_upload tool
parent 8971f4be15
commit 896ea1f1ba

2  .gitignore  vendored  Normal file
@@ -0,0 +1,2 @@
.idea
.mypy_cache

62  README.md
@@ -1,3 +1,65 @@
# almalinux-git-utils

Utilities for working with the AlmaLinux OS Git server.

## alma_blob_upload

The `alma_blob_upload` script uploads sources and BLOBs to the AlmaLinux
sources cache.

### Prerequisites

Install the `python3-boto3` package:

```shell
# RPM-based distributions. On EL8 derivatives the package is available from EPEL.
$ sudo dnf install python3 python3-boto3

# Debian-based distributions
$ sudo apt install python3-boto3
```

Create an AWS credentials file `~/.aws/credentials` with the following content:

```ini
[default]
aws_access_key_id = YOUR_ACCESS_KEY
aws_secret_access_key = YOUR_SECRET_KEY
```
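
The `boto3` client created by the script reads these credentials automatically
through its default credential chain, so no extra configuration is required.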

### Usage

The utility supports two types of input: a CentOS git repository metadata file
or a list of files to upload.
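
A metadata file is a plain text list of records, one per line, each consisting
of a file's SHA-1 checksum and its relative path separated by whitespace. For
illustration (hypothetical checksum and file name):

```
da39a3ee5e6b4b0d3255bfef95601890afd80709 SOURCES/example-1.0.tar.gz
```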

For CentOS repositories, the workflow is as follows:

1. Install the `get_sources.sh` script from the
   [centos-git-common](https://git.centos.org/centos-git-common) repository.
2. Clone a project and download its sources as described on the CentOS
   [Wiki](https://wiki.centos.org/Sources).
3. Run the `alma_blob_upload` tool (don't forget to replace `PROJECT_NAME` with
   an actual project name):

```shell
$ alma_blob_upload.py -i .PROJECT_NAME.metadata
```

Alternatively, you can upload a list of files in the following way:

```shell
$ alma_blob_upload.py -f SOURCES/FILE_1 SOURCES/FILE_N
```

The `alma_blob_upload` utility can also generate a CentOS-compatible metadata
file:

```shell
$ alma_blob_upload.py -o .PROJECT_NAME.metadata -f SOURCES/FILE_1 SOURCES/FILE_N
```

## License

Licensed under the GPLv3 license, see the LICENSE file for details.

193  alma_blob_upload.py  Normal file
@@ -0,0 +1,193 @@
#!/usr/bin/env python3

"""
Uploads sources and BLOBs to the AlmaLinux sources cache.
"""

import argparse
import hashlib
import os.path
import sys
from typing import Iterator, List, TextIO, Tuple

import boto3  # type: ignore
from botocore.exceptions import ClientError  # type: ignore


def init_arg_parser() -> argparse.ArgumentParser:
    """
    Initializes a command line arguments parser.

    Returns:
        Command line arguments parser.
    """
    arg_parser = argparse.ArgumentParser(
        prog="alma_blob_upload",
        description="Uploads sources and BLOBs to the AlmaLinux sources cache"
    )
    group = arg_parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
                       help='input metadata file list to upload')
    arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
                            help='Amazon S3 bucket name. Default is '
                                 'sources.almalinux.org')
    arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
                            help='output metadata file path')
    arg_parser.add_argument('-p', '--private', action='store_true',
                            help='set uploaded file mode to private. All '
                                 'uploaded files are public by default')
    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
                            help='AlmaLinux sources server domain name. '
                                 'Default is sources.almalinux.org')
    arg_parser.add_argument('-v', '--verbose', action='store_true',
                            help='enable additional debug output')
    return arg_parser


def get_file_checksum(file_path: str, checksum_type: str = 'sha1',
                      buff_size: int = 1048576) -> str:
    """
    Calculates a file checksum.

    Args:
        file_path: File path.
        checksum_type: Checksum type.
        buff_size: Number of bytes to read at once.

    Returns:
        File checksum.
    """
    hasher = hashlib.new(checksum_type)
    # read the file in buff_size chunks (1 MiB by default) so that large
    # files don't have to fit into memory
    with open(file_path, 'rb') as fd:
        buff = fd.read(buff_size)
        while len(buff):
            hasher.update(buff)
            buff = fd.read(buff_size)
    return hasher.hexdigest()


def normalize_path(path: str) -> str:
    """
    Returns an absolute path with all variables expanded.

    Args:
        path: Path to normalize.

    Returns:
        Normalized path.
    """
    return os.path.abspath(os.path.expanduser(os.path.expandvars(path)))


def iter_metadata(metadata_path: str) -> Iterator[Tuple[str, str]]:
    """
    Iterates over records in a CentOS git repository-compatible metadata file.

    Args:
        metadata_path: Metadata file path.

    Returns:
        Iterator over files and their checksums.
    """
    with open(metadata_path, 'r') as fd:
        for line in fd:
            checksum, file_path = line.split()
            file_path = normalize_path(file_path)
            # sanity check: the file on disk must match the checksum
            # recorded in the metadata file
            assert checksum == get_file_checksum(file_path)
            yield file_path, checksum


def iter_files(files: List[str]) -> Iterator[Tuple[str, str]]:
    """
    Iterates over a list of files and calculates checksums for them.

    Args:
        files: List of files.

    Returns:
        Iterator over files and their checksums.
    """
    for file_path in files:
        file_path = normalize_path(file_path)
        checksum = get_file_checksum(file_path)
        yield file_path, checksum


def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool:
    """
    Checks if a file with a given checksum is already uploaded.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        checksum: File checksum.

    Returns:
        True if a file is already uploaded, False otherwise.
    """
    try:
        s3_client.head_object(Bucket=bucket_name, Key=checksum)
        return True
    except ClientError:
        return False


def upload_file(s3_client, bucket_name: str, file_path: str,
                checksum: str, private: bool):
    """
    Uploads a file to an Amazon S3 bucket.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        file_path: File path.
        checksum: File checksum.
        private: False if file should be public, True otherwise.
    """
    acl = 'bucket-owner-full-control' if private else 'public-read'
    # objects are keyed by checksum, not by file name
    s3_client.upload_file(file_path, bucket_name, checksum,
                          ExtraArgs={'ACL': acl})


def add_metadata_record(metadata_fd: TextIO, file_path: str, checksum: str):
    """
    Adds a source file record to a metadata file.

    Args:
        metadata_fd: Metadata file descriptor.
        file_path: Source file path.
        checksum: Source file checksum.
    """
    rel_path = os.path.relpath(file_path)
    metadata_fd.write(f'{checksum} {rel_path}\n')


def main(sys_args):
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args(sys_args)
    s3_client = boto3.client('s3')
    if args.input_metadata:
        iterator = iter_metadata(args.input_metadata)
    else:
        iterator = iter_files(args.file)
    out_fd = None
    if args.output_metadata:
        out_fd = open(args.output_metadata, 'w')
    try:
        for file_path, checksum in iterator:
            file_url = f'https://{args.domain_name}/{checksum}'
            # skip files that are already present in the bucket
            if not is_file_exist(s3_client, args.bucket, checksum):
                upload_file(s3_client, args.bucket, file_path, checksum,
                            args.private)
                print(f'{file_path} uploaded: {file_url}')
            else:
                print(f'{file_path} exists: {file_url}')
            if out_fd:
                add_metadata_record(out_fd, file_path, checksum)
    finally:
        if out_fd:
            out_fd.close()


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
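
As the `main` function shows, blobs are stored under their checksum and served
at `https://<domain-name>/<checksum>`, so a public upload can be fetched back
with any HTTP client. A minimal sketch, using the default domain and a
hypothetical checksum:

```shell
$ curl -O https://sources.almalinux.org/da39a3ee5e6b4b0d3255bfef95601890afd80709
```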