parent
8971f4be15
commit
896ea1f1ba
@@ -0,0 +1,2 @@
.idea
.mypy_cache

@@ -1,3 +1,65 @@
# almalinux-git-utils

Utilities for working with the AlmaLinux OS Git server.


## alma_blob_upload

The `alma_blob_upload` script uploads sources and BLOBs to the AlmaLinux
sources cache.

### Prerequisites

Install the `python3-boto3` package:

```shell
# RPM-based distributions. On EL8 derivatives the package is available from EPEL.
$ sudo dnf install python3 python3-boto3

# Debian-based distributions
$ sudo apt install python3-boto3
```

Create an AWS credentials file `~/.aws/credentials` with the following
content:

```ini
[default]
aws_access_key_id = YOUR_ACCESS_KEY
aws_secret_access_key = YOUR_SECRET_KEY
```
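
boto3 should pick these credentials up automatically through its default
credential chain, so no further configuration is needed to run the tool.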

### Usage

The utility supports two types of input: a CentOS git repository metadata file
or a list of files to upload.

For CentOS repositories, the workflow is as follows:

1. Install the `get_sources.sh` script from the
   [centos-git-common](https://git.centos.org/centos-git-common) repository.
2. Clone a project and download its sources as described on the CentOS
   [Wiki](https://wiki.centos.org/Sources).
3. Run the `alma_blob_upload` tool (don't forget to replace `PROJECT_NAME` with
   an actual project name):
   ```shell
   $ alma_blob_upload.py -i .PROJECT_NAME.metadata
   ```
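
The metadata file is a plain-text list with one `<checksum> <relative path>`
record per line (the script computes SHA-1 digests by default); the checksum
and file name below are made-up placeholders:

```shell
$ cat .PROJECT_NAME.metadata
0a5774dabf81966e8a3da50bc7da0b0a11d8fd56 SOURCES/project-1.0.tar.gz
```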

Alternatively, you can upload a list of files in the following way:

```shell
$ alma_blob_upload.py -f SOURCES/FILE_1 SOURCES/FILE_N
```

The `alma_blob_upload` utility can also generate a CentOS-compatible metadata
file:

```shell
$ alma_blob_upload.py -o .PROJECT_NAME.metadata -f SOURCES/FILE_1 SOURCES/FILE_N
```
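
The generated file uses the same `<checksum> <relative path>` layout, so it
can later be passed back to the tool via the `-i` option.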

## License

Licensed under the GPLv3 license, see the LICENSE file for details.

@@ -0,0 +1,193 @@
#!/usr/bin/env python3

"""
Uploads sources and BLOBs to the AlmaLinux sources cache.
"""

import argparse
import hashlib
import os.path
import sys
from typing import Iterator, List, TextIO, Tuple

import boto3  # type: ignore
from botocore.exceptions import ClientError  # type: ignore


def init_arg_parser() -> argparse.ArgumentParser:
    """
    Initializes a command line arguments parser.

    Returns:
        Command line arguments parser.
    """
    arg_parser = argparse.ArgumentParser(
        prog="alma_blob_upload",
        description="Uploads sources and BLOBs to the AlmaLinux sources cache"
    )
    group = arg_parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
                       help='input metadata file list to upload')
    arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
                            help='Amazon S3 bucket name. Default is '
                                 'sources.almalinux.org')
    arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
                            help='output metadata file path')
    arg_parser.add_argument('-p', '--private', action='store_true',
                            help='set uploaded file mode to private. All '
                                 'uploaded files are public by default')
    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
                            help='AlmaLinux sources server domain name. '
                                 'Default is sources.almalinux.org')
    arg_parser.add_argument('-v', '--verbose', action='store_true',
                            help='enable additional debug output')
    return arg_parser


def get_file_checksum(file_path: str, checksum_type: str = 'sha1',
                      buff_size: int = 1048576) -> str:
    """
    Calculates a file checksum.

    Args:
        file_path: File path.
        checksum_type: Checksum type.
        buff_size: Number of bytes to read at once.

    Returns:
        File checksum.
    """
    hasher = hashlib.new(checksum_type)
    with open(file_path, 'rb') as fd:
        buff = fd.read(buff_size)
        while len(buff):
            hasher.update(buff)
            buff = fd.read(buff_size)
    return hasher.hexdigest()


def normalize_path(path: str) -> str:
    """
    Returns an absolute path with all variables expanded.

    Args:
        path: Path to normalize.

    Returns:
        Normalized path.
    """
    return os.path.abspath(os.path.expanduser(os.path.expandvars(path)))


def iter_metadata(metadata_path: str) -> Iterator[Tuple[str, str]]:
    """
    Iterates over records in a CentOS git repository-compatible metadata file.

    Args:
        metadata_path: Metadata file path.

    Returns:
        Iterator over files and their checksums.
    """
    with open(metadata_path, 'r') as fd:
        for line in fd:
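            # Each record is "<checksum> <relative path>"; the checksum is
            # verified against the local file before the pair is yielded.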
            checksum, file_path = line.split()
            file_path = normalize_path(file_path)
            assert checksum == get_file_checksum(file_path)
            yield file_path, checksum


def iter_files(files: List[str]) -> Iterator[Tuple[str, str]]:
    """
    Iterates over a list of files and calculates checksums for them.

    Args:
        files: List of files.

    Returns:
        Iterator over files and their checksums.
    """
    for file_path in files:
        file_path = normalize_path(file_path)
        checksum = get_file_checksum(file_path)
        yield file_path, checksum


def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool:
    """
    Checks if a file with a given checksum is already uploaded.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        checksum: File checksum.

    Returns:
        True if a file is already uploaded, False otherwise.
    """
    try:
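        # Objects are stored under their checksum, so a successful HEAD
        # request means the blob is already in the bucket.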
        s3_client.head_object(Bucket=bucket_name, Key=checksum)
        return True
    except ClientError:
        return False


def upload_file(s3_client, bucket_name: str, file_path: str,
                checksum: str, private: bool):
    """
    Uploads a file to an Amazon S3 bucket.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        file_path: File path.
        checksum: File checksum.
        private: False if file should be public, True otherwise.
    """
    acl = 'bucket-owner-full-control' if private else 'public-read'
    s3_client.upload_file(file_path, bucket_name, checksum,
                          ExtraArgs={'ACL': acl})


def add_metadata_record(metadata_fd: TextIO, file_path: str, checksum: str):
    """
    Adds a source file record to a metadata file.

    Args:
        metadata_fd: Metadata file descriptor.
        file_path: Source file path.
        checksum: Source file checksum.
    """
    rel_path = os.path.relpath(file_path)
    metadata_fd.write(f'{checksum} {rel_path}\n')


def main(sys_args):
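    """
    Command line entry point: uploads the requested files to the sources
    cache and, optionally, writes a metadata file for them.

    Args:
        sys_args: Command line arguments (without the program name).
    """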
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args(sys_args)
    s3_client = boto3.client('s3')
    if args.input_metadata:
        iterator = iter_metadata(args.input_metadata)
    else:
        iterator = iter_files(args.file)
    out_fd = None
    if args.output_metadata:
        out_fd = open(args.output_metadata, 'w')
    try:
        for file_path, checksum in iterator:
            file_url = f'https://{args.domain_name}/{checksum}'
            if not is_file_exist(s3_client, args.bucket, checksum):
                upload_file(s3_client, args.bucket, file_path, checksum,
                            args.private)
                print(f'{file_path} uploaded: {file_url}')
            else:
                print(f'{file_path} exists: {file_url}')
            if out_fd:
                add_metadata_record(out_fd, file_path, checksum)
    finally:
        if out_fd:
            out_fd.close()


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))