0.0.1 version

- added alma_get_sources tool
- added tests for common functions
- added setup.py and the project uploaded to PyPI

This commit is contained in:
parent 896ea1f1ba
commit 804cca076e

README.md (36 lines changed)
@@ -3,6 +3,24 @@
 Utilities for working with the AlmaLinux OS Git server.
 
+## alma_get_sources
+
+The `alma_get_sources` script downloads sources and BLOBs from the AlmaLinux
+sources cache.
+
+### Usage
+
+Run the `alma_get_sources` tool in a git project root directory:
+
+1. Clone an AlmaLinux RPM package git project from
+   [git.almalinux.org](https://git.almalinux.org).
+2. Switch to a required branch.
+3. Run the `alma_get_sources` tool:
+
+```shell
+$ alma_get_sources
+```
+
 ## alma_blob_upload
 
 The `alma_blob_upload` script uploads sources and BLOBs to the AlmaLinux

@@ -10,16 +28,6 @@ sources cache.
 
 ### Prerequirements
 
-Install the `python3-boto3` package:
-
-```shell
-# RPM-based distributions. On EL8 derivatives the package is available from EPEL.
-$ sudo dnf install python3 python3-boto3
-
-# Debian-based distributions
-$ sudo apt install python3-boto3
-```
-
 Create an AWS credentials file ~/.aws/credentials with the following content:
 
 ```ini

@@ -43,20 +51,20 @@ For CentOS repositories workflow will be the following:
 3. Run the `alma_blob_upload` tool (don't forget to replace `PROJECT_NAME` with
    an actual project name):
 
 ```shell
-$ alma_blob_upload.py -i .PROJECT_NAME.metadata
+$ alma_blob_upload -i .PROJECT_NAME.metadata
 ```
 
 Alternatively, you can upload a list of files in the following way:
 
 ```shell
-$ alma_blob_upload.py -f SOURCES/FILE_1 SOURCES/FILE_N
+$ alma_blob_upload -f SOURCES/FILE_1 SOURCES/FILE_N
 ```
 
 The `alma_blob_upload` utility can also generate a CentOS-compatible metadata
 file:
 
-```
-$ alma_blob_upload.py -o .PROJECT_NAME.metadata -f SOURCES/FILE_1 SOURCES/FILE_N
+```shell
+$ alma_blob_upload -o .PROJECT_NAME.metadata -f SOURCES/FILE_1 SOURCES/FILE_N
 ```
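For context on the README instructions above: a `.PROJECT_NAME.metadata` file is a plain-text list of `<checksum> <relative path>` records, one per line, and the checksum type is inferred from the digest length (see `detect_checksum_type` in `almalinux/gitutils/common.py` below). A sample built from this commit's own test fixtures:

```
850747ae43a5c81f1dd0d906dfa9e149eb19748a SOURCES/mc-4.8.19.tar.xz
b37758528c0338d529b3fb16fd39f28da58241abc856e16bf0bc8b99c60cd632 SOURCES/binary-blob
```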
almalinux/gitutils/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
+name = 'gitutils'
almalinux/gitutils/blob_upload.py (rewritten: 193 → 156 lines)

The previous version was a standalone script (shebang, local `get_file_checksum`, `normalize_path`, and `iter_metadata` helpers, `print`-based output, and an `if __name__ == '__main__'` entry point). The shared helpers now live in `almalinux.gitutils.common`, output goes through `logging`, and metadata checksums are verified before upload. New version:

```python
"""Uploads sources and BLOBs to the AlmaLinux sources cache"""

import argparse
import logging
import os
import sys
from typing import Iterator, List, Optional, Tuple

import boto3
import botocore.exceptions

from almalinux.gitutils.errors import ChecksumError
from almalinux.gitutils.common import (
    configure_logger, find_metadata_file, get_file_checksum, iter_metadata,
    normalize_path
)


def init_arg_parser() -> argparse.ArgumentParser:
    """
    Initializes a command line arguments parser.

    Returns:
        Command line arguments parser.
    """
    arg_parser = argparse.ArgumentParser(
        prog="alma_blob_upload",
        description="Uploads sources and BLOBs to the AlmaLinux sources cache"
    )
    group = arg_parser.add_mutually_exclusive_group()
    group.add_argument('-f', '--file', nargs='+', help='file(s) to upload')
    group.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
                       help='input metadata file list to upload. Will be '
                            'detected automatically if omitted and no files '
                            'provided')
    arg_parser.add_argument('-b', '--bucket', default="sources.almalinux.org",
                            help='Amazon S3 bucket name. Default is '
                                 'sources.almalinux.org')
    arg_parser.add_argument('-o', '--output-metadata', metavar='OUTPUT_FILE',
                            help='output metadata file path')
    arg_parser.add_argument('-p', '--private', action='store_true',
                            help='set uploaded file mode to private. All '
                                 'uploaded files are public by default')
    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
                            help='AlmaLinux sources server domain name. '
                                 'Default is sources.almalinux.org')
    arg_parser.add_argument('-v', '--verbose', action='store_true',
                            help='enable additional debug output')
    return arg_parser


def iter_files(files: List[str]) -> Iterator[Tuple[str, str, str]]:
    """
    Iterates over a list of files and calculates checksums for them.

    Args:
        files: List of files.

    Returns:
        Iterator over files and their checksums.
    """
    checksum_type = 'sha1'
    for rel_path in files:
        file_path = normalize_path(rel_path)
        checksum = get_file_checksum(file_path, checksum_type)
        yield rel_path, checksum, checksum_type


def is_file_exist(s3_client, bucket_name: str, checksum: str) -> bool:
    """
    Checks if a file with a given checksum is already uploaded.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        checksum: File checksum.

    Returns:
        True if a file is already uploaded, False otherwise.
    """
    try:
        s3_client.head_object(Bucket=bucket_name, Key=checksum)
        return True
    except botocore.exceptions.ClientError:
        return False


def upload_file(s3_client, bucket_name: str, file_path: str,
                checksum: str, private: bool):
    """
    Uploads a file to an Amazon S3 bucket.

    Args:
        s3_client: Amazon S3 client.
        bucket_name: S3 bucket name.
        file_path: File path.
        checksum: File checksum.
        private: False if file should be public, True otherwise.
    """
    acl = 'bucket-owner-full-control' if private else 'public-read'
    s3_client.upload_file(file_path, bucket_name, checksum,
                          ExtraArgs={'ACL': acl})


def get_file_iterator(
        files: List[str], metadata_path: Optional[str]
) -> Iterator[Tuple[str, str, str]]:
    """
    Finds a suitable file iterator for given arguments.

    Args:
        files: List of files.
        metadata_path: Metadata file path.

    Returns:
        File iterator.
    """
    if files:
        iterator = iter_files(files)
    else:
        if not metadata_path:
            metadata_path = find_metadata_file(os.getcwd())
        iterator = iter_metadata(metadata_path)
    return iterator


def main():
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args(sys.argv[1:])
    configure_logger(args.verbose)
    s3_client = boto3.client('s3')
    iterator = get_file_iterator(args.file, args.input_metadata)
    out_fd = None
    if args.output_metadata:
        out_fd = open(args.output_metadata, 'w')
    try:
        for rel_path, checksum, checksum_type in iterator:
            file_path = normalize_path(rel_path)
            if not args.file:
                real_checksum = get_file_checksum(file_path, checksum_type)
                if real_checksum != checksum:
                    raise ChecksumError(
                        f"{rel_path} {checksum_type} checksum {real_checksum} "
                        f"doesn't match expected {checksum}"
                    )
            file_url = f'https://{args.domain_name}/{checksum}'
            if not is_file_exist(s3_client, args.bucket, checksum):
                upload_file(s3_client, args.bucket, file_path, checksum,
                            args.private)
                logging.info(f'{rel_path} successfully uploaded: {file_url}')
            else:
                logging.info(f'{rel_path} is already uploaded: {file_url}')
            if out_fd:
                out_fd.write(f'{checksum} {rel_path}\n')
    finally:
        if out_fd:
            out_fd.close()
```
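Note the behavioral change in the argument parser: the `-f`/`-i` mutually exclusive group is no longer `required=True`, so when both are omitted `get_file_iterator` falls back to `find_metadata_file(os.getcwd())`. A minimal sketch of that workflow (the project directory and metadata file name are hypothetical):

```shell
$ cd mc                 # checkout containing a .mc.metadata file
$ alma_blob_upload      # no -f/-i given: the metadata file is auto-detected
$ alma_blob_upload -v   # same, with DEBUG-level log output
```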
almalinux/gitutils/common.py (new file, 120 lines)

```python
"""AlmaLinux Git server utilities common functions"""

import hashlib
import logging
import os
import re
from typing import Iterator, Tuple

__all__ = ['configure_logger', 'detect_checksum_type', 'find_metadata_file',
           'get_file_checksum', 'iter_metadata', 'normalize_path']


def configure_logger(verbose: bool) -> logging.Logger:
    """
    Configures a console logger.

    Args:
        verbose: Show DEBUG messages if True, show INFO and higher otherwise.

    Returns:
        Configured logger.
    """
    level = logging.DEBUG if verbose else logging.INFO
    handler = logging.StreamHandler()
    handler.setLevel(level)
    log_format = "%(levelname)-8s: %(message)s"
    formatter = logging.Formatter(log_format, '%y-%m-%d %H:%M:%S')
    handler.setFormatter(formatter)
    logger = logging.getLogger()
    logger.addHandler(handler)
    logger.setLevel(level)
    return logger


def detect_checksum_type(checksum: str) -> str:
    """
    Detects checksum type by its length.

    Args:
        checksum: Checksum.

    Returns:
        Checksum type.
    """
    hash_types = {32: 'md5', 40: 'sha1', 64: 'sha256', 128: 'sha512'}
    hash_type = hash_types.get(len(checksum))
    if not hash_type:
        raise ValueError(f'unknown checksum type {checksum}')
    return hash_type


def find_metadata_file(path: str) -> str:
    """
    Finds a sources metadata file in the specified directory.

    Args:
        path: Directory to search in.

    Returns:
        Sources metadata file path.
    """
    files = [f for f in os.listdir(path) if re.match(r'^\.\S*?\.metadata$', f)]
    if not files:
        raise Exception('metadata file is not found')
    elif len(files) > 1:
        raise Exception('multiple metadata files found. Please specify one to '
                        'use')
    return os.path.join(path, files[0])


def get_file_checksum(file_path: str, checksum_type: str = 'sha1',
                      buff_size: int = 1048576) -> str:
    """
    Calculates a file checksum.

    Args:
        file_path: File path.
        checksum_type: Checksum type.
        buff_size: Number of bytes to read at once.

    Returns:
        File checksum.
    """
    hasher = hashlib.new(checksum_type)
    with open(file_path, 'rb') as fd:
        buff = fd.read(buff_size)
        while len(buff):
            hasher.update(buff)
            buff = fd.read(buff_size)
    return hasher.hexdigest()


def iter_metadata(metadata_path: str) -> Iterator[Tuple[str, str, str]]:
    """
    Iterates over records in a CentOS git repository-compatible metadata file.

    Args:
        metadata_path: Metadata file path.

    Returns:
        Iterator over files and their checksums.
    """
    with open(metadata_path, 'r') as fd:
        for line in fd:
            checksum, file_path = line.split()
            checksum_type = detect_checksum_type(checksum)
            yield file_path, checksum, checksum_type


def normalize_path(path: str) -> str:
    """
    Returns an absolute path with all variables expanded.

    Args:
        path: Path to normalize.

    Returns:
        Normalized path.
    """
    return os.path.abspath(os.path.expanduser(os.path.expandvars(path)))
```
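A quick sketch of how these helpers compose, using a checksum from this commit's tests (the metadata path is hypothetical):

```python
from almalinux.gitutils.common import (
    detect_checksum_type, get_file_checksum, iter_metadata, normalize_path
)

# the checksum type is inferred purely from the digest length
assert detect_checksum_type('35d14f5ab4ee239b070f3b645fb82837') == 'md5'

# walk a metadata file and verify each listed file on disk
for rel_path, checksum, checksum_type in iter_metadata('.project.metadata'):
    real = get_file_checksum(normalize_path(rel_path), checksum_type)
    print(rel_path, checksum_type, 'OK' if real == checksum else 'MISMATCH')
```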
almalinux/gitutils/errors.py (new file, 15 lines)

```python
"""AlmaLinux Git server utilities error classes"""


class ChecksumError(Exception):

    """File checksum mismatch exception"""

    pass


class NetworkError(Exception):

    """Network error exception"""

    pass
```
almalinux/gitutils/get_sources.py (new file, 120 lines)

```python
"""Downloads sources and blobs from AlmaLinux or CentOS sources cache"""

import argparse
import logging
import os
import shutil
import sys

import requests

from almalinux.gitutils.common import (
    configure_logger, find_metadata_file, get_file_checksum, iter_metadata,
    normalize_path
)
from almalinux.gitutils.errors import ChecksumError, NetworkError


def init_arg_parser() -> argparse.ArgumentParser:
    """
    Initializes a command line arguments parser.

    Returns:
        Command line arguments parser.
    """
    arg_parser = argparse.ArgumentParser(prog='alma_get_sources',
                                         description=__doc__)
    arg_parser.add_argument('-i', '--input-metadata', metavar='INPUT_FILE',
                            help='input metadata file list to download')
    arg_parser.add_argument('--domain-name', default='sources.almalinux.org',
                            help='AlmaLinux sources server domain name. '
                                 'Default is sources.almalinux.org')
    arg_parser.add_argument('-v', '--verbose', action='store_true',
                            help='enable additional debug output')
    return arg_parser


def create_sources_dir(base_dir: str, rel_path: str):
    """
    Creates a sources directory if it doesn't exist.

    Args:
        base_dir: Project's base directory.
        rel_path: Project's source file relative path.
    """
    dir_name, file_name = os.path.split(rel_path)
    dir_path = os.path.join(base_dir, dir_name)
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)


def download_alma_blob(file_path: str, checksum: str, domain_name: str):
    """
    Downloads a BLOB from the AlmaLinux Git sources cache.

    Args:
        file_path: Destination file path.
        checksum: File checksum.
        domain_name: AlmaLinux Git source cache domain name.
    """
    url = f'https://{domain_name}/{checksum}'
    with requests.get(url, stream=True) as rqst:
        try:
            rqst.raise_for_status()
        except requests.exceptions.HTTPError as e:
            raise NetworkError(str(e))
        with open(file_path, 'wb') as fd:
            shutil.copyfileobj(rqst.raw, fd)


def download_metadata_blobs(metadata_path: str, base_dir: str,
                            domain_name: str):
    """
    Downloads BLOBs listed in a metadata file from AlmaLinux Git sources cache.

    Args:
        metadata_path: Metadata file path.
        base_dir: Package sources base directory.
        domain_name: AlmaLinux Git sources cache domain name.
    """
    for rel_path, checksum, checksum_type in iter_metadata(metadata_path):
        file_path = os.path.join(base_dir, rel_path)
        if os.path.exists(file_path):
            real_checksum = get_file_checksum(file_path, checksum_type)
            if real_checksum != checksum:
                raise ChecksumError(
                    f"{rel_path} already exists but its {checksum_type} "
                    f"checksum {real_checksum} doesn't match expected "
                    f"{checksum}"
                )
            logging.info(f'{rel_path} already exists and its checksum is '
                         f'correct')
            continue
        create_sources_dir(base_dir, rel_path)
        download_alma_blob(file_path, checksum, domain_name)
        real_checksum = get_file_checksum(file_path, checksum_type)
        if real_checksum != checksum:
            raise ChecksumError(
                f"{rel_path} has been downloaded but its {checksum_type} "
                f"checksum {real_checksum} doesn't match expected {checksum}"
            )
        logging.info(f'{rel_path} has been successfully downloaded')


def main():
    arg_parser = init_arg_parser()
    args = arg_parser.parse_args(sys.argv[1:])
    configure_logger(args.verbose)
    base_dir = os.getcwd()
    if args.input_metadata:
        metadata_path = normalize_path(args.input_metadata)
    else:
        metadata_path = find_metadata_file(base_dir)
    try:
        download_metadata_blobs(metadata_path, base_dir, args.domain_name)
    except ChecksumError as e:
        logging.error(e)
        return os.EX_DATAERR
    except NetworkError as e:
        logging.error(e)
        return os.EX_IOERR
```
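Given the `%(levelname)-8s: %(message)s` format set in `configure_logger` and the log messages above, a successful run would print roughly the following (illustrative output; the file name comes from the test fixtures):

```shell
$ alma_get_sources
INFO    : SOURCES/mc-4.8.19.tar.xz has been successfully downloaded
```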
pyproject.toml (new file, 6 lines)

```toml
[build-system]
requires = [
    "setuptools>=42",
    "wheel"
]
build-backend = "setuptools.build_meta"
```
setup.py (new file, 36 lines)

```python
from setuptools import find_namespace_packages, setup

with open("README.md", "r", encoding="utf-8") as fd:
    long_description = fd.read()

setup(
    name="almalinux-git-utils",
    version="0.0.1",
    author="Eugene Zamriy",
    author_email="ezamriy@almalinux.org",
    description="Utilities for working with the AlmaLinux OS Git server",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://git.almalinux.org/almalinux/almalinux-git-utils",
    project_urls={
        "Bug Tracker": "https://git.almalinux.org/almalinux/almalinux-git-utils/issues",
    },
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
        "Operating System :: OS Independent",
    ],
    packages=find_namespace_packages(include=['almalinux.*']),
    entry_points={
        'console_scripts': [
            'alma_blob_upload=almalinux.gitutils.blob_upload:main',
            'alma_get_sources=almalinux.gitutils.get_sources:main'
        ]
    },
    install_requires=[
        'boto3>=1.15.15',
        'requests>=2.20.0'
    ],
    python_requires=">=3.6",
    zip_safe=False
)
```
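Since the commit message says the project was uploaded to PyPI, the package can presumably be installed with pip, which puts both console-script entry points on `PATH`:

```shell
$ pip3 install almalinux-git-utils
$ alma_blob_upload --help
$ alma_get_sources --help
```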
tests/gitutils/test_common.py (new file, 94 lines)

```python
import os

import pytest

from almalinux.gitutils.common import *


@pytest.mark.parametrize(
    'checksum,expected',
    [('35d14f5ab4ee239b070f3b645fb82837', 'md5'),
     ('1014c8812720619a5a6bcd189e5d7f5d16276d86', 'sha1'),
     ('86d8a9a32cdaff2c6003c67a12549466319e0ae51b7665fd01fd9354a3b1cf55',
      'sha256'),
     ('9906e61ef0b693bf978e2a88b737c79dd2c815cfc1a09443f04b79b994b4646ff72f18'
      '6e42461b3a5768667119f39fa006ce71530791a5b35c2278e9252ec3ea', 'sha512')]
)
def test_detect_checksum_type(checksum, expected):
    """detect_checksum_type returns type for supported checksums"""
    assert detect_checksum_type(checksum) == expected


def test_detect_checksum_type_error():
    """detect_checksum_type raises ValueError if checksum type is unknown"""
    with pytest.raises(ValueError):
        detect_checksum_type('somethingwrong')


@pytest.mark.parametrize(
    'checksum_type,checksum',
    [(None, '06364afe79d801433188262478a76d19777ef351'),
     ('sha1', '06364afe79d801433188262478a76d19777ef351'),
     ('sha256',
      'b37758528c0338d529b3fb16fd39f28da58241abc856e16bf0bc8b99c60cd632')]
)
def test_get_file_checksum(tmpdir, checksum_type, checksum):
    """get_file_checksum supports different checksum types"""
    file_path = os.path.join(tmpdir, 'test_file.txt')
    with open(file_path, 'w') as fd:
        fd.write('TESTDATA\n')
    args = [file_path]
    if checksum_type:
        args.append(checksum_type)
    assert get_file_checksum(*args) == checksum


def test_find_metadata_file_single(tmpdir):
    """find_metadata_file returns a single metadata file"""
    file_path = os.path.join(tmpdir, '.project.metadata')
    open(file_path, 'a').close()
    assert find_metadata_file(tmpdir) == file_path


def test_find_metadata_file_missing(tmpdir):
    """find_metadata_file raises Exception when metadata file is not found"""
    with pytest.raises(Exception):
        find_metadata_file(tmpdir)


def test_find_metadata_file_multiple(tmpdir):
    """
    find_metadata_file raises Exception when there are multiple metadata files
    """
    for i in range(2):
        open(os.path.join(tmpdir, f'.project{i}.metadata'), 'a').close()
    with pytest.raises(Exception):
        find_metadata_file(tmpdir)


def test_iter_metadata(tmpdir):
    """iter_metadata returns checksums from metadata file"""
    data = [
        ('SOURCES/mc-4.8.19.tar.xz',
         '850747ae43a5c81f1dd0d906dfa9e149eb19748a', 'sha1'),
        ('SOURCES/binary-blob',
         'b37758528c0338d529b3fb16fd39f28da58241abc856e16bf0bc8b99c60cd632',
         'sha256')
    ]
    metadata_path = os.path.join(tmpdir, '.project.metadata')
    with open(metadata_path, 'w') as fd:
        for rec in data:
            fd.write(f'{rec[1]} {rec[0]}\n')
    metadata = []
    for file_path, checksum, checksum_type in iter_metadata(metadata_path):
        metadata.append((file_path, checksum, checksum_type))
    assert metadata == data


def test_normalize_path(monkeypatch):
    """
    normalize_path expands variables and converts relative paths to absolute
    """
    cwd = os.getcwd()
    expected = os.path.join(cwd, 'basedir', 'subdir')
    monkeypatch.setenv('BASE_DIR', 'basedir')
    assert normalize_path('${BASE_DIR}/subdir') == expected
```
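Assuming pytest is installed, the suite should run from the repository root with:

```shell
$ python3 -m pytest tests/
```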