pungi/pungi/phases/image_checksum.py

207 lines
6.6 KiB
Python

# -*- coding: utf-8 -*-
import os
from kobo import shortcuts
from collections import defaultdict
import threading
from .base import PhaseBase
from ..util import get_format_substs, get_file_size
MULTIPLE_CHECKSUMS_ERROR = (
'Config option "media_checksum_one_file" requires only one checksum'
' to be configured in "media_checksums".'
)
class ImageChecksumPhase(PhaseBase):
"""Go through images specified in image manifest and generate their
checksums. The manifest will be updated with the checksums.
"""
name = "image_checksum"
def __init__(self, compose):
super(ImageChecksumPhase, self).__init__(compose)
self.checksums = self.compose.conf["media_checksums"]
self.one_file = self.compose.conf["media_checksum_one_file"]
def skip(self):
# Skipping this phase does not make sense:
# * if there are no images, it doesn't do anything and is quick
# * if there are images, they must have checksums computed or else
# writing metadata will fail
return False
def validate(self):
errors = []
if self.one_file and len(self.checksums) != 1:
errors.append(MULTIPLE_CHECKSUMS_ERROR)
if errors:
raise ValueError("\n".join(errors))
def _get_images(self):
"""Returns a mapping from directories to sets of ``Image``s.
The paths to dirs are absolute.
"""
top_dir = self.compose.paths.compose.topdir()
images = {}
for variant in self.compose.im.images:
for arch in self.compose.im.images[variant]:
for image in self.compose.im.images[variant][arch]:
path = os.path.dirname(os.path.join(top_dir, image.path))
images.setdefault((variant, arch, path), set()).add(image)
return images
def _get_base_filename(self, variant, arch, **kwargs):
base_checksum_name = self.compose.conf["media_checksum_base_filename"]
if base_checksum_name:
substs = get_format_substs(
self.compose, variant=variant, arch=arch, **kwargs
)
base_checksum_name = (base_checksum_name % substs).format(**substs)
base_checksum_name += "-"
return base_checksum_name
def run(self):
topdir = self.compose.paths.compose.topdir()
make_checksums(
topdir,
self.compose.im,
self.checksums,
self.one_file,
self._get_base_filename,
)
def _compute_checksums(
results,
cache,
variant,
arch,
path,
images,
checksum_types,
base_checksum_name_gen,
one_file,
results_lock,
cache_lock,
):
for image in images:
filename = os.path.basename(image.path)
full_path = os.path.join(path, filename)
if not os.path.exists(full_path):
continue
filesize = image.size or get_file_size(full_path)
cache_lock.acquire()
if full_path not in cache:
cache_lock.release()
# Source ISO is listed under each binary architecture. There's no
# point in checksumming it twice, so we can just remember the
# digest from first run..
checksum_value = shortcuts.compute_file_checksums(full_path, checksum_types)
with cache_lock:
cache[full_path] = checksum_value
else:
cache_lock.release()
with cache_lock:
digests = cache[full_path]
for checksum, digest in digests.items():
# Update metadata with the checksum
image.add_checksum(None, checksum, digest)
# If not turned of, create the file-specific checksum file
if not one_file:
checksum_filename = os.path.join(
path, "%s.%sSUM" % (filename, checksum.upper())
)
with results_lock:
results[checksum_filename].add(
(filename, filesize, checksum, digest)
)
if one_file:
dirname = os.path.basename(path)
base_checksum_name = base_checksum_name_gen(
variant, arch, dirname=dirname
)
checksum_filename = base_checksum_name + "CHECKSUM"
else:
base_checksum_name = base_checksum_name_gen(variant, arch)
checksum_filename = "%s%sSUM" % (base_checksum_name, checksum.upper())
checksum_path = os.path.join(path, checksum_filename)
with results_lock:
results[checksum_path].add((filename, filesize, checksum, digest))
def make_checksums(topdir, im, checksum_types, one_file, base_checksum_name_gen):
results = defaultdict(set)
cache = {}
threads = []
results_lock = threading.Lock() # lock to synchronize access to the results dict.
cache_lock = threading.Lock() # lock to synchronize access to the cache dict.
# create all worker threads
for (variant, arch, path), images in get_images(topdir, im).items():
threads.append(
threading.Thread(
target=_compute_checksums,
args=[
results,
cache,
variant,
arch,
path,
images,
checksum_types,
base_checksum_name_gen,
one_file,
results_lock,
cache_lock,
],
)
)
threads[-1].start()
# wait for all worker threads to finish
for thread in threads:
thread.join()
for file in results:
dump_checksums(file, results[file])
def dump_checksums(checksum_file, data):
"""Write checksums to file.
:param checksum_file: where to write the checksums
:param data: an iterable of tuples (filename, filesize, checksum_type, hash)
"""
with open(checksum_file, "w") as f:
for filename, filesize, alg, checksum in sorted(data):
f.write("# %s: %s bytes\n" % (filename, filesize))
f.write("%s (%s) = %s\n" % (alg.upper(), filename, checksum))
def get_images(top_dir, manifest):
"""Returns a mapping from directories to sets of ``Image``s.
The paths to dirs are absolute.
"""
images = {}
for variant in manifest.images:
for arch in manifest.images[variant]:
for image in manifest.images[variant][arch]:
path = os.path.dirname(os.path.join(top_dir, image.path))
images.setdefault((variant, arch, path), []).append(image)
return images