pungi/pungi/phases/createrepo.py
Patrick Uiterwijk 1a10a1fe83 Optionally do old_compose per release type
This would make sure that e.g. "updates" composes don't try to use "updates-testing" as an
old_compose_path, which would create practically useless deltarpms and for no repodata
reuse at all.

Signed-off-by: Patrick Uiterwijk <patrick@puiterwijk.org>
2017-11-06 15:17:33 +01:00

315 lines
12 KiB
Python

# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://gnu.org/licenses/>.
__all__ = (
"create_variant_repo",
)
import os
import glob
import shutil
import threading
import copy
import errno
from kobo.threads import ThreadPool, WorkerThread
from kobo.shortcuts import run, relative_path
from ..wrappers.scm import get_dir_from_scm
from ..wrappers.createrepo import CreaterepoWrapper
from .base import PhaseBase
from ..util import find_old_compose, temp_dir, get_arch_variant_data
import productmd.rpms
createrepo_lock = threading.Lock()
createrepo_dirs = set()
class CreaterepoPhase(PhaseBase):
name = "createrepo"
def __init__(self, compose):
PhaseBase.__init__(self, compose)
self.pool = ThreadPool(logger=self.compose._logger)
def validate(self):
errors = []
try:
super(CreaterepoPhase, self).validate()
except ValueError as exc:
errors = exc.message.split('\n')
if not self.compose.old_composes and self.compose.conf.get('createrepo_deltas'):
errors.append('Can not generate deltas without old compose')
if errors:
raise ValueError('\n'.join(errors))
def run(self):
get_productids_from_scm(self.compose)
for i in range(self.compose.conf['createrepo_num_threads']):
self.pool.add(CreaterepoThread(self.pool))
for variant in self.compose.get_variants():
if variant.is_empty:
continue
self.pool.queue_put((self.compose, None, variant, "srpm"))
for arch in variant.arches:
self.pool.queue_put((self.compose, arch, variant, "rpm"))
self.pool.queue_put((self.compose, arch, variant, "debuginfo"))
self.pool.start()
def create_variant_repo(compose, arch, variant, pkg_type):
types = {
'rpm': ('binary',
lambda: compose.paths.compose.repository(arch=arch, variant=variant)),
'srpm': ('source',
lambda: compose.paths.compose.repository(arch='src', variant=variant)),
'debuginfo': ('debug',
lambda: compose.paths.compose.debug_repository(arch=arch, variant=variant)),
}
if variant.is_empty or (arch is None and pkg_type != 'srpm'):
compose.log_info("[SKIP ] Creating repo (arch: %s, variant: %s): %s" % (arch, variant))
return
createrepo_c = compose.conf["createrepo_c"]
createrepo_checksum = compose.conf["createrepo_checksum"]
repo = CreaterepoWrapper(createrepo_c=createrepo_c)
repo_dir_arch = compose.paths.work.arch_repo(arch='global' if pkg_type == 'srpm' else arch)
try:
repo_dir = types[pkg_type][1]()
except KeyError:
raise ValueError("Unknown package type: %s" % pkg_type)
msg = "Creating repo (arch: %s, variant: %s): %s" % (arch, variant, repo_dir)
# HACK: using global lock
# This is important when addons put packages into parent variant directory.
# There can't be multiple createrepo processes operating on the same
# directory.
with createrepo_lock:
if repo_dir in createrepo_dirs:
compose.log_warning("[SKIP ] Already in progress: %s" % msg)
return
createrepo_dirs.add(repo_dir)
if compose.DEBUG and os.path.isdir(os.path.join(repo_dir, "repodata")):
compose.log_warning("[SKIP ] %s" % msg)
return
compose.log_info("[BEGIN] %s" % msg)
# We only want delta RPMs for binary repos.
with_deltas = pkg_type == 'rpm' and _has_deltas(compose, variant, arch)
rpms = set()
rpm_nevras = set()
# read rpms from metadata rather than guessing it by scanning filesystem
manifest_file = compose.paths.compose.metadata("rpms.json")
manifest = productmd.rpms.Rpms()
manifest.load(manifest_file)
for rpms_arch, data in manifest.rpms.get(variant.uid, {}).items():
if arch is not None and arch != rpms_arch:
continue
for srpm_data in data.values():
for rpm_nevra, rpm_data in srpm_data.items():
if types[pkg_type][0] != rpm_data['category']:
continue
path = os.path.join(compose.topdir, "compose", rpm_data["path"])
rel_path = relative_path(path, repo_dir.rstrip("/") + "/")
rpms.add(rel_path)
rpm_nevras.add(str(rpm_nevra))
file_list = compose.paths.work.repo_package_list(arch, variant, pkg_type)
with open(file_list, 'w') as f:
for rel_path in sorted(rpms):
f.write("%s\n" % rel_path)
# Only find last compose when we actually want delta RPMs.
old_package_dirs = _get_old_package_dirs(compose, repo_dir) if with_deltas else None
if old_package_dirs:
# If we are creating deltas, we can not reuse existing metadata, as
# that would stop deltas from being created.
# This seems to only affect createrepo_c though.
repo_dir_arch = None
comps_path = None
if compose.has_comps and pkg_type == "rpm":
comps_path = compose.paths.work.comps(arch=arch, variant=variant)
cmd = repo.get_createrepo_cmd(repo_dir, update=True, database=True, skip_stat=True,
pkglist=file_list, outputdir=repo_dir,
workers=compose.conf["createrepo_num_workers"],
groupfile=comps_path, update_md_path=repo_dir_arch,
checksum=createrepo_checksum,
deltas=with_deltas,
oldpackagedirs=old_package_dirs,
use_xz=compose.conf['createrepo_use_xz'])
log_file = compose.paths.log.log_file(arch, "createrepo-%s.%s" % (variant, pkg_type))
run(cmd, logfile=log_file, show_cmd=True)
# call modifyrepo to inject productid
product_id = compose.conf.get("product_id")
if product_id and pkg_type == "rpm":
# add product certificate to base (rpm) repo; skip source and debug
product_id_path = compose.paths.work.product_id(arch, variant)
if os.path.isfile(product_id_path):
cmd = repo.get_modifyrepo_cmd(os.path.join(repo_dir, "repodata"), product_id_path, compress_type="gz")
log_file = compose.paths.log.log_file(arch, "modifyrepo-%s" % variant)
run(cmd, logfile=log_file, show_cmd=True)
# productinfo is not supported by modifyrepo in any way
# this is a HACK to make CDN happy (dmach: at least I think, need to confirm with dgregor)
shutil.copy2(product_id_path, os.path.join(repo_dir, "repodata", "productid"))
# call modifyrepo to inject modulemd if needed
if arch in variant.arch_mmds:
import yaml
modules = []
for mmd in variant.arch_mmds[arch].values():
# Create copy of architecture specific mmd to filter out packages
# which are not part of this particular repo.
repo_mmd = copy.deepcopy(mmd)
# Modules without RPMs are also valid.
if ("artifacts" in repo_mmd["data"] and
"rpms" in repo_mmd["data"]["artifacts"]):
repo_mmd["data"]["artifacts"]["rpms"] = [
rpm_nevra for rpm_nevra in repo_mmd["data"]["artifacts"]["rpms"]
if rpm_nevra in rpm_nevras]
modules.append(repo_mmd)
with temp_dir() as tmp_dir:
modules_path = os.path.join(tmp_dir, "modules.yaml")
with open(modules_path, "w") as outfile:
outfile.write(yaml.dump_all(modules, explicit_start=True))
cmd = repo.get_modifyrepo_cmd(os.path.join(repo_dir, "repodata"),
modules_path, mdtype="modules",
compress_type="gz")
log_file = compose.paths.log.log_file(
arch, "modifyrepo-modules-%s" % variant)
run(cmd, logfile=log_file, show_cmd=True)
compose.log_info("[DONE ] %s" % msg)
class CreaterepoThread(WorkerThread):
def process(self, item, num):
compose, arch, variant, pkg_type = item
create_variant_repo(compose, arch, variant, pkg_type=pkg_type)
def get_productids_from_scm(compose):
# product_id is a scm_dict: {scm, repo, branch, dir}
# expected file name format: $variant_uid-$arch-*.pem
product_id = compose.conf.get("product_id")
if not product_id:
compose.log_info("No product certificates specified")
return
product_id_allow_missing = compose.conf["product_id_allow_missing"]
msg = "Getting product certificates from SCM..."
compose.log_info("[BEGIN] %s" % msg)
tmp_dir = compose.mkdtemp(prefix="pungi_")
try:
get_dir_from_scm(product_id, tmp_dir)
except OSError as e:
if e.errno == errno.ENOENT and product_id_allow_missing:
compose.log_warning("No product IDs in %s" % product_id)
return
raise
for arch in compose.get_arches():
for variant in compose.get_variants(arch=arch):
# some layered products may use base product name before variant
pem_files = glob.glob("%s/*%s-%s-*.pem" % (tmp_dir, variant.uid, arch))
# use for development:
# pem_files = glob.glob("%s/*.pem" % tmp_dir)[-1:]
if not pem_files:
msg = "No product certificate found (arch: %s, variant: %s)" % (arch, variant.uid)
if product_id_allow_missing:
compose.log_warning(msg)
continue
else:
shutil.rmtree(tmp_dir)
raise RuntimeError(msg)
if len(pem_files) > 1:
shutil.rmtree(tmp_dir)
raise RuntimeError("Multiple product certificates found (arch: %s, variant: %s): %s" % (arch, variant.uid, ", ".join(sorted([os.path.basename(i) for i in pem_files]))))
product_id_path = compose.paths.work.product_id(arch, variant)
shutil.copy2(pem_files[0], product_id_path)
shutil.rmtree(tmp_dir)
compose.log_info("[DONE ] %s" % msg)
def _get_old_package_dirs(compose, repo_dir):
"""Given a compose and a path to a repo in it, try to find corresponging
repo in an older compose and return a list of paths to directories with
packages in it.
"""
if not compose.conf['createrepo_deltas']:
return None
old_compose_path = find_old_compose(
compose.old_composes,
compose.ci_base.release.short,
compose.ci_base.release.version,
compose.ci_base.release.type_suffix if compose.conf['old_composes_per_release_type'] else None,
compose.ci_base.base_product.short if compose.ci_base.release.is_layered else None,
compose.ci_base.base_product.version if compose.ci_base.release.is_layered else None,
allowed_statuses=['FINISHED', 'FINISHED_INCOMPLETE'],
)
if not old_compose_path:
compose.log_info("No suitable old compose found in: %s" % compose.old_composes)
return None
rel_dir = relative_path(repo_dir, compose.topdir.rstrip('/') + '/')
old_package_dirs = os.path.join(old_compose_path, rel_dir, 'Packages')
if compose.conf['hashed_directories']:
old_package_dirs = _find_package_dirs(old_package_dirs)
return old_package_dirs
def _find_package_dirs(base):
"""Assuming the packages are in directories hashed by first letter, find
all the buckets in given base.
"""
buckets = set()
try:
for subdir in os.listdir(base):
bucket = os.path.join(base, subdir)
if os.path.isdir(bucket):
buckets.add(bucket)
except OSError:
# The directory does not exist, so no drpms for you!
pass
return sorted(buckets)
def _has_deltas(compose, variant, arch):
"""Check if delta RPMs are enabled for given variant and architecture."""
key = 'createrepo_deltas'
if isinstance(compose.conf.get(key), bool):
return compose.conf[key]
return any(get_arch_variant_data(compose.conf, key, arch, variant))