When looking at a package in a lookaside repo, it does not make much sense to process its dependencies. We should just assume that the lookaside can satisfy them. In the worst case, this could result in packages being pulled into the compose just so that they could satisfy a dep of something in lookaside. Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
790 lines
29 KiB
790 lines
29 KiB
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Library General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <https://gnu.org/licenses/>.
from enum import Enum
from itertools import count
import logging
from kobo.rpmlib import parse_nvra
import pungi.common
import pungi.dnf_wrapper
import pungi.multilib_dnf
from pungi.profiler import Profiler
def get_source_name(pkg):
    """Return the name part of ``pkg.sourcerpm``.

    The string is split on its last two dashes and everything before them is
    returned; a string with fewer dashes is returned up to its first split
    point (or whole when it contains no dash).
    """
    # Workaround for rhbz#1418298
    fields = pkg.sourcerpm.rsplit('-', 2)
    return fields[0]
class GatherOptions(pungi.common.OptionsBase):
    """Options read by ``Gather`` while collecting packages."""

    def __init__(self, **kwargs):
        """Set the default value of every option, then apply ``kwargs``."""
        super(GatherOptions, self).__init__()

        # include all unused sub-packages of already included RPMs
        self.fulltree = False

        # A set of packages for which fulltree does not apply.
        self.fulltree_excludes = set()

        # include langpacks
        self.langpacks = []  # format: [{"package": "langpack-pattern-%s"}]

        # resolve dependencies
        self.resolve_deps = True

        # pull build dependencies
        self.selfhosting = False

        # none, all, build
        # TODO: validate values
        self.greedy_method = "none"

        # multilib options
        self.multilib_methods = []
        self.multilib_blacklist = []
        self.multilib_whitelist = []

        # prepopulate
        self.prepopulate = []

        # lookaside repos; packages will be flagged accordingly
        self.lookaside_repos = []

        # Apply the caller's overrides on top of the defaults; without this
        # step the keyword arguments accepted above are silently ignored.
        # NOTE(review): merge_options() is expected to come from
        # pungi.common.OptionsBase -- confirm against that class.
        self.merge_options(**kwargs)
class QueryCache(object):
    """Index the packages of a query by a tuple of their attributes.

    Iterating ``queue`` once up front and answering lookups from a dict is
    used as a faster alternative to repeatedly filtering the query.
    """

    def __init__(self, queue, *args, **kwargs):
        """Build the index.

        queue  -- iterable of packages; when ``kwargs`` are given it must
                  also provide ``filter(**kwargs)``
        args   -- attribute names that together form the lookup key
        kwargs -- optional filter applied to ``queue`` before indexing
        """
        self.cache = {}
        self.nargs = len(args)

        if kwargs:
            queue = queue.filter(**kwargs)

        for pkg in queue:
            key = tuple(getattr(pkg, arg) for arg in args)
            pkgs = self.cache.setdefault(key, [])
            if pkg not in pkgs:
                # use list preserve package order
                pkgs.append(pkg)

    def get(self, *args):
        """Return the list of packages stored under ``args`` or None.

        Raises ValueError when the number of arguments differs from the
        number of attribute names the cache was built with.
        """
        if len(args) != self.nargs:
            raise ValueError("Expected %s arguments, got %s" % (self.nargs, len(args)))
        key = tuple(args)
        return self.cache.get(key, None)
class PkgFlag(Enum):
    """Markers recorded for packages in the gather result.

    Every member has a distinct power-of-two value.
    """

    lookaside = 1 << 0
    input = 1 << 1
    greedy_build = 1 << 2
    prepopulate = 1 << 3
    conditional = 1 << 4
    self_hosting = 1 << 5
    fulltree = 1 << 6
    multilib = 1 << 7
    langpack = 1 << 8
class GatherBase(object):
    """Pre-computed package queries and arch helpers shared by gatherers."""

    def __init__(self, dnf_obj):
        """Split the sack of ``dnf_obj`` into source, debug and binary queries.

        ``dnf_obj`` must provide ``_sack`` (with a ``query()`` method) and
        ``arch_wrapper`` (with ``source_arches``, ``all_arches`` and
        ``native_arches``).
        """
        self.dnf = dnf_obj

        q = self._query
        q = q.filter(latest_per_arch=True).apply()

        # source packages
        self.q_source_packages = q.filter(arch=self.dnf.arch_wrapper.source_arches).apply()
        q = q.difference(self.q_source_packages)

        # filter arches
        q = q.filter(arch=self.dnf.arch_wrapper.all_arches).apply()
        q_noarch = q.filter(arch="noarch").apply()
        q_native = q.filter(arch=self.dnf.arch_wrapper.native_arches).apply()
        q_multilib = q.difference(q_native).union(q_noarch).apply()

        # debug packages
        self.q_debug_packages = q.filter(name__glob=["*-debuginfo", "*-debuginfo-*"]).apply()
        self.q_native_debug_packages = self.q_debug_packages.intersection(q_native)
        self.q_multilib_debug_packages = self.q_debug_packages.intersection(q_multilib)

        # binary packages
        self.q_binary_packages = q.difference(self.q_debug_packages)
        self.q_native_binary_packages = q_native.difference(self.q_debug_packages)
        self.q_multilib_binary_packages = q_multilib.difference(self.q_debug_packages)
        self.q_noarch_binary_packages = q_noarch.difference(self.q_debug_packages)

    @property
    def _query(self):
        """Return a fresh query on the underlying sack.

        This has to be a property: ``__init__`` reads ``self._query`` without
        calling it and immediately chains ``.filter()`` on the result.
        """
        return self.dnf._sack.query()

    def is_noarch_package(self, pkg):
        """Return True when ``pkg.arch`` is ``"noarch"``."""
        return pkg.arch == "noarch"

    def is_native_package(self, pkg):
        """Return True for noarch packages and packages of a native arch.

        Packages whose arch is one of the source arches are never native.
        """
        if pkg.arch in self.dnf.arch_wrapper.source_arches:
            return False
        if pkg.arch == "noarch":
            return True
        if pkg.arch in self.dnf.arch_wrapper.native_arches:
            return True
        return False

    def is_multilib_package(self, pkg):
        """Return True only for packages whose arch is a multilib arch.

        Source and noarch packages are never multilib.
        """
        if pkg.arch in self.dnf.arch_wrapper.source_arches:
            return False
        if pkg.arch == "noarch":
            return False
        if pkg.arch in self.dnf.arch_wrapper.multilib_arches:
            return True
        return False
class Gather(GatherBase):
    """Collect binary, debug and source packages starting from input patterns."""

    def __init__(self, dnf_obj, gather_options, logger=None):
        """Prepare queries, logging, multilib detection and result containers.

        dnf_obj        -- passed on to ``GatherBase.__init__``
        gather_options -- options object; stored as ``self.opts``
        logger         -- optional logger; a "gather_dnf" logger writing to a
                          stream handler is set up when omitted
        """
        super(Gather, self).__init__(dnf_obj)
        self.logger = logger
        if not self.logger:
            # default logger
            self.logger = logging.getLogger("gather_dnf")
            # Everything in this class is logged at debug level, so the
            # default logger has to let those records through.
            self.logger.setLevel(logging.DEBUG)

            if not self.logger.handlers:
                # default logging handler
                handler = logging.StreamHandler()
                handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)-8s] %(message)s",
                                                       datefmt="%Y-%m-%d %H:%M:%S"))
                handler.setLevel(logging.DEBUG)
                # The handler is useless until it is attached to the logger.
                self.logger.addHandler(handler)

        self.opts = gather_options
        self.logger.debug("Gather received gather_options=%s" % gather_options.__dict__)
        # NOTE(review): the argument list of this call was not visible in the
        # reviewed source; it is reconstructed from the multilib_* options
        # defined on GatherOptions -- confirm against Multilib.from_globs().
        self._multilib = pungi.multilib_dnf.Multilib.from_globs(
            self.dnf._sack,
            gather_options.multilib_methods,
            blacklist=self.opts.multilib_blacklist,
            whitelist=self.opts.multilib_whitelist)

        # already processed packages
        self.finished_add_binary_package_deps = {}  # {pkg: [deps]}
        self.finished_add_debug_package_deps = {}  # {pkg: [deps]}
        self.finished_add_source_package_deps = {}  # {pkg: [deps]}

        self.finished_get_package_deps_reqs = {}

        self.finished_add_conditional_packages = {}  # {pkg: [pkgs]}
        self.finished_add_source_packages = {}  # {pkg: src-pkg|None}
        self.sourcerpm_cache = {}  # {src_nvra: src-pkg|None}
        self.finished_add_debug_packages = {}  # {pkg: [debug-pkgs]}
        self.finished_add_fulltree_packages = {}  # {pkg: [pkgs]}
        self.finished_add_langpack_packages = {}  # {pkg: [pkgs]}
        self.finished_add_multilib_packages = {}  # {pkg: pkg|None}

        # result
        self.result_binary_packages = set()
        self.result_debug_packages = set()
        self.result_source_packages = set()
        self.result_package_flags = {}
def _set_flag(self, pkg, *flags):
    """Record all ``flags`` for ``pkg`` in ``self.result_package_flags``.

    An empty flag set is created for the package on first use.
    """
    pkg_flags = self.result_package_flags.setdefault(pkg, set())
    pkg_flags.update(flags)
def _has_flag(self, pkg, flag):
    """Return True when ``flag`` is recorded for ``pkg``.

    Packages without any recorded flags yield False.
    """
    try:
        recorded = self.result_package_flags[pkg]
    except KeyError:
        return False
    return flag in recorded
def _get_best_package(self, package_list, pkg=None, req=None):
    """Pick the preferred package(s) out of ``package_list``.

    package_list -- candidate packages
    pkg          -- unused, kept for interface compatibility
    req          -- the requirement being resolved, if any; only checked
                    for truthiness when ``greedy_method`` is "build"

    Returns a list: empty when nothing is usable, all candidates when
    ``greedy_method`` is "all", otherwise the single best match (or, for
    "build" with a ``req``, every candidate of the same kind built from the
    same source rpm as the best match).
    """
    if not package_list:
        return []

    if self.opts.greedy_method == "all":
        return list(package_list)

    all_pkgs = list(package_list)
    native_pkgs = self.q_native_binary_packages.filter(pkg=all_pkgs).apply()
    multilib_pkgs = self.q_multilib_binary_packages.filter(pkg=all_pkgs).apply()

    # Candidate pools in order of preference: native packages already in the
    # result, multilib packages already in the result, any native package,
    # any multilib package.  The first non-empty pool wins.
    result = set(native_pkgs) & self.result_binary_packages
    if not result:
        result = set(multilib_pkgs) & self.result_binary_packages
    if not result:
        result = set(native_pkgs)
    if not result:
        result = set(multilib_pkgs)
    if not result:
        return []

    # Best arch first (position in all_arches), then the shortest name, then
    # alphabetically.  A single key sort gives the same order as the three
    # consecutive stable cmp-based sorts and also works on Python 3, where
    # cmp() and positional comparison functions no longer exist.
    arches = self.dnf.arch_wrapper.all_arches
    ordered = sorted(result, key=lambda x: (arches.index(x.arch), len(x.name), x.name))
    match = ordered[0]

    if self.opts.greedy_method == "build" and req:
        if self.is_native_package(match):
            return [i for i in native_pkgs if i.sourcerpm == match.sourcerpm]
        return [i for i in multilib_pkgs if i.sourcerpm == match.sourcerpm]
    return [match]
def _add_packages(self, packages, pulled_by=None, req=None, reason=None):
    """Add ``packages`` to ``self.result_binary_packages``.

    packages  -- iterable of packages; None entries are not allowed
    pulled_by -- package that caused the addition (used for logging only)
    req       -- requirement that caused the addition (logging only)
    reason    -- short free-form reason (logging only)

    Packages coming from one of ``self.opts.lookaside_repos`` get the
    lookaside flag.  Packages already present in the result are skipped.
    """
    added = set()
    for i in packages:
        assert i is not None
        if i in self.result_binary_packages:
            continue

        added.add(i)
        pb = ""
        if pulled_by:
            pb = " (pulled by %s, repo: %s)" % (pulled_by, pulled_by.repo.id)
        if req:
            pb += " (Requires: %s)" % req
        if reason:
            pb += " (%s)" % reason
        self.logger.debug("Added package %s%s" % (i, pb))
        self.result_binary_packages.add(i)

        # lookaside
        if i.repoid in self.opts.lookaside_repos:
            self._set_flag(i, PkgFlag.lookaside)

    # Remember what the new packages provide, keyed by the provide's string
    # form, so that _get_package_deps() can satisfy an identical requirement
    # from packages that are already part of the result.
    # NOTE(review): this step was not visible in the reviewed source; it is
    # reconstructed from how finished_get_package_deps_reqs is read.
    for new_pkg in added:
        for prov in new_pkg.provides:
            self.finished_get_package_deps_reqs.setdefault(str(prov), set()).add(new_pkg)
def _get_package_deps(self, pkg):
    """Return all direct (1st level) deps for a package.

    The return value is a set of tuples (pkg, reldep). Each package is
    tagged with the particular reldep that pulled it in. Requires_pre and
    _post are not distinguished.
    """
    assert pkg is not None
    result = set()

    if pkg.repoid in self.opts.lookaside_repos:
        # Don't resolve deps for stuff in lookaside.
        return result

    # DNF package has the _pre and _post attributes only if they are not
    # empty.
    requires = (list(pkg.requires) +
                list(getattr(pkg, 'requires_pre', [])) +
                list(getattr(pkg, 'requires_post', [])))

    q = self.q_binary_packages.filter(provides=requires).apply()
    for req in requires:
        cached = self.finished_get_package_deps_reqs.setdefault(str(req), set())
        if cached:
            # The requirement was resolved before; reuse the answer instead
            # of querying (and possibly picking a different provider) again.
            result.update((dep, req) for dep in cached)
            continue

        # TODO: need query also debuginfo
        deps = q.filter(provides=req)
        if deps:
            deps = self._get_best_package(deps, req=req)
            # Fill the cache that is consulted at the top of the loop.
            cached.update(deps)
            result.update((dep, req) for dep in deps)

    return result
def add_initial_packages(self, pattern_list):
    """Resolve the input patterns to packages and flag them as input.

    pattern_list -- package name globs.  A leading "-" marks an exclude, a
                    trailing ".+" selects multilib packages and (for
                    excludes only) a trailing ".src" selects source packages.

    Excluded packages, including multilib packages matching
    ``self.opts.multilib_blacklist``, are removed from the binary and source
    queries before the include patterns are evaluated.

    Returns the set of packages matched by the include patterns; the
    packages are flagged but not added to the result here.
    """
    added = set()

    excludes = []
    includes = []
    for pattern in pattern_list:
        if pattern.startswith("-"):
            excludes.append(pattern[1:])
        else:
            includes.append(pattern)

    exclude = set()
    for pattern in excludes:
        with Profiler("Gather.add_initial_packages():exclude"):
            # TODO: debug
            if pattern.endswith(".+"):
                pkgs = self.q_multilib_binary_packages.filter(name__glob=pattern[:-2], arch__neq='noarch')
            elif pattern.endswith(".src"):
                pkgs = self.q_source_packages.filter(name__glob=pattern[:-4])
            else:
                pkgs = self.q_binary_packages.filter(name__glob=pattern)

            exclude.update(pkgs)
            self.logger.debug("EXCLUDED by %s: %s", pattern, [str(p) for p in pkgs])

    for pattern in self.opts.multilib_blacklist:
        with Profiler("Gather.add_initial_packages():exclude-multilib-blacklist"):
            # TODO: does whitelist affect this in any way?
            pkgs = self.q_multilib_binary_packages.filter(name__glob=pattern, arch__neq='noarch')
            exclude.update(pkgs)
            self.logger.debug("EXCLUDED by %s: %s", pattern, [str(p) for p in pkgs])

    with Profiler("Gather.add_initial_packages():exclude-queries"):
        self.q_binary_packages = self.q_binary_packages.filter(pkg__neq=exclude).apply()
        self.q_native_binary_packages = self.q_native_binary_packages.filter(pkg__neq=exclude).apply()
        self.q_multilib_binary_packages = self.q_multilib_binary_packages.filter(pkg__neq=exclude).apply()
        self.q_noarch_binary_packages = self.q_noarch_binary_packages.filter(pkg__neq=exclude).apply()
        self.q_source_packages = self.q_source_packages.filter(pkg__neq=exclude).apply()

    for pattern in includes:
        with Profiler("Gather.add_initial_packages():include"):
            if pattern == "system-release" and self.opts.greedy_method == "all":
                pkgs = self.q_binary_packages.filter(provides="system-release").apply()
            elif pattern.endswith(".+"):
                pkgs = self.q_multilib_binary_packages.filter(name__glob=pattern[:-2]).apply()
            else:
                pkgs = self.q_binary_packages.filter(name__glob=pattern).apply()

            if not pkgs:
                self.logger.error("No package matches pattern %s" % pattern)

            # The pattern could have been a glob. In that case we want to
            # group the packages by name and get best match in those
            # smaller groups.
            packages_by_name = {}
            for po in pkgs:
                packages_by_name.setdefault(po.name, []).append(po)

            # values() instead of the Python-2-only iteritems(); the names
            # themselves are not needed here.
            for packages in packages_by_name.values():
                added.update(self._get_best_package(packages))

    for pkg in added:
        self._set_flag(pkg, PkgFlag.input)

    return added
def init_query_cache(self):
    """Build the QueryCache lookup tables used by the add_* methods."""
    # HACK: workaround for insufficient hawkey query performance
    # Must be executed *after* add_initial_packages() to exclude packages properly.
    nvr = ("name", "version", "release")
    native = self.q_native_binary_packages
    multilib = self.q_multilib_binary_packages

    # source
    self.source_pkgs_cache = QueryCache(self.q_source_packages, *nvr)

    # debug
    self.native_debug_packages_cache = QueryCache(self.q_native_debug_packages, "sourcerpm")
    self.multilib_debug_packages_cache = QueryCache(self.q_multilib_debug_packages, "sourcerpm")

    # packages by sourcerpm
    self.q_native_pkgs_by_sourcerpm_cache = QueryCache(native, "sourcerpm", arch__neq="noarch")
    self.q_multilib_pkgs_by_sourcerpm_cache = QueryCache(multilib, "sourcerpm", arch__neq="noarch")
    self.q_noarch_pkgs_by_sourcerpm_cache = QueryCache(native, "sourcerpm", arch="noarch")

    # multilib
    self.q_multilib_binary_packages_cache = QueryCache(multilib, *nvr, arch__neq="noarch")

    # prepopulate
    self.prepopulate_cache = QueryCache(self.q_binary_packages, "name", "arch")
def add_prepopulate_packages(self):
    """Resolve ``self.opts.prepopulate`` entries ("name.arch") to packages.

    Entries that match nothing are reported with a warning.  Matched
    packages get the prepopulate flag and are returned as a set; they are
    not added to the result here.
    """
    added = set()

    for name_arch in self.opts.prepopulate:
        name, arch = name_arch.rsplit(".", 1)
        pkgs = self.prepopulate_cache.get(name, arch)
        pkgs = self._get_best_package(pkgs)
        if pkgs:
            added.update(pkgs)
        else:
            # Logger.warn() is a deprecated alias; warning() is available on
            # both Python 2 and 3.
            self.logger.warning("Prepopulate: Doesn't match: %s" % name_arch)

    for pkg in added:
        self._set_flag(pkg, PkgFlag.prepopulate)

    return added
def add_binary_package_deps(self):
    """Add direct dependencies of every binary package in the result.

    Does nothing when ``self.opts.resolve_deps`` is off.  Each package is
    processed only once.  Returns the set of newly added packages.
    """
    added = set()

    if not self.opts.resolve_deps:
        return added

    # Iterate over a copy: _add_packages() grows the result set.
    for pkg in self.result_binary_packages.copy():
        assert pkg is not None

        if pkg in self.finished_add_binary_package_deps:
            continue

        deps = self._get_package_deps(pkg)
        for i, req in deps:
            if i not in self.result_binary_packages:
                self._add_packages([i], pulled_by=pkg, req=req, reason='binary-dep')
                added.add(i)
        self.finished_add_binary_package_deps[pkg] = deps

    return added
def add_conditional_packages(self):
    """
    For each binary package add their conditional dependencies as specified in comps.
    Return newly added packages.
    """
    added = set()

    if not self.opts.resolve_deps:
        return added

    # Iterate over a copy: _add_packages() grows the result set.
    for pkg in self.result_binary_packages.copy():
        assert pkg is not None

        try:
            deps = self.finished_add_conditional_packages[pkg]
        except KeyError:
            deps = set()
            for cond in self.conditional_packages:
                # Only conditions triggered by this package are relevant.
                if cond["name"] != pkg.name:
                    continue
                pkgs = self.q_binary_packages.filter(name=cond["install"]).apply()
                pkgs = self._get_best_package(pkgs)  # TODO: multilib?
                deps.update(pkgs)
            self.finished_add_conditional_packages[pkg] = deps

        for i in deps:
            if i not in self.result_binary_packages:
                self._add_packages([i], pulled_by=pkg, reason='cond-dep')
                # NOTE(review): the flag is set on the triggering package,
                # not on the package that was pulled in -- confirm intent.
                self._set_flag(pkg, PkgFlag.conditional)
                added.add(i)

    return added
def add_source_package_deps(self):
    """Add the dependencies of every source package in the result.

    Only active when both ``self.opts.resolve_deps`` and
    ``self.opts.selfhosting`` are set.  Each source package is processed
    once.  Returns the set of newly added binary packages.
    """
    added = set()

    if not self.opts.resolve_deps:
        return added
    if not self.opts.selfhosting:
        return added

    for pkg in self.result_source_packages:
        assert pkg is not None

        if pkg in self.finished_add_source_package_deps:
            continue

        deps = self._get_package_deps(pkg)
        self.finished_add_source_package_deps[pkg] = set(dep for (dep, req) in deps)
        for i, req in deps:
            if i not in self.result_binary_packages:
                self._add_packages([i], pulled_by=pkg, req=req, reason='source-dep')
                # NOTE(review): the flag is set on the source package, not
                # on the package that was pulled in -- confirm intent.
                self._set_flag(pkg, PkgFlag.self_hosting)
                added.add(i)

    return added
def add_source_packages(self):
    """
    For each binary package add its source package.
    Return newly added source packages.
    """
    added = set()

    for pkg in self.result_binary_packages:
        assert pkg is not None

        try:
            source_pkg = self.finished_add_source_packages[pkg]
        except KeyError:
            source_pkg = None
            if pkg.sourcerpm:
                source_pkg = self.sourcerpm_cache.get(pkg.sourcerpm, None)
                if source_pkg is None:
                    nvra = parse_nvra(pkg.sourcerpm)
                    source_pkgs = self.source_pkgs_cache.get(nvra["name"], nvra["version"], nvra["release"])
                    if source_pkgs:
                        source_pkg = list(source_pkgs)[0]
                        self.sourcerpm_cache[pkg.sourcerpm] = source_pkg
            self.finished_add_source_packages[pkg] = source_pkg

        if not source_pkg:
            # No source rpm is known for this package.
            continue

        # A source package inherits the lookaside flag of its binary.
        lookaside = self._has_flag(pkg, PkgFlag.lookaside)
        if lookaside:
            self._set_flag(source_pkg, PkgFlag.lookaside)
        if source_pkg not in self.result_source_packages:
            added.add(source_pkg)
        self.result_source_packages.add(source_pkg)

    return added
def add_debug_packages(self):
    """
    For each binary package add debuginfo packages built from the same source.
    Return newly added debug packages.
    """
    added = set()

    for pkg in self.result_binary_packages:
        assert pkg is not None

        if pkg in self.finished_add_debug_packages:
            continue

        candidates = []
        if pkg.sourcerpm:
            if self.is_native_package(pkg):
                candidates = self.native_debug_packages_cache.get(pkg.sourcerpm)
            else:
                candidates = self.multilib_debug_packages_cache.get(pkg.sourcerpm)

        if not candidates:
            continue

        debug_pkgs = []
        lookaside = self._has_flag(pkg, PkgFlag.lookaside)
        for i in candidates:
            if pkg.arch == 'noarch' and i.arch != 'noarch':
                # If the package is noarch, we will only pull debuginfo if
                # it's noarch as well. This covers mingw use case, but
                # means we don't for example pull debuginfo just because of
                # -doc subpackage.
                continue
            if lookaside:
                self._set_flag(i, PkgFlag.lookaside)
            if i not in self.result_debug_packages:
                added.add(i)
            debug_pkgs.append(i)

        self.finished_add_debug_packages[pkg] = debug_pkgs
        self.result_debug_packages.update(debug_pkgs)

    return added
def add_fulltree_packages(self):
    """
    For each binary package add all binary packages built from the same source.
    Return newly added binary packages.
    """
    added = set()

    if not self.opts.fulltree:
        return added

    for pkg in sorted(self.result_binary_packages):
        assert pkg is not None

        if get_source_name(pkg) in self.opts.fulltree_excludes:
            self.logger.debug('No fulltree for %s due to exclude list', pkg)
            continue

        try:
            fulltree_pkgs = self.finished_add_fulltree_packages[pkg]
        except KeyError:
            native_fulltree_pkgs = self.q_native_pkgs_by_sourcerpm_cache.get(pkg.sourcerpm) or []
            multilib_fulltree_pkgs = self.q_multilib_pkgs_by_sourcerpm_cache.get(pkg.sourcerpm) or []
            noarch_fulltree_pkgs = self.q_noarch_pkgs_by_sourcerpm_cache.get(pkg.sourcerpm) or []

            if not native_fulltree_pkgs:
                # no existing native pkgs -> pull multilib
                pull_native = False
            elif set(native_fulltree_pkgs) & self.result_binary_packages:
                # native pkgs in result -> pull native
                pull_native = True
            elif set(multilib_fulltree_pkgs) & self.result_binary_packages:
                # multilib pkgs in result -> pull multilib
                pull_native = False
            else:
                # fallback / default
                pull_native = True

            # We pull packages determined by `pull_native`, or everything
            # if we're greedy
            fulltree_pkgs = []
            if pull_native or self.opts.greedy_method == 'all':
                fulltree_pkgs.extend(native_fulltree_pkgs)
            if not pull_native or self.opts.greedy_method == 'all':
                fulltree_pkgs.extend(multilib_fulltree_pkgs)

            # always pull all noarch subpackages
            fulltree_pkgs += noarch_fulltree_pkgs

        for i in fulltree_pkgs:
            if i not in self.result_binary_packages:
                self._add_packages([i], reason='fulltree')
                self._set_flag(i, PkgFlag.fulltree)
                added.add(i)

            # don't run fulltree on added packages
            self.finished_add_fulltree_packages[i] = []

        self.finished_add_fulltree_packages[pkg] = fulltree_pkgs

    return added
def add_langpack_packages(self, langpack_patterns):
    """
    For each binary package add all matching langpack packages.
    Return newly added binary packages.

    langpack_patterns: [{"name": str, "install": str}]
    """
    added = set()

    if not self.opts.langpacks:
        return added

    exceptions = ["man-pages-overrides"]

    for pkg in sorted(self.result_binary_packages):
        assert pkg is not None

        try:
            langpack_pkgs = self.finished_add_langpack_packages[pkg]
        except KeyError:
            patterns = [i["install"] for i in langpack_patterns if i["name"] == pkg.name]
            patterns = [i.replace("%s", "*") for i in patterns]

            if not patterns:
                self.finished_add_langpack_packages[pkg] = []
                continue

            langpack_pkgs = self.q_binary_packages.filter(name__glob=patterns).apply()
            langpack_pkgs = langpack_pkgs.filter(name__glob__not=["*-devel", "*-static"])
            langpack_pkgs = langpack_pkgs.filter(name__neq=exceptions)

        pkgs_by_name = {}
        for i in langpack_pkgs:
            pkgs_by_name.setdefault(i.name, set()).add(i)

        langpack_pkgs = set()
        for name in sorted(pkgs_by_name):
            best = self._get_best_package(pkgs_by_name[name])
            if not best:
                # Nothing usable under this name; an empty list must not
                # end up in the set below.
                continue
            # TODO: greedy
            i = best[0]
            langpack_pkgs.add(i)
            self._set_flag(i, PkgFlag.langpack)
            if i not in self.result_binary_packages:
                self._add_packages([i], pulled_by=pkg, reason='langpack')
                added.add(i)
        self.finished_add_langpack_packages[pkg] = langpack_pkgs

    return added
def add_multilib_packages(self):
    """Add multilib counterparts of native packages in the result.

    Noarch, source and already-multilib packages are skipped.  For the
    others, packages of the same name, version and release are looked up in
    the multilib cache and added when ``self._multilib.is_multilib()``
    accepts them.  Returns the set of newly added packages.
    """
    added = set()

    for pkg in sorted(self.result_binary_packages):
        if pkg in self.finished_add_multilib_packages:
            continue

        if pkg.arch in ("noarch", "src", "nosrc"):
            self.finished_add_multilib_packages[pkg] = None
            continue

        if pkg.arch in self.dnf.arch_wrapper.multilib_arches:
            self.finished_add_multilib_packages[pkg] = None
            continue

        pkgs = self.q_multilib_binary_packages_cache.get(pkg.name, pkg.version, pkg.release)
        pkgs = self._get_best_package(pkgs)
        for i in pkgs:
            is_multilib = self._multilib.is_multilib(i)
            if is_multilib:
                if i not in self.result_binary_packages:
                    added.add(i)
                self._set_flag(i, PkgFlag.multilib)
                self._add_packages([i], reason='multilib:%s' % is_multilib)
                self.finished_add_multilib_packages[pkg] = i
                # TODO: ^^^ may get multiple results; i686, i586, etc.

    return added
def gather(self, pattern_list, conditional_packages=None):
    """Run the whole gathering process.

    pattern_list         -- input patterns for add_initial_packages()
    conditional_packages -- optional list of dicts with "name" and
                            "install" keys, used by
                            add_conditional_packages()

    After the initial and prepopulated packages are added, the add_* steps
    are run in passes.  As soon as one step adds something, a new pass is
    started from the first step; gathering ends when a complete pass adds
    nothing.  Results are left in the ``result_*`` attributes.
    """
    self.conditional_packages = conditional_packages or []

    self.logger.debug("INITIAL PACKAGES")
    added = self.add_initial_packages(pattern_list)
    self._add_packages(added)

    # The caches must reflect the excludes applied by add_initial_packages()
    # and are needed by every step below.
    self.init_query_cache()

    added = self.log_count('PREPOPULATE', self.add_prepopulate_packages)
    self._add_packages(added, reason='prepopulate')

    for pass_num in count(1):
        self.logger.debug("PASS %s" % pass_num)

        if self.log_count('CONDITIONAL DEPS', self.add_conditional_packages):
            continue

        # resolve deps
        if self.log_count('BINARY DEPS', self.add_binary_package_deps):
            continue

        if self.log_count('SOURCE DEPS', self.add_source_package_deps):
            continue

        if self.log_count('SOURCE PACKAGES', self.add_source_packages):
            continue

        if self.log_count('DEBUG PACKAGES', self.add_debug_packages):
            continue
        # TODO: debug deps

        if self.log_count('FULLTREE', self.add_fulltree_packages):
            continue

        if self.log_count('LANGPACKS', self.add_langpack_packages, self.opts.langpacks):
            continue

        if self.log_count('MULTILIB', self.add_multilib_packages):
            continue

        # nothing added -> break depsolving cycle
        break
def log_count(self, msg, method, *args):
    """
    Log ``msg``, call ``method`` with ``args``, log the size of what it
    returned and hand the returned value back to the caller.
    """
    debug = self.logger.debug
    debug('%s', msg)
    result = method(*args)
    debug('ADDED: %s', len(result))
    return result