hybrid: Optimize getting lookaside packages

The original code ended up downloading all repodata from the lookaside
repo. This could cause a lot of memory to be used.

The new code only downloads the repomd.xml and then primary record,
which is sufficient to obtain all needed information. A lot less memory
is used and the code is also significantly faster.

Here are some alternative ways of getting a list of packages from the
lookaside repo and reasons why they did not work:

 * dnf repoquery - this doesn't include modular packages unless the
   stream is default
 * dnf reposync - requires `--urls` option to only print the names,
   which is not available on RHEL 7

JIRA: RHELCMP-4761
Signed-off-by: Lubomír Sedlář <lsedlar@redhat.com>
This commit is contained in:
Lubomír Sedlář 2021-04-09 13:53:31 +02:00
parent c8091899b2
commit ab1b5b48ec
2 changed files with 33 additions and 39 deletions

View File

@ -499,6 +499,27 @@ def _make_result(paths):
return [{"path": path, "flags": []} for path in sorted(paths)] return [{"path": path, "flags": []} for path in sorted(paths)]
def get_repo_packages(path):
"""Extract file names of all packages in the given repository."""
packages = set()
def callback(pkg):
packages.add(os.path.basename(pkg.location_href))
repomd = os.path.join(path, "repodata/repomd.xml")
with as_local_file(repomd) as url_:
repomd = cr.Repomd(url_)
for rec in repomd.records:
if rec.type != "primary":
continue
record_url = os.path.join(path, rec.location_href)
with as_local_file(record_url) as url_:
cr.xml_parse_primary(url_, pkgcb=callback, do_files=False)
return packages
def expand_packages(nevra_to_pkg, lookasides, nvrs, filter_packages): def expand_packages(nevra_to_pkg, lookasides, nvrs, filter_packages):
"""For each package add source RPM.""" """For each package add source RPM."""
# This will serve as the final result. We collect sets of paths to the # This will serve as the final result. We collect sets of paths to the
@ -509,25 +530,16 @@ def expand_packages(nevra_to_pkg, lookasides, nvrs, filter_packages):
filters = set(filter_packages) filters = set(filter_packages)
# Collect list of all packages in lookaside. These will not be added to the
# result. Fus handles this in part: if a package is explicitly mentioned as
# input (which can happen with comps group expansion), it will be in the
# output even if it's in lookaside.
lookaside_packages = set() lookaside_packages = set()
for repo in lookasides: for repo in lookasides:
md = cr.Metadata() lookaside_packages.update(get_repo_packages(repo))
md.locate_and_load_xml(repo)
for key in md.keys():
pkg = md.get(key)
url = os.path.join(pkg.location_base or repo, pkg.location_href)
# Strip file:// prefix
lookaside_packages.add(url[7:])
for nvr, pkg_arch, flags in nvrs: for nvr, pkg_arch, flags in nvrs:
pkg = nevra_to_pkg["%s.%s" % (nvr, pkg_arch)] pkg = nevra_to_pkg["%s.%s" % (nvr, pkg_arch)]
if pkg.file_path in lookaside_packages: if os.path.basename(pkg.file_path) in lookaside_packages:
# Package is in lookaside, don't add it and ignore sources and # Fus can return lookaside package in output if the package is
# debuginfo too. # explicitly listed as input. This can happen during comps
# expansion.
continue continue
if pkg_is_debug(pkg): if pkg_is_debug(pkg):
debuginfo.add(pkg.file_path) debuginfo.add(pkg.file_path)
@ -540,7 +552,7 @@ def expand_packages(nevra_to_pkg, lookasides, nvrs, filter_packages):
if (srpm.name, "src") in filters: if (srpm.name, "src") in filters:
# Filtered package, skipping # Filtered package, skipping
continue continue
if srpm.file_path not in lookaside_packages: if os.path.basename(srpm.file_path) not in lookaside_packages:
srpms.add(srpm.file_path) srpms.add(srpm.file_path)
except KeyError: except KeyError:
# Didn't find source RPM.. this should be logged # Didn't find source RPM.. this should be logged

View File

@ -934,20 +934,11 @@ class TestExpandPackages(helpers.PungiTestCase):
}, },
) )
@mock.patch("pungi.phases.gather.methods.method_hybrid.cr") @mock.patch("pungi.phases.gather.methods.method_hybrid.get_repo_packages")
def test_skip_lookaside_source(self, cr): def test_skip_lookaside_source(self, get_repo_packages):
nevra_to_pkg = self._mk_packages(src=True) nevra_to_pkg = self._mk_packages(src=True)
lookasides = [mock.Mock()] lookasides = [mock.Mock()]
repo = { get_repo_packages.return_value = ["pkg.src.rpm"]
"abc": NamedMock(
name="pkg",
arch="src",
location_base="file:///tmp/",
location_href="pkg.src.rpm",
),
}
cr.Metadata.return_value.keys.return_value = repo.keys()
cr.Metadata.return_value.get.side_effect = lambda key: repo[key]
res = hybrid.expand_packages( res = hybrid.expand_packages(
nevra_to_pkg, lookasides, [("pkg-3:1-2", "x86_64", [])], [] nevra_to_pkg, lookasides, [("pkg-3:1-2", "x86_64", [])], []
@ -962,20 +953,11 @@ class TestExpandPackages(helpers.PungiTestCase):
}, },
) )
@mock.patch("pungi.phases.gather.methods.method_hybrid.cr") @mock.patch("pungi.phases.gather.methods.method_hybrid.get_repo_packages")
def test_skip_lookaside_packages(self, cr): def test_skip_lookaside_packages(self, get_repo_packages):
nevra_to_pkg = self._mk_packages(debug_arch="x86_64") nevra_to_pkg = self._mk_packages(debug_arch="x86_64")
lookasides = [mock.Mock()] lookasides = [mock.Mock()]
repo = { get_repo_packages.return_value = ["pkg.rpm"]
"abc": NamedMock(
name="pkg",
arch="x86_64",
location_base="file:///tmp/",
location_href="pkg.rpm",
)
}
cr.Metadata.return_value.keys.return_value = repo.keys()
cr.Metadata.return_value.get.side_effect = lambda key: repo[key]
res = hybrid.expand_packages( res = hybrid.expand_packages(
nevra_to_pkg, lookasides, [("pkg-3:1-2", "x86_64", [])], [] nevra_to_pkg, lookasides, [("pkg-3:1-2", "x86_64", [])], []