lorax-composer: Estimate metadata size required by Anaconda

Anaconda doesn't download the filelists or 'other' metadata, which can
add a significant amount of space to the metadata cache on the build
host. So, to estimate the amount of space needed by Anaconda, exclude the
space used by the *filelists* and *other* files.

With these changes the lorax-composer estimate should be fairly
accurate. But since Anaconda only uses the installedsize * 1.35, we have
to make sure that the size we use is at least as big as what Anaconda
will estimate; otherwise the installation will fail.

Resolves: rhbz#1761337
This commit is contained in:
Brian C. Lane 2019-10-17 10:44:01 -07:00
parent e0da9b987b
commit 9291f3f3ea
3 changed files with 60 additions and 16 deletions

View File

@ -52,6 +52,7 @@ from pylorax.api.projects import projects_depsolve, projects_depsolve_with_size,
from pylorax.api.projects import ProjectsError from pylorax.api.projects import ProjectsError
from pylorax.api.recipes import read_recipe_and_id from pylorax.api.recipes import read_recipe_and_id
from pylorax.api.timestamp import TS_CREATED, write_timestamp from pylorax.api.timestamp import TS_CREATED, write_timestamp
from pylorax.executils import runcmd_output
from pylorax.imgutils import default_image_name from pylorax.imgutils import default_image_name
from pylorax.sysutils import joinpaths from pylorax.sysutils import joinpaths
@ -628,6 +629,25 @@ def add_customizations(f, recipe):
else: else:
log.warning("Skipping group %s, already created by user", group["name"]) log.warning("Skipping group %s, already created by user", group["name"])
def get_md_size(yum_path):
    """Estimate the amount of metadata space needed by anaconda

    :param yum_path: Path of the directory tree to measure with du
    :type yum_path: str
    :returns: Size in bytes reported by du, or 0 if its output cannot be parsed
    :rtype: int

    Anaconda doesn't download the filelists or 'other' metadata, which can add
    up to a significant difference, so exclude those from the calculation.
    """
    # -s prints a single summary total, -b reports the apparent size in bytes
    du_cmd = ["/usr/bin/du",
              "--exclude", "*other*",
              "--exclude", "*filelists*",
              "-sb",
              yum_path]

    # Run du outside of the try block. The handler below logs du_output, so it must
    # only ever be reached after the command has returned and du_output is bound.
    # Errors raised by the command itself propagate to the caller unchanged.
    du_output = runcmd_output(du_cmd)

    try:
        # The total is the first whitespace separated field of the du output
        return int(du_output.split()[0])
    except (ValueError, IndexError) as e:
        log.error("Problem calculating metadata size from '%s': %s", du_output, str(e))
        return 0
def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_mode=0): def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_mode=0):
""" Start the build """ Start the build
@ -660,7 +680,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
try: try:
# This can possibly update repodata and reset the YumBase object. # This can possibly update repodata and reset the YumBase object.
with yumlock.lock_check: with yumlock.lock_check:
(installed_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False) (installed_size, anaconda_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False)
except ProjectsError as e: except ProjectsError as e:
log.error("start_build depsolve: %s", str(e)) log.error("start_build depsolve: %s", str(e))
raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e))) raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e)))
@ -669,7 +689,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
ks_template_path = joinpaths(share_dir, "composer", compose_type) + ".ks" ks_template_path = joinpaths(share_dir, "composer", compose_type) + ".ks"
ks_template = open(ks_template_path, "r").read() ks_template = open(ks_template_path, "r").read()
# How much space will the packages in the default template take? # How much space will the packages in the selected template take?
ks_version = makeVersion(RHEL7) ks_version = makeVersion(RHEL7)
ks = KickstartParser(ks_version, errorsAreFatal=False, missingIncludeIsFatal=False) ks = KickstartParser(ks_version, errorsAreFatal=False, missingIncludeIsFatal=False)
ks.readKickstartFromString(ks_template+"\n%end\n") ks.readKickstartFromString(ks_template+"\n%end\n")
@ -677,16 +697,29 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
grps = [grp.name for grp in ks.handler.packages.groupList] grps = [grp.name for grp in ks.handler.packages.groupList]
try: try:
with yumlock.lock: with yumlock.lock:
(template_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps, (template_size, anaconda_tmpl_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps, with_core=not ks.handler.packages.nocore)
with_core=not ks.handler.packages.nocore)
except ProjectsError as e: except ProjectsError as e:
log.error("start_build depsolve: %s", str(e)) log.error("start_build depsolve: %s", str(e))
raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e))) raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e)))
log.debug("installed_size = %d, template_size=%d", installed_size, template_size)
# Minimum LMC disk size is 1GiB, and anaconda bumps the estimated size up by 35% (which doesn't always work). # Anaconda also stores the metadata on the disk once it is partitioned, try to take this into account by
installed_size = max(1024**3, int((installed_size+template_size) * 1.4)) # adding the size of the lorax-composer metadata storage.
log.debug("/ partition size = %d", installed_size) with yumlock.lock:
metadata_size = get_md_size(yumlock.yb.conf.installroot)
# Anaconda estimates size differently, only taking into account installed size and adding 35%
# But we must make sure that our actual disk size is at least as big as the anaconda size, otherwise the install will fail
anaconda_minimum = int((anaconda_size+anaconda_tmpl_size) * 1.35)
log.debug("anaconda_size = %d, anaconda_template_size=%d, anaconda_minimum=%d", anaconda_size, anaconda_tmpl_size, anaconda_minimum)
log.debug("installed_size = %d, template_size=%d, metadata_size=%d", installed_size, template_size, metadata_size)
# Add 10% to the composer estimate
installed_size = int((installed_size+template_size+metadata_size) * 1.10)
# Select the largest size for the partition
partition_size = max(1024**3, anaconda_minimum, installed_size)
log.debug("/ partition size = %d", partition_size)
# Create the results directory # Create the results directory
build_id = str(uuid4()) build_id = str(uuid4())

View File

@ -352,20 +352,26 @@ def estimate_size(packages, block_size=4096):
:type packages: list of TransactionMember objects :type packages: list of TransactionMember objects
:param block_size: The block size to use for rounding up file sizes. :param block_size: The block size to use for rounding up file sizes.
:type block_size: int :type block_size: int
:returns: The estimated size of installed packages :returns: Tuple of the estimated size needed, and the size anaconda will calculate
:rtype: int :rtype: tuple(int, int)
Estimating actual requirements is difficult without the actual file sizes, which Estimating actual requirements is difficult without the actual file sizes, which
yum doesn't provide access to. So use the file count and block size to estimate yum doesn't provide access to. So use the file count and block size to estimate
a minimum size for each package. a minimum size for each package.
Anaconda only takes into account the installedsize of each package. It then fudges
this by 35% to make sure there is enough space.
""" """
installed_size = 0 installed_size = 0
anaconda_size = 0
for p in packages: for p in packages:
installed_size += len(p.po.filelist) * block_size installed_size += len(p.po.filelist) * block_size
installed_size += p.po.installedsize installed_size += p.po.installedsize
# anaconda only takes into account installedsize
anaconda_size += p.po.installedsize
# also count the RPM package size (yum cache) # also count the RPM package size (yum cache)
installed_size += ((p.po.size / block_size) + 1) * block_size installed_size += ((p.po.size / block_size) + 1) * block_size
return installed_size return (installed_size, anaconda_size)
def projects_depsolve_with_size(yb, projects, groups, with_core=True): def projects_depsolve_with_size(yb, projects, groups, with_core=True):
"""Return the dependencies and installed size for a list of projects """Return the dependencies and installed size for a list of projects
@ -376,9 +382,12 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True):
:type projects: List of tuples :type projects: List of tuples
:param groups: The groups to include in dependency solving :param groups: The groups to include in dependency solving
:type groups: List of str :type groups: List of str
:returns: installed size and a list of NEVRA's of the project and its dependencies :returns: installed size, size estimated by anaconda, and a list of NEVRA's of the project and its dependencies
:rtype: tuple of (int, list of dicts) :rtype: tuple of (int, int, list of dicts)
:raises: ProjectsError if there was a problem installing something :raises: ProjectsError if there was a problem installing something
The anaconda_size only includes the installed package size, not file block or cache estimation like
installed_size includes.
""" """
try: try:
install_errors = _depsolve(yb, projects, groups) install_errors = _depsolve(yb, projects, groups)
@ -393,13 +402,13 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True):
if rc not in [0, 1, 2]: if rc not in [0, 1, 2]:
raise ProjectsError("There was a problem depsolving %s: %s" % (projects, msg)) raise ProjectsError("There was a problem depsolving %s: %s" % (projects, msg))
yb.tsInfo.makelists() yb.tsInfo.makelists()
installed_size = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled) (installed_size, anaconda_size) = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled)
deps = sorted(map(tm_to_dep, yb.tsInfo.installed + yb.tsInfo.depinstalled), key=lambda p: p["name"].lower()) deps = sorted(map(tm_to_dep, yb.tsInfo.installed + yb.tsInfo.depinstalled), key=lambda p: p["name"].lower())
except YumBaseError as e: except YumBaseError as e:
raise ProjectsError("There was a problem depsolving %s: %s" % (projects, str(e))) raise ProjectsError("There was a problem depsolving %s: %s" % (projects, str(e)))
finally: finally:
yb.closeRpmDB() yb.closeRpmDB()
return (installed_size, deps) return (installed_size, anaconda_size, deps)
def modules_list(yb, module_names): def modules_list(yb, module_names):
"""Return a list of modules """Return a list of modules

View File

@ -223,8 +223,10 @@ class ProjectsTest(unittest.TestCase):
def test_projects_size_depsolve_glob(self): def test_projects_size_depsolve_glob(self):
"""Test that depsolving with a '*' version glob doesn't glob package names""" """Test that depsolving with a '*' version glob doesn't glob package names"""
size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False) size, anaconda_size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False)
self.assertTrue(size > 0) self.assertTrue(size > 0)
self.assertTrue(anaconda_size > 0)
self.assertTrue(anaconda_size < size)
self.assertTrue(len(deps) > 1) self.assertTrue(len(deps) > 1)
self.assertTrue("python" in [dep["name"] for dep in deps]) self.assertTrue("python" in [dep["name"] for dep in deps])
self.assertTrue("python-blivet" not in [dep["name"] for dep in deps]) self.assertTrue("python-blivet" not in [dep["name"] for dep in deps])