lorax-composer: Estimate metadata size required by Anaconda

Anaconda doesn't download the filelists or 'other' metadata, which can
add a significant amount of space to the metadata cache on the build
host. So, to estimate the amount of space needed by Anaconda, exclude the
space used by the *filelists* and *other* files.

With these changes the lorax-composer estimate should be fairly
accurate. But since Anaconda only uses installedsize * 1.35, we have
to make sure that the size we use is at least as big as what Anaconda
will estimate; otherwise the installation will fail.

Resolves: rhbz#1761337
This commit is contained in:
Brian C. Lane 2019-10-17 10:44:01 -07:00
parent e0da9b987b
commit 9291f3f3ea
3 changed files with 60 additions and 16 deletions

View File

@ -52,6 +52,7 @@ from pylorax.api.projects import projects_depsolve, projects_depsolve_with_size,
from pylorax.api.projects import ProjectsError
from pylorax.api.recipes import read_recipe_and_id
from pylorax.api.timestamp import TS_CREATED, write_timestamp
from pylorax.executils import runcmd_output
from pylorax.imgutils import default_image_name
from pylorax.sysutils import joinpaths
@ -628,6 +629,25 @@ def add_customizations(f, recipe):
else:
log.warning("Skipping group %s, already created by user", group["name"])
def get_md_size(yum_path):
    """Estimate the amount of space needed by anaconda

    Anaconda doesn't download the filelists or 'other' metadata, which can add
    up to a significant difference, so exclude those from the calculation.

    :param yum_path: Path that is handed to du to be measured
    :type yum_path: str
    :returns: The size reported by du, or 0 if its output could not be parsed
    :rtype: int

    Errors raised by running du itself are not handled here and propagate to
    the caller.
    """
    # Run du outside of the try block so that du_output is always bound when
    # the error handler below logs it.
    du_output = runcmd_output(["/usr/bin/du",
                               "--exclude", "*other*",
                               "--exclude", "*filelists*",
                               "-sb",
                               yum_path])
    try:
        # The first whitespace separated field of the summary line is the size
        return int(du_output.split()[0])
    except (ValueError, IndexError) as e:
        log.error("Problem calculating metadata size from '%s': %s", du_output, str(e))
        return 0
def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_mode=0):
""" Start the build
@ -660,7 +680,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
try:
# This can possibly update repodata and reset the YumBase object.
with yumlock.lock_check:
(installed_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False)
(installed_size, anaconda_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False)
except ProjectsError as e:
log.error("start_build depsolve: %s", str(e))
raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e)))
@ -669,7 +689,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
ks_template_path = joinpaths(share_dir, "composer", compose_type) + ".ks"
ks_template = open(ks_template_path, "r").read()
# How much space will the packages in the default template take?
# How much space will the packages in the selected template take?
ks_version = makeVersion(RHEL7)
ks = KickstartParser(ks_version, errorsAreFatal=False, missingIncludeIsFatal=False)
ks.readKickstartFromString(ks_template+"\n%end\n")
@ -677,16 +697,29 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m
grps = [grp.name for grp in ks.handler.packages.groupList]
try:
with yumlock.lock:
(template_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps,
with_core=not ks.handler.packages.nocore)
(template_size, anaconda_tmpl_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps, with_core=not ks.handler.packages.nocore)
except ProjectsError as e:
log.error("start_build depsolve: %s", str(e))
raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e)))
log.debug("installed_size = %d, template_size=%d", installed_size, template_size)
# Minimum LMC disk size is 1GiB, and anaconda bumps the estimated size up by 35% (which doesn't always work).
installed_size = max(1024**3, int((installed_size+template_size) * 1.4))
log.debug("/ partition size = %d", installed_size)
# Anaconda also stores the metadata on the disk once it is partitioned, try to take this into account by
# adding the size of the lorax-composer metadata storage.
with yumlock.lock:
metadata_size = get_md_size(yumlock.yb.conf.installroot)
# Anaconda estimates size differently, only taking into account installed size and adding 35%
# But we must make sure that our actual disk size is at least as big as the anaconda size, otherwise the install will fail
anaconda_minimum = int((anaconda_size+anaconda_tmpl_size) * 1.35)
log.debug("anaconda_size = %d, anaconda_template_size=%d, anaconda_minimum=%d", anaconda_size, anaconda_tmpl_size, anaconda_minimum)
log.debug("installed_size = %d, template_size=%d, metadata_size=%d", installed_size, template_size, metadata_size)
# Add 10% to the composer estimate
installed_size = int((installed_size+template_size+metadata_size) * 1.10)
# Select the largest size for the partition
partition_size = max(1024**3, anaconda_minimum, installed_size)
log.debug("/ partition size = %d", partition_size)
# Create the results directory
build_id = str(uuid4())

View File

@ -352,20 +352,26 @@ def estimate_size(packages, block_size=4096):
:type packages: list of TransactionMember objects
:param block_size: The block size to use for rounding up file sizes.
:type block_size: int
:returns: The estimated size of installed packages
:rtype: int
:returns: Tuple of the estimated size needed, and the size anaconda will calculate
:rtype: tuple(int, int)
Estimating actual requirements is difficult without the actual file sizes, which
yum doesn't provide access to. So use the file count and block size to estimate
a minimum size for each package.
Anaconda only takes into account the installedsize of each package. It then fudges
this by 35% to make sure there is enough space.
"""
installed_size = 0
anaconda_size = 0
for p in packages:
installed_size += len(p.po.filelist) * block_size
installed_size += p.po.installedsize
# anaconda only takes into account installedsize
anaconda_size += p.po.installedsize
# also count the RPM package size (yum cache)
installed_size += ((p.po.size / block_size) + 1) * block_size
return installed_size
return (installed_size, anaconda_size)
def projects_depsolve_with_size(yb, projects, groups, with_core=True):
"""Return the dependencies and installed size for a list of projects
@ -376,9 +382,12 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True):
:type projects: List of tuples
:param groups: The groups to include in dependency solving
:type groups: List of str
:returns: installed size and a list of NEVRA's of the project and its dependencies
:rtype: tuple of (int, list of dicts)
:returns: installed size, size estimated by anaconda, and a list of NEVRA's of the project and its dependencies
:rtype: tuple of (int, int, list of dicts)
:raises: ProjectsError if there was a problem installing something
The anaconda_size only includes the installed package size, not file block or cache estimation like
installed_size includes.
"""
try:
install_errors = _depsolve(yb, projects, groups)
@ -393,13 +402,13 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True):
if rc not in [0, 1, 2]:
raise ProjectsError("There was a problem depsolving %s: %s" % (projects, msg))
yb.tsInfo.makelists()
installed_size = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled)
(installed_size, anaconda_size) = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled)
deps = sorted(map(tm_to_dep, yb.tsInfo.installed + yb.tsInfo.depinstalled), key=lambda p: p["name"].lower())
except YumBaseError as e:
raise ProjectsError("There was a problem depsolving %s: %s" % (projects, str(e)))
finally:
yb.closeRpmDB()
return (installed_size, deps)
return (installed_size, anaconda_size, deps)
def modules_list(yb, module_names):
"""Return a list of modules

View File

@ -223,8 +223,10 @@ class ProjectsTest(unittest.TestCase):
def test_projects_size_depsolve_glob(self):
"""Test that depsolving with a '*' version glob doesn't glob package names"""
size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False)
size, anaconda_size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False)
self.assertTrue(size > 0)
self.assertTrue(anaconda_size > 0)
self.assertTrue(anaconda_size < size)
self.assertTrue(len(deps) > 1)
self.assertTrue("python" in [dep["name"] for dep in deps])
self.assertTrue("python-blivet" not in [dep["name"] for dep in deps])