From 9291f3f3ea15c3a258d67b828cfb7d04c16e918b Mon Sep 17 00:00:00 2001 From: "Brian C. Lane" Date: Thu, 17 Oct 2019 10:44:01 -0700 Subject: [PATCH] lorax-composer: Estimate metadata size required by Anaconda Anaconda doesn't download the filelists or 'other' metadata, which can add a significant amount of space to the metadata cache on the build host. So to estimate the amount of space needed by Anaconda exclude the space used by *filelists* and *other* files. With these changes the lorax-composer estimate should actually be fairly accurate. But since Anaconda only uses the installedsize * 1.35 we have to make sure that the size we use is at least as big as what anaconda will estimate, otherwise the installation will fail. Resolves: rhbz#1761337 --- src/pylorax/api/compose.py | 49 ++++++++++++++++++++++++++++------ src/pylorax/api/projects.py | 23 +++++++++++----- tests/pylorax/test_projects.py | 4 ++- 3 files changed, 60 insertions(+), 16 deletions(-) diff --git a/src/pylorax/api/compose.py b/src/pylorax/api/compose.py index 35f3282f..b760f386 100644 --- a/src/pylorax/api/compose.py +++ b/src/pylorax/api/compose.py @@ -52,6 +52,7 @@ from pylorax.api.projects import projects_depsolve, projects_depsolve_with_size, from pylorax.api.projects import ProjectsError from pylorax.api.recipes import read_recipe_and_id from pylorax.api.timestamp import TS_CREATED, write_timestamp +from pylorax.executils import runcmd_output from pylorax.imgutils import default_image_name from pylorax.sysutils import joinpaths @@ -628,6 +629,25 @@ def add_customizations(f, recipe): else: log.warning("Skipping group %s, already created by user", group["name"]) + +def get_md_size(yum_path): + """Estimate the amount of space needed by anaconda + + Anaconda doesn't download the filelists or 'other' metadata, which can add + up to a significant difference, so exclude those from the calculation. 
+ """ + try: + du_output = runcmd_output(["/usr/bin/du", + "--exclude", "*other*", + "--exclude", "*filelists*", + "-sb", + yum_path]) + return int(du_output.split()[0]) + except (ValueError, IndexError) as e: + log.error("Problem calculating metadata size from '%s': %s", du_output, str(e)) + return 0 + + def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_mode=0): """ Start the build @@ -660,7 +680,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m try: # This can possibly update repodata and reset the YumBase object. with yumlock.lock_check: - (installed_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False) + (installed_size, anaconda_size, deps) = projects_depsolve_with_size(yumlock.yb, projects, recipe.group_names, with_core=False) except ProjectsError as e: log.error("start_build depsolve: %s", str(e)) raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e))) @@ -669,7 +689,7 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m ks_template_path = joinpaths(share_dir, "composer", compose_type) + ".ks" ks_template = open(ks_template_path, "r").read() - # How much space will the packages in the default template take? + # How much space will the packages in the selected template take? 
ks_version = makeVersion(RHEL7) ks = KickstartParser(ks_version, errorsAreFatal=False, missingIncludeIsFatal=False) ks.readKickstartFromString(ks_template+"\n%end\n") @@ -677,16 +697,29 @@ def start_build(cfg, yumlock, gitlock, branch, recipe_name, compose_type, test_m grps = [grp.name for grp in ks.handler.packages.groupList] try: with yumlock.lock: - (template_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps, - with_core=not ks.handler.packages.nocore) + (template_size, anaconda_tmpl_size, _) = projects_depsolve_with_size(yumlock.yb, pkgs, grps, with_core=not ks.handler.packages.nocore) except ProjectsError as e: log.error("start_build depsolve: %s", str(e)) raise RuntimeError("Problem depsolving %s: %s" % (recipe["name"], str(e))) - log.debug("installed_size = %d, template_size=%d", installed_size, template_size) - # Minimum LMC disk size is 1GiB, and anaconda bumps the estimated size up by 35% (which doesn't always work). - installed_size = max(1024**3, int((installed_size+template_size) * 1.4)) - log.debug("/ partition size = %d", installed_size) + # Anaconda also stores the metadata on the disk once it is partitioned, try to take this into account by + # adding the size of the lorax-composer metadata storage. 
+ with yumlock.lock: + metadata_size = get_md_size(yumlock.yb.conf.installroot) + + # Anaconda estimates size differently, only taking into account installed size and adding 35% + # But we must make sure that our actual disk size is at least as big as the anaconda size, otherwise the install will fail + anaconda_minimum = int((anaconda_size+anaconda_tmpl_size) * 1.35) + + log.debug("anaconda_size = %d, anaconda_template_size=%d, anaconda_minimum=%d", anaconda_size, anaconda_tmpl_size, anaconda_minimum) + log.debug("installed_size = %d, template_size=%d, metadata_size=%d", installed_size, template_size, metadata_size) + + # Add 10% to the composer estimate + installed_size = int((installed_size+template_size+metadata_size) * 1.10) + + # Select the largest size for the partition + partition_size = max(1024**3, anaconda_minimum, installed_size) + log.debug("/ partition size = %d", partition_size) # Create the results directory build_id = str(uuid4()) diff --git a/src/pylorax/api/projects.py b/src/pylorax/api/projects.py index 51512056..5c6b9913 100644 --- a/src/pylorax/api/projects.py +++ b/src/pylorax/api/projects.py @@ -352,20 +352,26 @@ def estimate_size(packages, block_size=4096): :type packages: list of TransactionMember objects :param block_size: The block size to use for rounding up file sizes. :type block_size: int - :returns: The estimated size of installed packages - :rtype: int + :returns: Tuple of the estimated size needed, and the size anaconda will calculate + :rtype: tuple(int, int) Estimating actual requirements is difficult without the actual file sizes, which yum doesn't provide access to. So use the file count and block size to estimate a minimum size for each package. + + Anaconda only takes into account the installedsize of each package. It then fudges + this by 35% to make sure there is enough space. 
""" installed_size = 0 + anaconda_size = 0 for p in packages: installed_size += len(p.po.filelist) * block_size installed_size += p.po.installedsize + # anaconda only takes into account installedsize + anaconda_size += p.po.installedsize # also count the RPM package size (yum cache) installed_size += ((p.po.size / block_size) + 1) * block_size - return installed_size + return (installed_size, anaconda_size) def projects_depsolve_with_size(yb, projects, groups, with_core=True): """Return the dependencies and installed size for a list of projects @@ -376,9 +382,12 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True): :type projects: List of tuples :param groups: The groups to include in dependency solving :type groups: List of str - :returns: installed size and a list of NEVRA's of the project and its dependencies - :rtype: tuple of (int, list of dicts) + :returns: installed size, size estimated by anaconda, and a list of NEVRA's of the project and its dependencies + :rtype: tuple of (int, int, list of dicts) :raises: ProjectsError if there was a problem installing something + + The anaconda_size only includes the installed package size, not file block or cache estimation like + installed_size includes. 
""" try: install_errors = _depsolve(yb, projects, groups) @@ -393,13 +402,13 @@ def projects_depsolve_with_size(yb, projects, groups, with_core=True): if rc not in [0, 1, 2]: raise ProjectsError("There was a problem depsolving %s: %s" % (projects, msg)) yb.tsInfo.makelists() - installed_size = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled) + (installed_size, anaconda_size) = estimate_size(yb.tsInfo.installed + yb.tsInfo.depinstalled) deps = sorted(map(tm_to_dep, yb.tsInfo.installed + yb.tsInfo.depinstalled), key=lambda p: p["name"].lower()) except YumBaseError as e: raise ProjectsError("There was a problem depsolving %s: %s" % (projects, str(e))) finally: yb.closeRpmDB() - return (installed_size, deps) + return (installed_size, anaconda_size, deps) def modules_list(yb, module_names): """Return a list of modules diff --git a/tests/pylorax/test_projects.py b/tests/pylorax/test_projects.py index f63cc344..81ce15ff 100644 --- a/tests/pylorax/test_projects.py +++ b/tests/pylorax/test_projects.py @@ -223,8 +223,10 @@ class ProjectsTest(unittest.TestCase): def test_projects_size_depsolve_glob(self): """Test that depsolving with a '*' version glob doesn't glob package names""" - size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False) + size, anaconda_size, deps = projects_depsolve_with_size(self.yb, [("python", "*")], [], with_core=False) self.assertTrue(size > 0) + self.assertTrue(anaconda_size > 0) + self.assertTrue(anaconda_size < size) self.assertTrue(len(deps) > 1) self.assertTrue("python" in [dep["name"] for dep in deps]) self.assertTrue("python-blivet" not in [dep["name"] for dep in deps])