From 98d56fba679855a5e938480a9200e56768cb4d25 Mon Sep 17 00:00:00 2001 From: "Brian C. Lane" Date: Fri, 18 Jan 2019 12:06:18 -0800 Subject: [PATCH] lorax: Move default tmp dir to /var/tmp/lorax If systemd's tmpfiles.d timer is executed while lorax is running, it will remove any files and directories older than 30 days. This is what has been causing the occasional error where /proc/ would seem to vanish during the install. Upstream has proposed a solution (https://github.com/systemd/systemd/pull/11482), but until that is released we need a work-around to protect the lorax files. This commit does several things: * Move the default tmpdir from /var/tmp/ to /var/tmp/lorax/ * Add a lorax.conf tmpfiles.d file that prevents systemd-tmpfiles from removing anything under /var/tmp/lorax/ * Add an exit handler to lorax so that temporary directories are removed on exit or on a Python traceback. * Use flock to lock access to the tempdir while lorax is running. * Remove any unlocked tempdirs named /var/tmp/lorax/lorax.* at startup Note that the exit handler will not remove the tempdir if lorax is killed with a signal -- signals are caught by dnf, which prevents the exit handler from running. systemd-tmpfiles cannot clean up the tempdirs at boot time because they contain files labeled as shadow_t, so we have to remove those when lorax runs. At startup lorax uses the flock to avoid removing any directories created by parallel instances of lorax, and only removes ones that are unlocked. In the worst case, orphaned tempdirs will be around until the first run of lorax after a reboot. If you want to keep the working directory around for debugging purposes, use --workdir /var/tmp/lorax/my-workdir and it won't be removed by lorax. 
(cherry picked from commit e4fe1aab32acdaead8718716238c83690c88f7e0) --- lorax.spec | 1 + setup.py | 3 +- src/pylorax/cmdline.py | 4 +-- src/sbin/lorax | 64 ++++++++++++++++++++++++++++++++++++++++-- systemd/lorax.conf | 3 ++ 5 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 systemd/lorax.conf diff --git a/lorax.spec b/lorax.spec index d3eb711c..886632e6 100644 --- a/lorax.spec +++ b/lorax.spec @@ -202,6 +202,7 @@ getent passwd weldr >/dev/null 2>&1 || useradd -r -g weldr -d / -s /sbin/nologin %config(noreplace) %{_sysconfdir}/lorax/lorax.conf %dir %{_datadir}/lorax %{_mandir}/man1/*.1* +%{_tmpfilesdir}/lorax.conf %files lmc-virt diff --git a/setup.py b/setup.py index 266c54d5..f2e4a033 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ data_files = [("/etc/lorax", ["etc/lorax.conf"]), ("/etc/lorax", ["etc/composer.conf"]), ("/usr/lib/systemd/system", ["systemd/lorax-composer.service", "systemd/lorax-composer.socket"]), - ("/usr/lib/tmpfiles.d/", ["systemd/lorax-composer.conf"])] + ("/usr/lib/tmpfiles.d/", ["systemd/lorax-composer.conf", + "systemd/lorax.conf"])] # shared files for root, dnames, fnames in os.walk("share"): diff --git a/src/pylorax/cmdline.py b/src/pylorax/cmdline.py index b1147681..2c7ede7e 100644 --- a/src/pylorax/cmdline.py +++ b/src/pylorax/cmdline.py @@ -75,12 +75,12 @@ def lorax_parser(dracut_default=""): action="store_false", default=True, dest="doupgrade") optional.add_argument("--logfile", default="./lorax.log", type=os.path.abspath, help="Path to logfile") - optional.add_argument("--tmp", default="/var/tmp", + optional.add_argument("--tmp", default="/var/tmp/lorax", help="Top level temporary directory" ) optional.add_argument("--cachedir", default=None, type=os.path.abspath, help="DNF cache directory. Default is a temporary dir.") optional.add_argument("--workdir", default=None, type=os.path.abspath, - help="Work directory, overrides --tmp. 
Default is a temporary dir under /var/tmp") + help="Work directory, overrides --tmp. Default is a temporary dir under /var/tmp/lorax") optional.add_argument("--force", default=False, action="store_true", help="Run even when the destination directory exists") optional.add_argument("--add-template", dest="add_templates", diff --git a/src/sbin/lorax b/src/sbin/lorax index e6235a52..82125719 100755 --- a/src/sbin/lorax +++ b/src/sbin/lorax @@ -24,6 +24,9 @@ log = logging.getLogger("lorax") dnf_log = logging.getLogger("dnf") +import atexit +import fcntl +from glob import glob import sys import os import tempfile @@ -37,6 +40,42 @@ from pylorax import DRACUT_DEFAULT, log_selinux_state from pylorax.cmdline import lorax_parser from pylorax.dnfbase import get_dnf_base_object +def exit_handler(tempdir): + """Handle cleanup of tmpdir, if it still exists + """ + if not tempdir: + return + if os.path.exists(tempdir): + log.info("Cleaning up tempdir - %s", tempdir) + shutil.rmtree(tempdir) + + +def remove_tempdirs(): + """Delete all unlocked tempdirs under tempfile.gettempdir + + When lorax crashes it can leave behind tempdirs, which cannot be cleaned up by + systemd-tmpfiles (SELinux restricts a complete cleanup). + + So we lock them while in use and cleanup all the ones that are not locked + when lorax starts. 
+ """ + for d in glob(os.path.join(tempfile.gettempdir(), "lorax.*")): + if not os.path.isdir(d): + continue + try: + dir_fd = os.open(d, os.O_RDONLY|os.O_DIRECTORY|os.O_CLOEXEC) + fcntl.flock(dir_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except OSError: + # lock failed, skip this directory + os.close(dir_fd) + continue + + # Lock succeeded, remove the directory + log.info("Removing old tempdir %s", d) + shutil.rmtree(d) + os.close(dir_fd) + + def setup_logging(opts): pylorax.setup_logging(opts.logfile, log) @@ -76,15 +115,28 @@ def main(): log_selinux_state() if not opts.workdir: + if not os.path.exists(opts.tmp): + os.makedirs(opts.tmp) + tempfile.tempdir = opts.tmp + # Remove any orphaned lorax tempdirs + remove_tempdirs() + # create the temporary directory for lorax - tempdir = tempfile.mkdtemp(prefix="lorax.", dir=tempfile.gettempdir()) + tempdir = tempfile.mkdtemp(prefix="lorax.") + + # register an exit handler to cleanup the temporary directory + atexit.register(exit_handler, tempdir) else: + # NOTE: workdir is not cleaned up on exit tempdir = opts.workdir if not os.path.exists(tempdir): os.makedirs(tempdir) + # Remove any orphaned lorax tempdirs + remove_tempdirs() + installtree = os.path.join(tempdir, "installtree") if not os.path.exists(installtree): os.mkdir(installtree) @@ -92,15 +144,18 @@ def main(): if not os.path.exists(dnftempdir): os.mkdir(dnftempdir) + # Obtain an exclusive lock on the tempdir + dir_fd = os.open(tempdir, os.O_RDONLY|os.O_DIRECTORY|os.O_CLOEXEC) + fcntl.flock(dir_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + dnfbase = get_dnf_base_object(installtree, opts.source, opts.mirrorlist, opts.repos, opts.enablerepos, opts.disablerepos, dnftempdir, opts.proxy, opts.version, opts.cachedir, os.path.dirname(opts.logfile), not opts.noverifyssl) if dnfbase is None: + os.close(dir_fd) print("error: unable to create the dnf base object", file=sys.stderr) - if not opts.workdir: - shutil.rmtree(tempdir) sys.exit(1) parsed_add_template_vars = {} @@ -140,5 +195,8 
@@ def main(): user_dracut_args=opts.dracut_args) + # Release the lock on the tempdir + os.close(dir_fd) + if __name__ == "__main__": main() diff --git a/systemd/lorax.conf b/systemd/lorax.conf new file mode 100644 index 00000000..f720ceb4 --- /dev/null +++ b/systemd/lorax.conf @@ -0,0 +1,3 @@ +# Prevent systemd from removing installtree files +# This should eventually be fixed by - https://github.com/systemd/systemd/pull/11482 +x /var/tmp/lorax 750 root root