From e6a2cc605c0fb28d2b9d54287c714c18c184365a Mon Sep 17 00:00:00 2001 From: "Brian C. Lane" Date: Tue, 8 Dec 2020 15:57:51 -0800 Subject: [PATCH] Improve lmc no-virt error handling When monitoring log output in livemedia-creator --no-virt it could get stuck if the output from anaconda stops for some reason. This changes execReadlines so that it will only read output when it is available, will monitor the process state, and continue to call the callback function. It also adds a final timeout on proc.communicate() so that if Anaconda becomes stuck and won't exit livemedia-creator will eventually exit. When the no-virt callback terminates anaconda on an error it now sends a TERM signal to all of the unshare process' children because just sending it to unshare doesn't cause anaconda to exit. (cherry picked from commit 6400515880e59ab7d0d68a848e2f57052faa0d30) --- lorax.spec | 1 + src/pylorax/executils.py | 52 +++++++++++++++++++++++++++++----------- src/pylorax/installer.py | 9 +++++-- test-packages | 1 + 4 files changed, 47 insertions(+), 16 deletions(-) diff --git a/lorax.spec b/lorax.spec index 89743685..87bedf9f 100644 --- a/lorax.spec +++ b/lorax.spec @@ -121,6 +121,7 @@ Requires: anaconda-core Requires: anaconda-tui Requires: anaconda-install-env-deps Requires: system-logos +Requires: python3-psutil %description lmc-novirt Additional dependencies required by livemedia-creator when using it with --no-virt diff --git a/src/pylorax/executils.py b/src/pylorax/executils.py index da5df60c..ffb26b68 100644 --- a/src/pylorax/executils.py +++ b/src/pylorax/executils.py @@ -19,9 +19,11 @@ # import os +import select import subprocess from subprocess import TimeoutExpired import signal +import time import logging log = logging.getLogger("pylorax") @@ -288,6 +290,7 @@ def execReadlines(command, argv, stdin=None, root='/', env_prune=None, filter_st self._proc = proc self._argv = argv self._callback = callback + self._data = "" def __iter__(self): return self @@ -302,22 +305,43 @@ def execReadlines(command, argv, stdin=None, root='/', env_prune=None, filter_st pass def __next__(self): - # Read the next line, blocking if a line is not yet available - line = self._proc.stdout.readline().decode("utf-8") - if line == '' or not self._callback(self._proc): - # Output finished, wait for the process to end - self._proc.communicate() + # Return lines from stdout while also calling _callback + while True: + # Check for input without blocking + if select.select([self._proc.stdout], [], [], 0)[0]: + size = len(self._proc.stdout.peek(1)) + if size > 0: + self._data += self._proc.stdout.read(size).decode("utf-8") - # Check for successful exit - if self._proc.returncode < 0: - raise OSError("process '%s' was killed by signal %s" % - (self._argv, -self._proc.returncode)) - elif self._proc.returncode > 0: - raise OSError("process '%s' exited with status %s" % - (self._argv, self._proc.returncode)) - raise StopIteration + if self._data.find("\n") >= 0: + line = self._data.split("\n", 1) + self._data = line[1] + return line[0] - return line.strip() + if self._proc.poll() is not None or not self._callback(self._proc): + # Output finished, wait 60s for the process to end + try: + self._proc.communicate(timeout=60) + except subprocess.TimeoutExpired: + # Did not exit in 60s, kill it and wait 30s more + self._proc.kill() + try: + self._proc.communicate(timeout=30) + except subprocess.TimeoutExpired: + pass + + if self._proc.returncode is None: + raise OSError("process '%s' failed to be killed" % self._argv) + elif self._proc.returncode < 0: + raise OSError("process '%s' was killed by signal %s" % + (self._argv, -self._proc.returncode)) + elif self._proc.returncode > 0: + raise OSError("process '%s' exited with status %s" % + (self._argv, self._proc.returncode)) + raise StopIteration + + # Don't loop too fast with no input to read + time.sleep(0.5) argv = [command] + argv diff --git a/src/pylorax/installer.py b/src/pylorax/installer.py index 9d0a8527..15284748 100644 --- a/src/pylorax/installer.py +++ b/src/pylorax/installer.py @@ -291,7 +291,12 @@ def novirt_cancel_check(cancel_funcs, proc): """ for f in cancel_funcs: if f(): - proc.terminate() + # Anaconda runs from unshare, anaconda doesn't exit correctly so try to + # send TERM to all of them directly + import psutil + for p in psutil.Process(proc.pid).children(recursive=True): + p.terminate() + psutil.Process(proc.pid).terminate() return True return False @@ -401,7 +406,7 @@ def novirt_install(opts, disk_img, disk_size, cancel_func=None, tar_img=None): # Preload libgomp.so.1 to workaround rhbz#1722181 log.info("Running anaconda.") try: - unshare_args = [ "--pid", "--kill-child", "--mount", "--propagation", "unchanged", "anaconda" ] + args + unshare_args = ["--pid", "--kill-child", "--mount", "--propagation", "unchanged", "anaconda"] + args for line in execReadlines("unshare", unshare_args, reset_lang=False, env_add={"ANACONDA_PRODUCTNAME": opts.project, "ANACONDA_PRODUCTVERSION": opts.releasever, diff --git a/test-packages b/test-packages index bc5bf20f..77583c7a 100644 --- a/test-packages +++ b/test-packages @@ -12,6 +12,7 @@ python3-librepo python3-magic python3-mako python3-pocketlint +python3-psutil python3-pycdlib python3-pylint python3-pyparted