From d2c64b1b2647015996b019b37dd66eb2fa895aca Mon Sep 17 00:00:00 2001 From: Vratislav Podzimek Date: Tue, 24 Mar 2015 12:40:46 +0100 Subject: [PATCH] Check that the transaction process is still alive If it terminates really badly (e.g. with SIGSEGV), it doesn't report any error, it just doesn't put anything on the queue. So instead of just blindly waiting on the queue forever, check that the process is still alive if we don't get any message within a long time interval. --- src/pylorax/ltmpl.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/pylorax/ltmpl.py b/src/pylorax/ltmpl.py index 1abbfc8e..fdf82ab7 100644 --- a/src/pylorax/ltmpl.py +++ b/src/pylorax/ltmpl.py @@ -40,6 +40,7 @@ import sys, traceback import struct import dnf import multiprocessing +import Queue class LoraxTemplate(object): def __init__(self, directories=None): @@ -504,6 +505,25 @@ class LoraxTemplateRunner(object): else: logger.debug("removepkg %s: no files to remove!", p) + def get_token_checked(self, process, queue): + """Try to get token from queue checking that process is still alive""" + + try: + # wait at most a minute for the token + (token, msg) = queue.get(timeout=60) + except Queue.Empty: + if process.is_alive(): + try: + # process still alive, give it 2 minutes more + (token, msg) = queue.get(timeout=120) + except Queue.Empty: + # waited for 3 minutes and got nothing + raise Exception("The transaction process got stuck somewhere (no message from it in 3 minutes)") + else: + raise Exception("The transaction process has ended abruptly") + + return (token, msg) + def run_pkg_transaction(self): ''' run_pkg_transaction @@ -543,12 +563,14 @@ class LoraxTemplateRunner(object): msgout = output.LoraxOutput() process = multiprocessing.Process(target=do_transaction, args=(self.dbo, queue)) process.start() - (token, msg) = queue.get() + (token, msg) = self.get_token_checked(process, queue) + while token not in ('post', 'quit'): if token == 'install': 
logging.info("%s", msg) msgout.writeline(msg) - (token, msg) = queue.get() + (token, msg) = self.get_token_checked(process, queue) + if token == 'quit': logger.error("Transaction failed.") raise Exception("Transaction failed")