From a813e926dcf7a7f4b90d0d95a01358c22b5ff41c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lubom=C3=ADr=20Sedl=C3=A1=C5=99?= Date: Wed, 2 Mar 2016 12:36:17 +0100 Subject: [PATCH] [koji-wrapper] Retry watching on connection errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With this patch Pungi should be more tolerant of network failures when running a blocking command (creating live media or live images). If the connection drops and the output indicates network problems, Pungi will try to watch the task with `koji watch-task`. This will be retried until it finishes (successfully or with some other failure). There is an increasing timeout after each retry. Currently the maximum number of retries is not limited. Signed-off-by: Lubomír Sedlář --- pungi/wrappers/kojiwrapper.py | 42 ++++++++++--- tests/test_koji_wrapper.py | 107 ++++++++++++++++++++++++++++++++++ 2 files changed, 141 insertions(+), 8 deletions(-) diff --git a/pungi/wrappers/kojiwrapper.py b/pungi/wrappers/kojiwrapper.py index c1a30f6f..5267eef0 100644 --- a/pungi/wrappers/kojiwrapper.py +++ b/pungi/wrappers/kojiwrapper.py @@ -18,6 +18,7 @@ import os import pipes import re +import time import koji import rpmUtils.arch @@ -234,28 +235,53 @@ class KojiWrapper(object): return cmd - def run_blocking_cmd(self, command, log_file=None): + def _has_connection_error(self, output): + """Checks if output indicates connection error.""" + return re.search('error: failed to connect\n$', output) + + def _wait_for_task(self, task_id, logfile=None, max_retries=None): + """Tries to wait for a task to finish. On connection error it will + retry with `watch-task` command. + """ + cmd = [self.executable, 'watch-task', str(task_id)] + attempt = 0 + + while True: + retcode, output = run(cmd, can_fail=True, logfile=logfile) + + if retcode == 0 or not self._has_connection_error(output): + # Task finished for reason other than connection error. + return retcode, output + + attempt += 1 + if max_retries and attempt >= max_retries: + break + time.sleep(attempt * 10) + + raise RuntimeError('Failed to wait for task %s. Too many connection errors.' % task_id) + + def run_blocking_cmd(self, command, log_file=None, max_retries=None): """ Run a blocking koji command. Returns a dict with output of the command, its exit code and parsed task id. This method will block until the command finishes. """ - try: - retcode, output = run(command, can_fail=True, logfile=log_file) - except RuntimeError, e: - raise RuntimeError("%s. %s failed with '%s'" % (e, command, output)) + retcode, output = run(command, can_fail=True, logfile=log_file) match = re.search(r"Created task: (\d+)", output) if not match: raise RuntimeError("Could not find task ID in output. Command '%s' returned '%s'." % (" ".join(command), output)) + task_id = int(match.groups()[0]) - result = { + if retcode != 0 and self._has_connection_error(output): + retcode, output = self._wait_for_task(task_id, logfile=log_file, max_retries=max_retries) + + return { "retcode": retcode, "output": output, - "task_id": int(match.groups()[0]), + "task_id": task_id, } - return result def get_image_paths(self, task_id): """ diff --git a/tests/test_koji_wrapper.py b/tests/test_koji_wrapper.py index b5dccfa8..185ad7f5 100755 --- a/tests/test_koji_wrapper.py +++ b/tests/test_koji_wrapper.py @@ -398,5 +398,112 @@ class RunrootKojiWrapperTest(KojiWrapperBaseTestCase): self.assertDictEqual(result, {'retcode': 1, 'output': output, 'task_id': 12345}) +class RunBlockingCmdTest(KojiWrapperBaseTestCase): + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_minimal(self, run): + output = 'Created task: 1234\nHello\n' + run.return_value = (0, output) + + result = self.koji.run_blocking_cmd('cmd') + + self.assertDictEqual(result, {'retcode': 0, 'output': output, 'task_id': 1234}) + self.assertItemsEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None)]) + + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_with_log(self, run): + output = 'Created task: 1234\nHello\n' + run.return_value = (0, output) + + result = self.koji.run_blocking_cmd('cmd', log_file='logfile') + + self.assertDictEqual(result, {'retcode': 0, 'output': output, 'task_id': 1234}) + self.assertItemsEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile='logfile')]) + + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_fail_with_task_id(self, run): + output = 'Created task: 1234\nBoom\n' + run.return_value = (1, output) + + result = self.koji.run_blocking_cmd('cmd') + + self.assertDictEqual(result, {'retcode': 1, 'output': output, 'task_id': 1234}) + self.assertItemsEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None)]) + + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_fail_without_task_id(self, run): + output = 'Not found\n' + run.return_value = (1, output) + + with self.assertRaises(RuntimeError) as ctx: + self.koji.run_blocking_cmd('cmd') + + self.assertItemsEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None)]) + self.assertIn('Could not find task ID', ctx.exception.message) + + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_disconnect_and_retry(self, run): + output = 'Created task: 1234\nerror: failed to connect\n' + retry = 'Created task: 1234\nOook\n' + run.side_effect = [(1, output), (0, retry)] + + result = self.koji.run_blocking_cmd('cmd') + + self.assertDictEqual(result, {'retcode': 0, 'output': retry, 'task_id': 1234}) + self.assertEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None)]) + + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_disconnect_and_retry_but_fail(self, run): + output = 'Created task: 1234\nerror: failed to connect\n' + retry = 'Created task: 1234\nNot working still\n' + run.side_effect = [(1, output), (1, retry)] + + result = self.koji.run_blocking_cmd('cmd') + + self.assertDictEqual(result, {'retcode': 1, 'output': retry, 'task_id': 1234}) + self.assertEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None)]) + + @mock.patch('time.sleep') + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_disconnect_and_retry_multiple_times(self, run, sleep): + output = 'Created task: 1234\nerror: failed to connect\n' + retry = 'Created task: 1234\nOK\n' + run.side_effect = [(1, output), (1, output), (1, output), (0, retry)] + + result = self.koji.run_blocking_cmd('cmd') + + self.assertDictEqual(result, {'retcode': 0, 'output': retry, 'task_id': 1234}) + self.assertEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None)]) + self.assertEqual(sleep.mock_calls, + [mock.call(i * 10) for i in range(1, 3)]) + + @mock.patch('time.sleep') + @mock.patch('pungi.wrappers.kojiwrapper.run') + def test_disconnect_and_never_reconnect(self, run, sleep): + output = 'Created task: 1234\nerror: failed to connect\n' + run.side_effect = [(1, output), (1, output), (1, output), (1, output)] + + with self.assertRaises(RuntimeError) as ctx: + self.koji.run_blocking_cmd('cmd', max_retries=2) + + self.assertIn('Failed to wait', ctx.exception.message) + self.assertEqual(run.mock_calls, + [mock.call('cmd', can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None), + mock.call(['koji', 'watch-task', '1234'], can_fail=True, logfile=None)]) + self.assertEqual(sleep.mock_calls, [mock.call(i * 10) for i in range(1, 2)]) + + if __name__ == "__main__": unittest.main()