Retry buildinstall tasks on losetup error

JIRA: RHELCMP-1394
Signed-off-by: Haibo Lin <hlin@redhat.com>
This commit is contained in:
Haibo Lin 2020-07-20 17:11:06 +08:00
parent f7167fa3b6
commit 7e6bed9713
3 changed files with 78 additions and 7 deletions

View File

@ -801,6 +801,10 @@ class BuildinstallThread(WorkerThread):
weight=compose.conf["runroot_weights"].get("buildinstall"),
)
else:
try:
lorax_log_dir = _get_log_dir(compose, variant, arch)
except Exception:
lorax_log_dir = None
runroot.run(
cmd,
log_file=log_file,
@ -809,6 +813,7 @@ class BuildinstallThread(WorkerThread):
mounts=[compose.topdir],
weight=compose.conf["runroot_weights"].get("buildinstall"),
chown_paths=chown_paths,
log_dir=lorax_log_dir,
)
if final_output_dir != output_dir:

View File

@ -74,12 +74,38 @@ class Runroot(kobo.log.LoggingBase):
run(command, show_cmd=True, logfile=log_file)
self._result = True
def _has_losetup_error(self, log_dir):
"""
Check if there's losetup error in log.
This error happens if the Koji builder runs out of loopback devices.
This can happen if too many tasks that require them are scheduled on
the same builder. A retried task might end up on a different builder,
or maybe some other task will have finished already.
:param str log_dir: path to buildinstall log dir,
e.g. logs/s390x/buildinstall-BaseOS-logs/
"""
if not log_dir:
return False
log_file = os.path.join(log_dir, "program.log")
try:
with open(log_file) as f:
for line in f:
if "losetup: cannot find an unused loop device" in line:
return True
except Exception:
pass
return False
def _run_koji(self, command, log_file=None, packages=None, arch=None, **kwargs):
"""
Runs the runroot command in Koji.
"""
runroot_channel = self.compose.conf.get("runroot_channel")
runroot_tag = self.compose.conf["runroot_tag"]
log_dir = kwargs.pop("log_dir", None)
koji_wrapper = kojiwrapper.KojiWrapper(self.compose.conf["koji_profile"])
koji_cmd = koji_wrapper.get_runroot_cmd(
@ -92,13 +118,19 @@ class Runroot(kobo.log.LoggingBase):
**kwargs
)
output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file)
if output["retcode"] != 0:
raise RuntimeError(
"Runroot task failed: %s. See %s for more details."
% (output["task_id"], log_file)
)
self._result = output
attempt = 0
max_retries = 3
while True:
output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file)
if output["retcode"] == 0:
self._result = output
return
elif attempt >= max_retries or not self._has_losetup_error(log_dir):
raise RuntimeError(
"Runroot task failed: %s. See %s for more details."
% (output["task_id"], log_file)
)
attempt += 1
def _ssh_run(self, hostname, user, command, fmt_dict=None, log_file=None):
"""

View File

@ -198,3 +198,37 @@ class TestRunrootOpenSSH(helpers.PungiTestCase):
),
]
)
class TestRunrootKoji(helpers.PungiTestCase):
def setUp(self):
super(TestRunrootKoji, self).setUp()
self.compose = helpers.DummyCompose(
self.topdir, {"runroot": True, "runroot_tag": "f28-build"},
)
self.runroot = Runroot(self.compose)
def test_has_losetup_error(self):
self.assertFalse(self.runroot._has_losetup_error(None))
with mock.patch("pungi.runroot.open", mock.mock_open(read_data="")):
self.assertFalse(self.runroot._has_losetup_error("/foo_log_dir"))
with mock.patch(
"pungi.runroot.open",
mock.mock_open(read_data="losetup: cannot find an unused loop device"),
):
self.assertTrue(self.runroot._has_losetup_error("/bar_log_dir"))
@mock.patch("pungi.runroot.kojiwrapper.KojiWrapper")
def test_run_koji_retry(self, mock_kojiwrapper):
self.compose.conf["koji_profile"] = "test"
mock_kojiwrapper.return_value.get_runroot_cmd.return_value = ["df -h"]
mock_kojiwrapper.return_value.run_runroot_cmd.side_effect = [
{"retcode": 1, "task_id": 1},
{"retcode": 0, "task_id": 2},
]
self.runroot._has_losetup_error = mock.Mock(side_effect=[True, False])
self.runroot._run_koji("")
self.assertEqual(mock_kojiwrapper.return_value.run_runroot_cmd.call_count, 2)