Retry buildinstall tasks on losetup error
JIRA: RHELCMP-1394 Signed-off-by: Haibo Lin <hlin@redhat.com>
This commit is contained in:
parent
f7167fa3b6
commit
7e6bed9713
@ -801,6 +801,10 @@ class BuildinstallThread(WorkerThread):
|
|||||||
weight=compose.conf["runroot_weights"].get("buildinstall"),
|
weight=compose.conf["runroot_weights"].get("buildinstall"),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
try:
|
||||||
|
lorax_log_dir = _get_log_dir(compose, variant, arch)
|
||||||
|
except Exception:
|
||||||
|
lorax_log_dir = None
|
||||||
runroot.run(
|
runroot.run(
|
||||||
cmd,
|
cmd,
|
||||||
log_file=log_file,
|
log_file=log_file,
|
||||||
@ -809,6 +813,7 @@ class BuildinstallThread(WorkerThread):
|
|||||||
mounts=[compose.topdir],
|
mounts=[compose.topdir],
|
||||||
weight=compose.conf["runroot_weights"].get("buildinstall"),
|
weight=compose.conf["runroot_weights"].get("buildinstall"),
|
||||||
chown_paths=chown_paths,
|
chown_paths=chown_paths,
|
||||||
|
log_dir=lorax_log_dir,
|
||||||
)
|
)
|
||||||
|
|
||||||
if final_output_dir != output_dir:
|
if final_output_dir != output_dir:
|
||||||
|
@ -74,12 +74,38 @@ class Runroot(kobo.log.LoggingBase):
|
|||||||
run(command, show_cmd=True, logfile=log_file)
|
run(command, show_cmd=True, logfile=log_file)
|
||||||
self._result = True
|
self._result = True
|
||||||
|
|
||||||
|
def _has_losetup_error(self, log_dir):
|
||||||
|
"""
|
||||||
|
Check if there's losetup error in log.
|
||||||
|
|
||||||
|
This error happens if the Koji builder runs out of loopback devices.
|
||||||
|
This can happen if too many tasks that require them are scheduled on
|
||||||
|
the same builder. A retried task might end up on a different builder,
|
||||||
|
or maybe some other task will have finished already.
|
||||||
|
|
||||||
|
:param str log_dir: path to buildinstall log dir,
|
||||||
|
e.g. logs/s390x/buildinstall-BaseOS-logs/
|
||||||
|
"""
|
||||||
|
if not log_dir:
|
||||||
|
return False
|
||||||
|
|
||||||
|
log_file = os.path.join(log_dir, "program.log")
|
||||||
|
try:
|
||||||
|
with open(log_file) as f:
|
||||||
|
for line in f:
|
||||||
|
if "losetup: cannot find an unused loop device" in line:
|
||||||
|
return True
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
def _run_koji(self, command, log_file=None, packages=None, arch=None, **kwargs):
|
def _run_koji(self, command, log_file=None, packages=None, arch=None, **kwargs):
|
||||||
"""
|
"""
|
||||||
Runs the runroot command in Koji.
|
Runs the runroot command in Koji.
|
||||||
"""
|
"""
|
||||||
runroot_channel = self.compose.conf.get("runroot_channel")
|
runroot_channel = self.compose.conf.get("runroot_channel")
|
||||||
runroot_tag = self.compose.conf["runroot_tag"]
|
runroot_tag = self.compose.conf["runroot_tag"]
|
||||||
|
log_dir = kwargs.pop("log_dir", None)
|
||||||
|
|
||||||
koji_wrapper = kojiwrapper.KojiWrapper(self.compose.conf["koji_profile"])
|
koji_wrapper = kojiwrapper.KojiWrapper(self.compose.conf["koji_profile"])
|
||||||
koji_cmd = koji_wrapper.get_runroot_cmd(
|
koji_cmd = koji_wrapper.get_runroot_cmd(
|
||||||
@ -92,13 +118,19 @@ class Runroot(kobo.log.LoggingBase):
|
|||||||
**kwargs
|
**kwargs
|
||||||
)
|
)
|
||||||
|
|
||||||
output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file)
|
attempt = 0
|
||||||
if output["retcode"] != 0:
|
max_retries = 3
|
||||||
raise RuntimeError(
|
while True:
|
||||||
"Runroot task failed: %s. See %s for more details."
|
output = koji_wrapper.run_runroot_cmd(koji_cmd, log_file=log_file)
|
||||||
% (output["task_id"], log_file)
|
if output["retcode"] == 0:
|
||||||
)
|
self._result = output
|
||||||
self._result = output
|
return
|
||||||
|
elif attempt >= max_retries or not self._has_losetup_error(log_dir):
|
||||||
|
raise RuntimeError(
|
||||||
|
"Runroot task failed: %s. See %s for more details."
|
||||||
|
% (output["task_id"], log_file)
|
||||||
|
)
|
||||||
|
attempt += 1
|
||||||
|
|
||||||
def _ssh_run(self, hostname, user, command, fmt_dict=None, log_file=None):
|
def _ssh_run(self, hostname, user, command, fmt_dict=None, log_file=None):
|
||||||
"""
|
"""
|
||||||
|
@ -198,3 +198,37 @@ class TestRunrootOpenSSH(helpers.PungiTestCase):
|
|||||||
),
|
),
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestRunrootKoji(helpers.PungiTestCase):
    """Tests for the Koji runroot path: losetup error detection and retry."""

    def setUp(self):
        super(TestRunrootKoji, self).setUp()
        config = {"runroot": True, "runroot_tag": "f28-build"}
        self.compose = helpers.DummyCompose(self.topdir, config)

        self.runroot = Runroot(self.compose)

    def test_has_losetup_error(self):
        # Without a log directory there is nothing to inspect.
        self.assertFalse(self.runroot._has_losetup_error(None))

        # An empty log does not contain the error message.
        empty_log = mock.mock_open(read_data="")
        with mock.patch("pungi.runroot.open", empty_log):
            self.assertFalse(self.runroot._has_losetup_error("/foo_log_dir"))

        # A log carrying the losetup message is detected.
        failing_log = mock.mock_open(
            read_data="losetup: cannot find an unused loop device"
        )
        with mock.patch("pungi.runroot.open", failing_log):
            self.assertTrue(self.runroot._has_losetup_error("/bar_log_dir"))

    @mock.patch("pungi.runroot.kojiwrapper.KojiWrapper")
    def test_run_koji_retry(self, mock_kojiwrapper):
        wrapper = mock_kojiwrapper.return_value
        self.compose.conf["koji_profile"] = "test"
        wrapper.get_runroot_cmd.return_value = ["df -h"]
        # First task fails, the second one succeeds.
        wrapper.run_runroot_cmd.side_effect = [
            {"retcode": 1, "task_id": 1},
            {"retcode": 0, "task_id": 2},
        ]
        self.runroot._has_losetup_error = mock.Mock(side_effect=[True, False])

        self.runroot._run_koji("")

        # The failed task must have been retried exactly once.
        self.assertEqual(wrapper.run_runroot_cmd.call_count, 2)
|
||||||
|
Loading…
Reference in New Issue
Block a user