From 84c5cad5921e96c6106cfd217de2064b64e1464f Mon Sep 17 00:00:00 2001 Message-Id: <84c5cad5921e96c6106cfd217de2064b64e1464f@dist-git> From: Michal Privoznik Date: Thu, 16 Jan 2020 10:03:54 +0100 Subject: [PATCH] qemu: Stop domain on failed restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When resuming a domain from a save file, we read the domain XML from the file, add it to our internal list of domains, start the qemu process, let it load the incoming migration stream and resume its vCPUs afterwards. If anything goes wrong, the domain object is removed from the list of domains and an error is returned to the caller. However, the qemu process might be left behind - if resuming vCPUs fails (e.g. because qemu is unable to acquire a write lock on a disk) then due to a bug the qemu process is not killed but the domain object is removed from the list. Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1718707 Signed-off-by: Michal Privoznik Reviewed-by: Daniel Henrique Barboza (cherry picked from commit 4c581527d431939a63be70c201b4ddab703cddbe) Signed-off-by: Michal Privoznik Message-Id: <4048f92488a8b8c31c7a17a14b579840a9492328.1579165329.git.mprivozn@redhat.com> Reviewed-by: Ján Tomko --- src/qemu/qemu_driver.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c index ce9b1772c1..217d873671 100644 --- a/src/qemu/qemu_driver.c +++ b/src/qemu/qemu_driver.c @@ -6800,7 +6800,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, { qemuDomainObjPrivatePtr priv = vm->privateData; int ret = -1; - bool restored = false; + bool started = false; virObjectEventPtr event; VIR_AUTOCLOSE intermediatefd = -1; g_autoptr(virCommand) cmd = NULL; @@ -6808,6 +6808,7 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, g_autoptr(virQEMUDriverConfig) cfg = virQEMUDriverGetConfig(driver); virQEMUSaveHeaderPtr header = &data->header; g_autoptr(qemuDomainSaveCookie) 
cookie = NULL; + int rc = 0; if (virSaveCookieParseString(data->cookie, (virObjectPtr *)&cookie, virDomainXMLOptionGetSaveCookie(driver->xmlopt)) < 0) @@ -6848,12 +6849,12 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, VIR_NETDEV_VPORT_PROFILE_OP_RESTORE, VIR_QEMU_PROCESS_START_PAUSED | VIR_QEMU_PROCESS_START_GEN_VMID) == 0) - restored = true; + started = true; if (intermediatefd != -1) { virErrorPtr orig_err = NULL; - if (!restored) { + if (!started) { /* if there was an error setting up qemu, the intermediate * process will wait forever to write to stdout, so we * must manually kill it and ignore any error related to @@ -6864,21 +6865,17 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, VIR_FORCE_CLOSE(*fd); } - if (virCommandWait(cmd, NULL) < 0) { - qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, asyncJob, 0); - restored = false; - } + rc = virCommandWait(cmd, NULL); VIR_DEBUG("Decompression binary stderr: %s", NULLSTR(errbuf)); - virErrorRestore(&orig_err); } if (VIR_CLOSE(*fd) < 0) { virReportSystemError(errno, _("cannot close file: %s"), path); - restored = false; + rc = -1; } - virDomainAuditStart(vm, "restored", restored); - if (!restored) + virDomainAuditStart(vm, "restored", started); + if (!started || rc < 0) goto cleanup; /* qemuProcessStart doesn't unset the qemu error reporting infrastructure @@ -6918,6 +6915,10 @@ qemuDomainSaveImageStartVM(virConnectPtr conn, ret = 0; cleanup: + if (ret < 0 && started) { + qemuProcessStop(driver, vm, VIR_DOMAIN_SHUTOFF_FAILED, + asyncJob, VIR_QEMU_PROCESS_STOP_MIGRATED); + } if (qemuSecurityRestoreSavedStateLabel(driver, vm, path) < 0) VIR_WARN("failed to restore save state label on %s", path); return ret; -- 2.25.0