99 lines
4.1 KiB
Diff
99 lines
4.1 KiB
Diff
From 8d2acfa55b9c9f522c848439e8bcdad303681658 Mon Sep 17 00:00:00 2001
|
|
Message-ID: <8d2acfa55b9c9f522c848439e8bcdad303681658.1730734026.git.jdenemar@redhat.com>
|
|
From: Jiri Denemark <jdenemar@redhat.com>
|
|
Date: Thu, 8 Aug 2024 13:02:08 +0200
|
|
Subject: [PATCH] qemu: Avoid false failure when resuming post-copy migration
|
|
|
|
Depending on timing between QEMU and libvirt, an attempt to resume a failed
|
|
post-copy migration could immediately report a failure in post-copy
|
|
phase again even though the migration actually resumed and is
|
|
progressing just fine.
|
|
|
|
This is caused by QEMU reporting the original migration state (i.e.,
|
|
postcopy-paused) until migration is successfully resumed and QEMU
|
|
switches to postcopy-active. QEMU 9.1 introduced a new
|
|
postcopy-recover-setup migration state which is entered immediately
|
|
after requesting migration to be resumed, so we can reliably wait for
|
|
the migration to either continue or fail without being confused by the
|
|
old state.
|
|
|
|
https://issues.redhat.com/browse/RHEL-22166
|
|
|
|
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
Reviewed-by: Michal Privoznik <mprivozn@redhat.com>
|
|
(cherry picked from commit 11f6773f198636b80e73fb3f69adc83554860172)
|
|
|
|
https://issues.redhat.com/browse/RHEL-63877
|
|
|
|
Signed-off-by: Jiri Denemark <jdenemar@redhat.com>
|
|
---
|
|
src/qemu/qemu_migration.c | 25 ++++++++++++++++++++++++-
|
|
1 file changed, 24 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/src/qemu/qemu_migration.c b/src/qemu/qemu_migration.c
|
|
index 70318e26f3..a1ebb621d1 100644
|
|
--- a/src/qemu/qemu_migration.c
|
|
+++ b/src/qemu/qemu_migration.c
|
|
@@ -1972,6 +1972,7 @@ enum qemuMigrationCompletedFlags {
|
|
QEMU_MIGRATION_COMPLETED_CHECK_STORAGE = (1 << 1),
|
|
QEMU_MIGRATION_COMPLETED_POSTCOPY = (1 << 2),
|
|
QEMU_MIGRATION_COMPLETED_PRE_SWITCHOVER = (1 << 3),
|
|
+ QEMU_MIRGATION_COMPLETED_RECOVERY = (1 << 4),
|
|
};
|
|
|
|
|
|
@@ -2033,6 +2034,16 @@ qemuMigrationAnyCompleted(virDomainObj *vm,
|
|
return 1;
|
|
}
|
|
|
|
+ /* When QEMU is new enough to enter postcopy-recover-setup state during
|
|
+ * post-copy recovery, the source waits for the recovery to start
|
|
+ * before letting the destination wait for migration to complete.
|
|
+ */
|
|
+ if (flags & QEMU_MIRGATION_COMPLETED_RECOVERY &&
|
|
+ jobData->status == VIR_DOMAIN_JOB_STATUS_POSTCOPY) {
|
|
+ VIR_DEBUG("Post-copy recovery active");
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
if (jobData->status == VIR_DOMAIN_JOB_STATUS_HYPERVISOR_COMPLETED)
|
|
return 1;
|
|
else
|
|
@@ -5131,6 +5142,7 @@ qemuMigrationSrcResume(virDomainObj *vm,
|
|
char **cookieout,
|
|
int *cookieoutlen,
|
|
qemuMigrationSpec *spec,
|
|
+ virConnectPtr dconn,
|
|
unsigned int flags)
|
|
{
|
|
qemuDomainObjPrivate *priv = vm->privateData;
|
|
@@ -5161,6 +5173,17 @@ qemuMigrationSrcResume(virDomainObj *vm,
|
|
if (rc < 0)
|
|
return -1;
|
|
|
|
+ /* Wait for postcopy recovery to start (or fail) if QEMU is new enough to
|
|
+ * support postcopy-recover-setup migration state. */
|
|
+ if (priv->migrationRecoverSetup) {
|
|
+ VIR_DEBUG("Waiting for post-copy recovery to start");
|
|
+ if (qemuMigrationSrcWaitForCompletion(vm, VIR_ASYNC_JOB_MIGRATION_OUT, dconn,
|
|
+ QEMU_MIRGATION_COMPLETED_RECOVERY) < 0)
|
|
+ return -1;
|
|
+ } else {
|
|
+ VIR_WARN("QEMU is too old, we may report a failure in post-copy phase even though the migration may be running just fine");
|
|
+ }
|
|
+
|
|
if (qemuMigrationCookieFormat(mig, driver, vm,
|
|
QEMU_MIGRATION_SOURCE,
|
|
cookieout, cookieoutlen,
|
|
@@ -5265,7 +5288,7 @@ qemuMigrationSrcPerformNative(virQEMUDriver *driver,
|
|
|
|
if (flags & VIR_MIGRATE_POSTCOPY_RESUME) {
|
|
ret = qemuMigrationSrcResume(vm, migParams, cookiein, cookieinlen,
|
|
- cookieout, cookieoutlen, &spec, flags);
|
|
+ cookieout, cookieoutlen, &spec, dconn, flags);
|
|
} else {
|
|
ret = qemuMigrationSrcRun(driver, vm, xmlin, persist_xml, cookiein, cookieinlen,
|
|
cookieout, cookieoutlen, flags, resource,
|
|
--
|
|
2.47.0
|