qemu-kvm/kvm-migration-refactor-channel-discovery-mechanism.patch
Miroslav Rezanina ebd0a23ab5 * Mon Jul 28 2025 Miroslav Rezanina <mrezanin@redhat.com> - 10.0.0-8
- kvm-migration-multifd-move-macros-to-multifd-header.patch [RHEL-59697]
- kvm-migration-refactor-channel-discovery-mechanism.patch [RHEL-59697]
- kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch [RHEL-59697]
- kvm-migration-ram-Implement-save_postcopy_prepare.patch [RHEL-59697]
- kvm-tests-qtest-migration-consolidate-set-capabilities.patch [RHEL-59697]
- kvm-migration-write-zero-pages-when-postcopy-enabled.patch [RHEL-59697]
- kvm-migration-enable-multifd-and-postcopy-together.patch [RHEL-59697]
- kvm-migration-Add-qtest-for-migration-over-RDMA.patch [RHEL-59697]
- kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch [RHEL-59697]
- kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch [RHEL-59697]
- kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch [RHEL-59697]
- kvm-file-posix-Fix-aio-threads-performance-regression-af.patch [RHEL-96854]
- kvm-block-remove-outdated-comments-about-AioContext-lock.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch [RHEL-88561]
- kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch [RHEL-88561]
- kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch [RHEL-88561]
- kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch [RHEL-88561]
- kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_attach_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-quorum_add_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch [RHEL-88561]
- kvm-block-move-drain-outside-of-quorum_del_child.patch [RHEL-88561]
- kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch [RHEL-88561]
- kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch [RHEL-88561]
- kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch [RHEL-88561]
- kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch [RHEL-88561]
- kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch [RHEL-88561]
- Resolves: RHEL-59697
  (Allow multifd+postcopy features being enabled together, but only use multifd during precopy)
- Resolves: RHEL-96854
  (Performance Degradation(aio=threads) between Upstream Commit b75c5f9 and 984a32f)
- Resolves: RHEL-88561
  (qemu graph deadlock during job-dismiss)
2025-07-28 02:29:30 -04:00

240 lines
8.8 KiB
Diff

From 21ec86cdc48de9ddf3f5bba994edd9f9427ffd4c Mon Sep 17 00:00:00 2001
From: Prasad Pandit <pjp@fedoraproject.org>
Date: Fri, 11 Apr 2025 17:15:29 +0530
Subject: [PATCH 02/33] migration: refactor channel discovery mechanism
RH-Author: Prasad Pandit <None>
RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only
RH-Jira: RHEL-59697
RH-Acked-by: Juraj Marcin <None>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/11] 7f40da01d8c9a827627c73641f3b90a27bbfb8a0 (pjp/cs-qemu-kvm)
The various logical migration channels don't have a
standardized way of advertising themselves and their
connections may be seen out of order by the migration
destination. When a new connection arrives, the incoming
migration currently makes use of heuristics to determine
which channel it belongs to.
The next few patches will need to change how the multifd
and postcopy capabilities interact and that affects the
channel discovery heuristic.
Refactor the channel discovery heuristic to make it less
opaque and simplify the subsequent patches.
Jira: https://issues.redhat.com/browse/RHEL-59697
Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
Reviewed-by: Fabiano Rosas <farosas@suse.de>
Message-ID: <20250411114534.3370816-3-ppandit@redhat.com>
Signed-off-by: Fabiano Rosas <farosas@suse.de>
(cherry picked from commit 00f3fcef1981eb23f98b956d9cda2df528bfef40)
Signed-off-by: Prasad Pandit <ppandit@redhat.com>
---
migration/migration.c | 130 +++++++++++++++++++++++-------------------
1 file changed, 70 insertions(+), 60 deletions(-)
diff --git a/migration/migration.c b/migration/migration.c
index d46e776e24..64f4f40ae3 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -95,6 +95,9 @@ enum mig_rp_message_type {
MIG_RP_MSG_MAX
};
+/* Migration channel types */
+enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };
+
/* When we add fault tolerance, we could have several
migrations at once. For now we don't need to add
dynamic creation of migration */
@@ -931,9 +934,8 @@ static void migration_incoming_setup(QEMUFile *f)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (!mis->from_src_file) {
- mis->from_src_file = f;
- }
+ assert(!mis->from_src_file);
+ mis->from_src_file = f;
qemu_file_set_blocking(f, false);
}
@@ -985,28 +987,19 @@ void migration_fd_process_incoming(QEMUFile *f)
migration_incoming_process();
}
-/*
- * Returns true when we want to start a new incoming migration process,
- * false otherwise.
- */
-static bool migration_should_start_incoming(bool main_channel)
+static bool migration_has_main_and_multifd_channels(void)
{
- /* Multifd doesn't start unless all channels are established */
- if (migrate_multifd()) {
- return migration_has_all_channels();
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (!mis->from_src_file) {
+ /* main channel not established */
+ return false;
}
- /* Preempt channel only starts when the main channel is created */
- if (migrate_postcopy_preempt()) {
- return main_channel;
+ if (migrate_multifd() && !multifd_recv_all_channels_created()) {
+ return false;
}
- /*
- * For all the rest types of migration, we should only reach here when
- * it's the main channel that's being created, and we should always
- * proceed with this channel.
- */
- assert(main_channel);
+ /* main and all multifd channels are established */
return true;
}
@@ -1015,59 +1008,81 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
QEMUFile *f;
- bool default_channel = true;
+ uint8_t channel;
uint32_t channel_magic = 0;
int ret = 0;
- if (migrate_multifd() && !migrate_mapped_ram() &&
- !migrate_postcopy_ram() &&
- qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
- /*
- * With multiple channels, it is possible that we receive channels
- * out of order on destination side, causing incorrect mapping of
- * source channels on destination side. Check channel MAGIC to
- * decide type of channel. Please note this is best effort, postcopy
- * preempt channel does not send any magic number so avoid it for
- * postcopy live migration. Also tls live migration already does
- * tls handshake while initializing main channel so with tls this
- * issue is not possible.
- */
- ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
- sizeof(channel_magic), errp);
+ if (!migration_has_main_and_multifd_channels()) {
+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort,
+ * postcopy preempt channel does not send any magic number so
+ * avoid it for postcopy live migration. Also tls live migration
+ * already does tls handshake while initializing main channel so
+ * with tls this issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), errp);
+ if (ret != 0) {
+ return;
+ }
- if (ret != 0) {
+ channel_magic = be32_to_cpu(channel_magic);
+ if (channel_magic == QEMU_VM_FILE_MAGIC) {
+ channel = CH_MAIN;
+ } else if (channel_magic == MULTIFD_MAGIC) {
+ assert(migrate_multifd());
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file &&
+ mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
+ /* reconnect main channel for postcopy recovery */
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "unknown channel magic: %u", channel_magic);
+ return;
+ }
+ } else if (mis->from_src_file && migrate_multifd()) {
+ /*
+ * Non-peekable channels like tls/file are processed as
+ * multifd channels when multifd is enabled.
+ */
+ channel = CH_MULTIFD;
+ } else if (!mis->from_src_file) {
+ channel = CH_MAIN;
+ } else {
+ error_setg(errp, "non-peekable channel used without multifd");
return;
}
-
- default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
} else {
- default_channel = !mis->from_src_file;
+ assert(migrate_postcopy_preempt());
+ channel = CH_POSTCOPY;
}
if (multifd_recv_setup(errp) != 0) {
return;
}
- if (default_channel) {
+ if (channel == CH_MAIN) {
f = qemu_file_new_input(ioc);
migration_incoming_setup(f);
- } else {
+ } else if (channel == CH_MULTIFD) {
/* Multiple connections */
- assert(migration_needs_multiple_sockets());
- if (migrate_multifd()) {
- multifd_recv_new_channel(ioc, &local_err);
- } else {
- assert(migrate_postcopy_preempt());
- f = qemu_file_new_input(ioc);
- postcopy_preempt_new_channel(mis, f);
- }
+ multifd_recv_new_channel(ioc, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
+ } else if (channel == CH_POSTCOPY) {
+ assert(!mis->postcopy_qemufile_dst);
+ f = qemu_file_new_input(ioc);
+ postcopy_preempt_new_channel(mis, f);
+ return;
}
- if (migration_should_start_incoming(default_channel)) {
+ if (migration_has_main_and_multifd_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
@@ -1084,18 +1099,13 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
*/
bool migration_has_all_channels(void)
{
- MigrationIncomingState *mis = migration_incoming_get_current();
-
- if (!mis->from_src_file) {
+ if (!migration_has_main_and_multifd_channels()) {
return false;
}
- if (migrate_multifd()) {
- return multifd_recv_all_channels_created();
- }
-
- if (migrate_postcopy_preempt()) {
- return mis->postcopy_qemufile_dst != NULL;
+ MigrationIncomingState *mis = migration_incoming_get_current();
+ if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
+ return false;
}
return true;
--
2.39.3