- kvm-migration-multifd-move-macros-to-multifd-header.patch [RHEL-59697] - kvm-migration-refactor-channel-discovery-mechanism.patch [RHEL-59697] - kvm-migration-Add-save_postcopy_prepare-savevm-handler.patch [RHEL-59697] - kvm-migration-ram-Implement-save_postcopy_prepare.patch [RHEL-59697] - kvm-tests-qtest-migration-consolidate-set-capabilities.patch [RHEL-59697] - kvm-migration-write-zero-pages-when-postcopy-enabled.patch [RHEL-59697] - kvm-migration-enable-multifd-and-postcopy-together.patch [RHEL-59697] - kvm-migration-Add-qtest-for-migration-over-RDMA.patch [RHEL-59697] - kvm-qtest-migration-rdma-Enforce-RLIMIT_MEMLOCK-128MB-re.patch [RHEL-59697] - kvm-qtest-migration-rdma-Add-test-for-rdma-migration-wit.patch [RHEL-59697] - kvm-tests-qtest-migration-add-postcopy-tests-with-multif.patch [RHEL-59697] - kvm-file-posix-Fix-aio-threads-performance-regression-af.patch [RHEL-96854] - kvm-block-remove-outdated-comments-about-AioContext-lock.patch [RHEL-88561] - kvm-block-move-drain-outside-of-read-locked-bdrv_reopen_.patch [RHEL-88561] - kvm-block-snapshot-move-drain-outside-of-read-locked-bdr.patch [RHEL-88561] - kvm-block-move-drain-outside-of-read-locked-bdrv_inactiv.patch [RHEL-88561] - kvm-block-mark-bdrv_parent_change_aio_context-GRAPH_RDLO.patch [RHEL-88561] - kvm-block-mark-change_aio_ctx-callback-and-instances-as-.patch [RHEL-88561] - kvm-block-mark-bdrv_child_change_aio_context-GRAPH_RDLOC.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_change_aio_context-.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_try_change_aio_cont.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_attach_child_common.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_set_backing_hd_drai.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_root_attach_child.patch [RHEL-88561] - kvm-block-move-drain-outside-of-bdrv_attach_child.patch [RHEL-88561] - kvm-block-move-drain-outside-of-quorum_add_child.patch [RHEL-88561] - 
kvm-block-move-drain-outside-of-bdrv_root_unref_child.patch [RHEL-88561] - kvm-block-move-drain-outside-of-quorum_del_child.patch [RHEL-88561] - kvm-blockdev-drain-while-unlocked-in-internal_snapshot_a.patch [RHEL-88561] - kvm-blockdev-drain-while-unlocked-in-external_snapshot_a.patch [RHEL-88561] - kvm-block-mark-bdrv_drained_begin-and-friends-as-GRAPH_U.patch [RHEL-88561] - kvm-iotests-graph-changes-while-io-remove-image-file-aft.patch [RHEL-88561] - kvm-iotests-graph-changes-while-io-add-test-case-with-re.patch [RHEL-88561] - Resolves: RHEL-59697 (Allow multifd+postcopy features being enabled together, but only use multifd during precopy ) - Resolves: RHEL-96854 (Performance Degradation(aio=threads) between Upstream Commit b75c5f9 and 984a32f) - Resolves: RHEL-88561 (qemu graph deadlock during job-dismiss)
240 lines
8.8 KiB
Diff
240 lines
8.8 KiB
Diff
From 21ec86cdc48de9ddf3f5bba994edd9f9427ffd4c Mon Sep 17 00:00:00 2001
|
|
From: Prasad Pandit <pjp@fedoraproject.org>
|
|
Date: Fri, 11 Apr 2025 17:15:29 +0530
|
|
Subject: [PATCH 02/33] migration: refactor channel discovery mechanism
|
|
|
|
RH-Author: Prasad Pandit <None>
|
|
RH-MergeRequest: 390: migration: allow to enable multifd+postcopy features together, but use multifd during precopy only
|
|
RH-Jira: RHEL-59697
|
|
RH-Acked-by: Juraj Marcin <None>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
RH-Commit: [2/11] 7f40da01d8c9a827627c73641f3b90a27bbfb8a0 (pjp/cs-qemu-kvm)
|
|
|
|
The various logical migration channels don't have a
|
|
standardized way of advertising themselves and their
|
|
connections may be seen out of order by the migration
|
|
destination. When a new connection arrives, the incoming
|
|
migration currently makes use of heuristics to determine
|
|
which channel it belongs to.
|
|
|
|
The next few patches will need to change how the multifd
|
|
and postcopy capabilities interact and that affects the
|
|
channel discovery heuristic.
|
|
|
|
Refactor the channel discovery heuristic to make it less
|
|
opaque and simplify the subsequent patches.
|
|
|
|
Jira: https://issues.redhat.com/browse/RHEL-59697
|
|
Signed-off-by: Prasad Pandit <pjp@fedoraproject.org>
|
|
Reviewed-by: Fabiano Rosas <farosas@suse.de>
|
|
Message-ID: <20250411114534.3370816-3-ppandit@redhat.com>
|
|
Signed-off-by: Fabiano Rosas <farosas@suse.de>
|
|
(cherry picked from commit 00f3fcef1981eb23f98b956d9cda2df528bfef40)
|
|
Signed-off-by: Prasad Pandit <ppandit@redhat.com>
|
|
---
|
|
migration/migration.c | 130 +++++++++++++++++++++++-------------------
|
|
1 file changed, 70 insertions(+), 60 deletions(-)
|
|
|
|
diff --git a/migration/migration.c b/migration/migration.c
|
|
index d46e776e24..64f4f40ae3 100644
|
|
--- a/migration/migration.c
|
|
+++ b/migration/migration.c
|
|
@@ -95,6 +95,9 @@ enum mig_rp_message_type {
|
|
MIG_RP_MSG_MAX
|
|
};
|
|
|
|
+/* Migration channel types */
|
|
+enum { CH_MAIN, CH_MULTIFD, CH_POSTCOPY };
|
|
+
|
|
/* When we add fault tolerance, we could have several
|
|
migrations at once. For now we don't need to add
|
|
dynamic creation of migration */
|
|
@@ -931,9 +934,8 @@ static void migration_incoming_setup(QEMUFile *f)
|
|
{
|
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
|
|
|
- if (!mis->from_src_file) {
|
|
- mis->from_src_file = f;
|
|
- }
|
|
+ assert(!mis->from_src_file);
|
|
+ mis->from_src_file = f;
|
|
qemu_file_set_blocking(f, false);
|
|
}
|
|
|
|
@@ -985,28 +987,19 @@ void migration_fd_process_incoming(QEMUFile *f)
|
|
migration_incoming_process();
|
|
}
|
|
|
|
-/*
|
|
- * Returns true when we want to start a new incoming migration process,
|
|
- * false otherwise.
|
|
- */
|
|
-static bool migration_should_start_incoming(bool main_channel)
|
|
+static bool migration_has_main_and_multifd_channels(void)
|
|
{
|
|
- /* Multifd doesn't start unless all channels are established */
|
|
- if (migrate_multifd()) {
|
|
- return migration_has_all_channels();
|
|
+ MigrationIncomingState *mis = migration_incoming_get_current();
|
|
+ if (!mis->from_src_file) {
|
|
+ /* main channel not established */
|
|
+ return false;
|
|
}
|
|
|
|
- /* Preempt channel only starts when the main channel is created */
|
|
- if (migrate_postcopy_preempt()) {
|
|
- return main_channel;
|
|
+ if (migrate_multifd() && !multifd_recv_all_channels_created()) {
|
|
+ return false;
|
|
}
|
|
|
|
- /*
|
|
- * For all the rest types of migration, we should only reach here when
|
|
- * it's the main channel that's being created, and we should always
|
|
- * proceed with this channel.
|
|
- */
|
|
- assert(main_channel);
|
|
+ /* main and all multifd channels are established */
|
|
return true;
|
|
}
|
|
|
|
@@ -1015,59 +1008,81 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
|
|
MigrationIncomingState *mis = migration_incoming_get_current();
|
|
Error *local_err = NULL;
|
|
QEMUFile *f;
|
|
- bool default_channel = true;
|
|
+ uint8_t channel;
|
|
uint32_t channel_magic = 0;
|
|
int ret = 0;
|
|
|
|
- if (migrate_multifd() && !migrate_mapped_ram() &&
|
|
- !migrate_postcopy_ram() &&
|
|
- qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
|
|
- /*
|
|
- * With multiple channels, it is possible that we receive channels
|
|
- * out of order on destination side, causing incorrect mapping of
|
|
- * source channels on destination side. Check channel MAGIC to
|
|
- * decide type of channel. Please note this is best effort, postcopy
|
|
- * preempt channel does not send any magic number so avoid it for
|
|
- * postcopy live migration. Also tls live migration already does
|
|
- * tls handshake while initializing main channel so with tls this
|
|
- * issue is not possible.
|
|
- */
|
|
- ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
|
|
- sizeof(channel_magic), errp);
|
|
+ if (!migration_has_main_and_multifd_channels()) {
|
|
+ if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
|
|
+ /*
|
|
+ * With multiple channels, it is possible that we receive channels
|
|
+ * out of order on destination side, causing incorrect mapping of
|
|
+ * source channels on destination side. Check channel MAGIC to
|
|
+ * decide type of channel. Please note this is best effort,
|
|
+ * postcopy preempt channel does not send any magic number so
|
|
+ * avoid it for postcopy live migration. Also tls live migration
|
|
+ * already does tls handshake while initializing main channel so
|
|
+ * with tls this issue is not possible.
|
|
+ */
|
|
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
|
|
+ sizeof(channel_magic), errp);
|
|
+ if (ret != 0) {
|
|
+ return;
|
|
+ }
|
|
|
|
- if (ret != 0) {
|
|
+ channel_magic = be32_to_cpu(channel_magic);
|
|
+ if (channel_magic == QEMU_VM_FILE_MAGIC) {
|
|
+ channel = CH_MAIN;
|
|
+ } else if (channel_magic == MULTIFD_MAGIC) {
|
|
+ assert(migrate_multifd());
|
|
+ channel = CH_MULTIFD;
|
|
+ } else if (!mis->from_src_file &&
|
|
+ mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
|
|
+ /* reconnect main channel for postcopy recovery */
|
|
+ channel = CH_MAIN;
|
|
+ } else {
|
|
+ error_setg(errp, "unknown channel magic: %u", channel_magic);
|
|
+ return;
|
|
+ }
|
|
+ } else if (mis->from_src_file && migrate_multifd()) {
|
|
+ /*
|
|
+ * Non-peekable channels like tls/file are processed as
|
|
+ * multifd channels when multifd is enabled.
|
|
+ */
|
|
+ channel = CH_MULTIFD;
|
|
+ } else if (!mis->from_src_file) {
|
|
+ channel = CH_MAIN;
|
|
+ } else {
|
|
+ error_setg(errp, "non-peekable channel used without multifd");
|
|
return;
|
|
}
|
|
-
|
|
- default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
|
|
} else {
|
|
- default_channel = !mis->from_src_file;
|
|
+ assert(migrate_postcopy_preempt());
|
|
+ channel = CH_POSTCOPY;
|
|
}
|
|
|
|
if (multifd_recv_setup(errp) != 0) {
|
|
return;
|
|
}
|
|
|
|
- if (default_channel) {
|
|
+ if (channel == CH_MAIN) {
|
|
f = qemu_file_new_input(ioc);
|
|
migration_incoming_setup(f);
|
|
- } else {
|
|
+ } else if (channel == CH_MULTIFD) {
|
|
/* Multiple connections */
|
|
- assert(migration_needs_multiple_sockets());
|
|
- if (migrate_multifd()) {
|
|
- multifd_recv_new_channel(ioc, &local_err);
|
|
- } else {
|
|
- assert(migrate_postcopy_preempt());
|
|
- f = qemu_file_new_input(ioc);
|
|
- postcopy_preempt_new_channel(mis, f);
|
|
- }
|
|
+ multifd_recv_new_channel(ioc, &local_err);
|
|
if (local_err) {
|
|
error_propagate(errp, local_err);
|
|
return;
|
|
}
|
|
+ } else if (channel == CH_POSTCOPY) {
|
|
+ assert(!mis->postcopy_qemufile_dst);
|
|
+ f = qemu_file_new_input(ioc);
|
|
+ postcopy_preempt_new_channel(mis, f);
|
|
+ return;
|
|
}
|
|
|
|
- if (migration_should_start_incoming(default_channel)) {
|
|
+ if (migration_has_main_and_multifd_channels()) {
|
|
/* If it's a recovery, we're done */
|
|
if (postcopy_try_recover()) {
|
|
return;
|
|
@@ -1084,18 +1099,13 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
|
|
*/
|
|
bool migration_has_all_channels(void)
|
|
{
|
|
- MigrationIncomingState *mis = migration_incoming_get_current();
|
|
-
|
|
- if (!mis->from_src_file) {
|
|
+ if (!migration_has_main_and_multifd_channels()) {
|
|
return false;
|
|
}
|
|
|
|
- if (migrate_multifd()) {
|
|
- return multifd_recv_all_channels_created();
|
|
- }
|
|
-
|
|
- if (migrate_postcopy_preempt()) {
|
|
- return mis->postcopy_qemufile_dst != NULL;
|
|
+ MigrationIncomingState *mis = migration_incoming_get_current();
|
|
+ if (migrate_postcopy_preempt() && !mis->postcopy_qemufile_dst) {
|
|
+ return false;
|
|
}
|
|
|
|
return true;
|
|
--
|
|
2.39.3
|
|
|