- kvm-vmstate-Introduce-VMSTATE_VARRAY_INT32_ALLOC.patch [RHEL-174858] - kvm-target-arm-Move-compare_u64-to-helper.c.patch [RHEL-174858] - kvm-target-arm-Convert-init_cpreg_list-to-g_hash_table_f.patch [RHEL-174858] - kvm-target-arm-machine-Use-VMSTATE_VARRAY_INT32_ALLOC-fo.patch [RHEL-174858] - kvm-target-arm-kvm-Export-kvm_print_register_name.patch [RHEL-174858] - kvm-target-arm-kvm-Tweak-print_register_name-for-arm64-s.patch [RHEL-174858] - kvm-target-arm-machine-Trace-cpreg-names-which-do-not-ma.patch [RHEL-174858] - kvm-target-arm-machine-Trace-all-register-mismatches.patch [RHEL-174858] - kvm-target-arm-machine-Fix-detection-of-unknown-incoming.patch [RHEL-174858] - kvm-target-arm-cpu-Introduce-the-infrastructure-for-cpre.patch [RHEL-174858] - kvm-target-arm-machine-Handle-ToleranceNotOnBothEnds-mig.patch [RHEL-174858] - kvm-target-arm-machine-Handle-ToleranceOnlySrcTestValue-.patch [RHEL-174858] - kvm-target-arm-cpu64-Mitigate-migration-failures-due-to-.patch [RHEL-174858] - kvm-target-arm-cpu64-Define-cpreg-migration-tolerance-fo.patch [RHEL-174858] - kvm-target-arm-helper-Define-cpreg-migration-tolerance-f.patch [RHEL-174858] - kvm-Revert-target-arm-Reinstate-bogus-AArch32-DBGDTRTX-r.patch [RHEL-174858] - kvm-hw-pci-host-gpex-acpi-Fix-_DSM-function-0-support-re.patch [RHEL-138494] - kvm-vfio-scsi-ui-Error-check-qio_channel_socket_connect_.patch [RHEL-138494] - kvm-vfio-igd-Enable-quirks-when-IGD-is-not-the-primary-d.patch [RHEL-138494] - kvm-vfio-Remove-vfio-amd-xgbe-device.patch [RHEL-138494] - kvm-vfio-Remove-vfio-calxeda-xgmac-device.patch [RHEL-138494] - kvm-hw-arm-virt-Include-system-system.h.patch [RHEL-138494] - kvm-vfio-Remove-vfio-platform.patch [RHEL-138494] - kvm-vfio-Move-vfio-region.h-under-hw-vfio.patch [RHEL-138494] - kvm-vfio-container-set-error-on-cpr-failure.patch [RHEL-138494] - kvm-vfio-Report-an-error-when-the-dma_max_mappings-limit.patch [RHEL-138494] - kvm-hw-vfio-user-add-x-pci-class-code.patch [RHEL-138494] - 
kvm-vfio-Introduce-helper-vfio_pci_from_vfio_device.patch [RHEL-138494] - kvm-vfio-vfio-container-base.h-update-VFIOContainerBase-.patch [RHEL-138494] - kvm-vfio-vfio-container.h-update-VFIOContainer-declarati.patch [RHEL-138494] - kvm-hw-vfio-cpr-legacy.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-hw-vfio-container.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-spapr.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-vfio-container.h-rename-VFIOContainer-bcontaine.patch [RHEL-138494] - kvm-vfio-user-container.h-update-VFIOUserContainer-decla.patch [RHEL-138494] - kvm-vfio-container.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-user-container.h-rename-VFIOUserContainer-bcont.patch [RHEL-138494] - kvm-vfio-user-pci.c-update-VFIOUserPCIDevice-declaration.patch [RHEL-138494] - kvm-vfio-user-pci.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-VFIOUserPCIDevice-device-fiel.patch [RHEL-138494] - kvm-vfio-pci.h-update-VFIOPCIDevice-declaration.patch [RHEL-138494] - kvm-vfio-pci.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-pci-quirks.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-cpr.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-igd.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-user-pci.c-use-QOM-casts-where-appropriate2.patch [RHEL-138494] - kvm-vfio-pci.h-rename-VFIOPCIDevice-pdev-field-to-parent.patch [RHEL-138494] - kvm-treewide-handle-result-of-qio_channel_set_blocking.patch [RHEL-138494] - kvm-vfio-pci-Do-not-unparent-in-instance_finalize.patch [RHEL-138494] - kvm-vfio-Do-not-unparent-in-instance_finalize.patch [RHEL-138494] - kvm-include-hw-vfio-vfio-container.h-rename-VFIOContaine.patch [RHEL-138494] - kvm-include-hw-vfio-vfio-container-base.h-rename-VFIOCon.patch [RHEL-138494] - kvm-include-hw-vfio-vfio-container.h-rename-file-to-vfio.patch [RHEL-138494] - 
kvm-include-hw-vfio-vfio-container-base.h-rename-file-to.patch [RHEL-138494] - kvm-hw-vfio-container.c-rename-file-to-container-legacy..patch [RHEL-138494] - kvm-hw-vfio-container-base.c-rename-file-to-container.c.patch [RHEL-138494] - kvm-vfio-iommufd.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-cpr-iommufd.c-use-QOM-casts-where-appropriate.patch [RHEL-138494] - kvm-vfio-vfio-iommufd.h-rename-VFIOContainer-bcontainer-.patch [RHEL-138494] - kvm-vfio-spapr.c-use-QOM-casts-where-appropriate2.patch [RHEL-138494] - kvm-vfio-spapr.c-rename-VFIOContainer-bcontainer-field-t.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_instance_init-to-vfio_pci_ini.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_instance_finalize-to-vfio_pci.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_dev_class_init-to-vfio_pc.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_dev_info-to-vfio_pci_info.patch [RHEL-138494] - kvm-s390x-s390-pci-vfio.c-use-QOM-casts-where-appropriat.patch [RHEL-138494] - kvm-hw-vfio-types.h-rename-TYPE_VFIO_PCI_BASE-to-TYPE_VF.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_base_dev_class_init-to-vf.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_base_dev_info-to-vfio_pci.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_dev_properties-to-vfio_pc.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_dev_nohotplug_properties-.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_nohotplug_dev_class_init-.patch [RHEL-138494] - kvm-vfio-pci.c-rename-vfio_pci_nohotplug_dev_info-to-vfi.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-vfio_user_pci_dev_class_init-.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-vfio_user_pci_dev_properties-.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-vfio_user_instance_init-to-vf.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-vfio_user_instance_finalize-t.patch [RHEL-138494] - kvm-vfio-user-pci.c-rename-vfio_user_pci_dev_info-to-vfi.patch [RHEL-138494] - 
kvm-include-hw-vfio-vfio-device.h-fix-include-header-gua.patch [RHEL-138494] - kvm-vfio-Remove-workaround-for-kernel-DMA-unmap-overflow.patch [RHEL-138494] - kvm-system-iommufd-Use-uint64_t-type-for-IOVA-mapping-si.patch [RHEL-138494] - kvm-hw-vfio-Reorder-vfio_container_query_dirty_bitmap-tr.patch [RHEL-138494] - kvm-hw-vfio-Avoid-ram_addr_t-in-vfio_container_query_dir.patch [RHEL-138494] - kvm-hw-vfio-Use-uint64_t-for-IOVA-mapping-size-in-vfio_c.patch [RHEL-138494] - kvm-migration-push-Error-errp-into-vmstate_subsection_lo.patch [RHEL-138494] - kvm-migration-push-Error-errp-into-vmstate_load_state.patch [RHEL-138494] - kvm-migration-Remove-error-variant-of-vmstate_save_state.patch [RHEL-138494] - kvm-migration-multi-mode-notifier.patch [RHEL-138494] - kvm-migration-add-cpr_walk_fd.patch [RHEL-138494] - kvm-oslib-qemu_clear_cloexec.patch [RHEL-138494] - kvm-migration-cpr-exec-command-parameter.patch [RHEL-138494] - kvm-migration-cpr-exec-save-and-load.patch [RHEL-138494] - kvm-migration-cpr-exec-mode.patch [RHEL-138494] - kvm-migration-cpr-exec-docs.patch [RHEL-138494] - kvm-vfio-cpr-exec-mode.patch [RHEL-138494] - kvm-hw-vfio-listener-Include-missing-exec-target_page.h-.patch [RHEL-138494] - kvm-hw-Remove-unnecessary-system-ram_addr.h-header.patch [RHEL-138494] - kvm-vfio-container-Remap-only-populated-parts-in-a-secti.patch [RHEL-138494] - kvm-vfio-cpr-legacy-drop-an-erroneous-assert.patch [RHEL-138494] - kvm-vfio-iommufd-Set-cpr.ioas_id-on-source-side-for-CPR-.patch [RHEL-138494] - kvm-vfio-iommufd-Restore-vbasedev-s-reference-to-hwpt-af.patch [RHEL-138494] - kvm-vfio-container-Support-unmap-all-in-one-ioctl.patch [RHEL-138494] - kvm-vfio-iommufd-Support-unmap-all-in-one-ioctl.patch [RHEL-138494] - kvm-vfio-listener-Add-an-assertion-for-unmap_all.patch [RHEL-138494] - kvm-vfio-Clean-up-includes.patch [RHEL-138494] - kvm-migration-set-correct-list-pointer-when-removing-not.patch [RHEL-138494] - kvm-vfio-user-simplify-vfio_user_process.patch [RHEL-138494] - 
kvm-vfio-user-clarify-partial-message-handling.patch [RHEL-138494] - kvm-vfio-user-refactor-out-header-handling.patch [RHEL-138494] - kvm-vfio-user-simplify-vfio_user_recv_one.patch [RHEL-138494] - kvm-vfio-user-recycle-msg-on-failure.patch [RHEL-138494] - kvm-include-hw-hyperv-Remove-unused-struct-mshv_vp_regis.patch [RHEL-138494] - kvm-linux-headers-Update-to-Linux-v6.18-rc3.patch [RHEL-138494] - kvm-linux-headers-Update-to-Linux-v6.19-rc1.patch [RHEL-138494] - kvm-hw-vfio-Add-helper-to-retrieve-device-feature.patch [RHEL-138494] - kvm-hw-vfio-region-Create-dmabuf-for-PCI-BAR-per-region.patch [RHEL-138494] - Resolves: RHEL-174858 ([rhel10] Backport qemu cross-kernel migration mitigation series) - Resolves: RHEL-138494 (NVIDIA:Grace-Hopper:Backport vfio: Add DMABUF support for PCI BAR regions - RHEL 10.3)
420 lines
14 KiB
Diff
420 lines
14 KiB
Diff
From d233d9745e4519119159aecb4ff7050f20660041 Mon Sep 17 00:00:00 2001
|
|
From: Steve Sistare <steven.sistare@oracle.com>
|
|
Date: Wed, 1 Oct 2025 08:33:58 -0700
|
|
Subject: [PATCH 093/116] migration: cpr-exec mode
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Rodolfo Vick <None>
|
|
RH-MergeRequest: 486: Add DMABUF support
|
|
RH-Jira: RHEL-138494
|
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
|
RH-Acked-by: Eric Auger <eric.auger@redhat.com>
|
|
RH-Commit: [77/100] bea8a1d3bf0582076d7b81bb4f83e19b9d257a75 (rovick1/qemu-kvm)
|
|
|
|
Add the cpr-exec migration mode. Usage:
|
|
qemu-system-$arch -machine aux-ram-share=on ...
|
|
migrate_set_parameter mode cpr-exec
|
|
migrate_set_parameter cpr-exec-command \
|
|
<arg1> <arg2> ... -incoming <uri-1>
|
|
migrate -d <uri-1>
|
|
|
|
The migrate command stops the VM, saves state to uri-1,
|
|
directly exec's a new version of QEMU on the same host,
|
|
replacing the original process while retaining its PID, and
|
|
loads state from uri-1. Guest RAM is preserved in place,
|
|
albeit with new virtual addresses.
|
|
|
|
The new QEMU process is started by exec'ing the command
|
|
specified by the @cpr-exec-command parameter. The first word of
|
|
the command is the binary, and the remaining words are its
|
|
arguments. The command may be a direct invocation of new QEMU,
|
|
or may be a non-QEMU command that exec's the new QEMU binary.
|
|
|
|
This mode creates a second migration channel that is not visible
|
|
to the user. At the start of migration, old QEMU saves CPR state
|
|
to the second channel, and at the end of migration, it tells the
|
|
main loop to call cpr_exec. New QEMU loads CPR state early, before
|
|
objects are created.
|
|
|
|
Because old QEMU terminates when new QEMU starts, one cannot
|
|
stream data between the two, so uri-1 must be a type,
|
|
such as a file, that accepts all data before old QEMU exits.
|
|
Otherwise, old QEMU may quietly block writing to the channel.
|
|
|
|
Memory-backend objects must have the share=on attribute, but
|
|
memory-backend-epc is not supported. The VM must be started with
|
|
the '-machine aux-ram-share=on' option, which allows anonymous
|
|
memory to be transferred in place to the new process. The memfds
|
|
are kept open across exec by clearing the close-on-exec flag, their
|
|
values are saved in CPR state, and they are mmap'd in new QEMU.
|
|
|
|
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
|
|
Acked-by: Markus Armbruster <armbru@redhat.com>
|
|
Link: https://lore.kernel.org/r/1759332851-370353-7-git-send-email-steven.sistare@oracle.com
|
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
---
|
|
include/migration/cpr.h | 2 +
|
|
migration/cpr-exec.c | 95 +++++++++++++++++++++++++++++++++++++++
|
|
migration/cpr.c | 23 +++++++++-
|
|
migration/migration.c | 10 ++++-
|
|
migration/ram.c | 1 +
|
|
migration/trace-events | 1 +
|
|
migration/vmstate-types.c | 8 ++++
|
|
qapi/migration.json | 25 ++++++++++-
|
|
system/vl.c | 4 +-
|
|
9 files changed, 164 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/include/migration/cpr.h b/include/migration/cpr.h
|
|
index b84389ff04..a412d6663c 100644
|
|
--- a/include/migration/cpr.h
|
|
+++ b/include/migration/cpr.h
|
|
@@ -53,9 +53,11 @@ int cpr_get_fd_param(const char *name, const char *fdname, int index,
|
|
QEMUFile *cpr_transfer_output(MigrationChannel *channel, Error **errp);
|
|
QEMUFile *cpr_transfer_input(MigrationChannel *channel, Error **errp);
|
|
|
|
+void cpr_exec_init(void);
|
|
QEMUFile *cpr_exec_output(Error **errp);
|
|
QEMUFile *cpr_exec_input(Error **errp);
|
|
void cpr_exec_persist_state(QEMUFile *f);
|
|
bool cpr_exec_has_state(void);
|
|
void cpr_exec_unpersist_state(void);
|
|
+void cpr_exec_unpreserve_fds(void);
|
|
#endif
|
|
diff --git a/migration/cpr-exec.c b/migration/cpr-exec.c
|
|
index 81d84425e1..d57714bc5d 100644
|
|
--- a/migration/cpr-exec.c
|
|
+++ b/migration/cpr-exec.c
|
|
@@ -6,15 +6,21 @@
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "qemu/cutils.h"
|
|
+#include "qemu/error-report.h"
|
|
#include "qemu/memfd.h"
|
|
#include "qapi/error.h"
|
|
+#include "qapi/type-helpers.h"
|
|
#include "io/channel-file.h"
|
|
#include "io/channel-socket.h"
|
|
+#include "block/block-global-state.h"
|
|
+#include "qemu/main-loop.h"
|
|
#include "migration/cpr.h"
|
|
#include "migration/qemu-file.h"
|
|
+#include "migration/migration.h"
|
|
#include "migration/misc.h"
|
|
#include "migration/vmstate.h"
|
|
#include "system/runstate.h"
|
|
+#include "trace.h"
|
|
|
|
#define CPR_EXEC_STATE_NAME "QEMU_CPR_EXEC_STATE"
|
|
|
|
@@ -97,3 +103,92 @@ QEMUFile *cpr_exec_input(Error **errp)
|
|
lseek(mfd, 0, SEEK_SET);
|
|
return qemu_file_new_fd_input(mfd, CPR_EXEC_STATE_NAME);
|
|
}
|
|
+
|
|
+/*
+ * cpr_walk_fd() callback: clear the close-on-exec flag on @fd.
+ *
+ * Always returns true (presumably "keep walking" -- confirm against
+ * cpr_walk_fd()).
+ */
+static bool preserve_fd(int fd)
+{
+    qemu_clear_cloexec(fd);
+
+    return true;
+}
|
|
+
|
|
+/*
+ * cpr_walk_fd() callback: set the close-on-exec flag on @fd.
+ *
+ * Always returns true (presumably "keep walking" -- confirm against
+ * cpr_walk_fd()).
+ */
+static bool unpreserve_fd(int fd)
+{
+    qemu_set_cloexec(fd);
+
+    return true;
+}
|
|
+
|
|
+/* Apply preserve_fd() to every fd visited by cpr_walk_fd(). */
+static void cpr_exec_preserve_fds(void)
+{
+    cpr_walk_fd(preserve_fd);
+}
|
|
+
|
|
+/* Apply unpreserve_fd() to every fd visited by cpr_walk_fd(). */
+void cpr_exec_unpreserve_fds(void)
+{
+    cpr_walk_fd(unpreserve_fd);
+}
|
|
+
|
|
+/*
+ * Bottom-half callback: exec the command held in the cpr-exec-command
+ * migration parameter, replacing the current process.
+ *
+ * @opaque: unused.
+ *
+ * On success execvp() does not return.  If it returns, the failure is
+ * reported, the migration state is set to FAILED, the PRECOPY_FAILED
+ * notifiers are called, migration_block_activate() is called, and vm_start()
+ * is called if s->vm_old_state was a live run state.
+ */
+static void cpr_exec_cb(void *opaque)
+{
+    MigrationState *s = migrate_get_current();
+    char **argv = strv_from_str_list(s->parameters.cpr_exec_command);
+    Error *err = NULL;
+
+    /*
+     * Clear the close-on-exec flag for all preserved fd's.  We cannot do so
+     * earlier because they should not persist across miscellaneous fork and
+     * exec calls that are performed during normal operation.
+     */
+    cpr_exec_preserve_fds();
+
+    trace_cpr_exec();
+    execvp(argv[0], argv);
+
+    /*
+     * exec should only fail if argv[0] is bogus, or has a permissions problem,
+     * or the system is very short on resources.
+     *
+     * Build the error before any cleanup: the message needs argv[0], which
+     * must not be read after g_strfreev(), and errno must be captured before
+     * another call has a chance to overwrite it.
+     */
+    error_setg_errno(&err, errno, "execvp %s failed", argv[0]);
+    g_strfreev(argv);
+    cpr_exec_unpreserve_fds();
+
+    error_report_err(error_copy(err));
+    migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
+    /*
+     * NOTE(review): if migrate_set_error() stores a copy instead of taking
+     * ownership, err is leaked when it is reset to NULL below -- confirm
+     * against this tree's migrate_set_error().
+     */
+    migrate_set_error(s, err);
+
+    /* Note, we can go from state COMPLETED to FAILED */
+    migration_call_notifiers(s, MIG_EVENT_PRECOPY_FAILED, NULL);
+
+    err = NULL;
+    if (!migration_block_activate(&err)) {
+        /* error was already reported */
+        error_free(err);
+        return;
+    }
+
+    if (runstate_is_live(s->vm_old_state)) {
+        vm_start();
+    }
+}
|
|
+
|
|
+/*
+ * Migration notifier registered for cpr-exec mode.
+ *
+ * MIG_EVENT_PRECOPY_DONE: create a bottom half running cpr_exec_cb(),
+ * schedule it, and call qemu_notify_event().
+ * MIG_EVENT_PRECOPY_FAILED: call cpr_exec_unpersist_state().
+ * Any other event is ignored.
+ *
+ * Always returns 0; @errp is never set.
+ */
+static int cpr_exec_notifier(NotifierWithReturn *notifier, MigrationEvent *e,
+                             Error **errp)
+{
+    MigrationState *s = migrate_get_current();
+
+    switch (e->type) {
+    case MIG_EVENT_PRECOPY_DONE: {
+        QEMUBH *bh = qemu_bh_new(cpr_exec_cb, NULL);
+
+        assert(s->state == MIGRATION_STATUS_COMPLETED);
+        qemu_bh_schedule(bh);
+        qemu_notify_event();
+        break;
+    }
+    case MIG_EVENT_PRECOPY_FAILED:
+        cpr_exec_unpersist_state();
+        break;
+    default:
+        break;
+    }
+
+    return 0;
+}
|
|
+
|
|
+/*
+ * Register cpr_exec_notifier() as a migration notifier for
+ * MIG_MODE_CPR_EXEC.  The notifier object has static storage duration.
+ */
+void cpr_exec_init(void)
+{
+    static NotifierWithReturn cpr_exec_mode_notifier;
+
+    migration_add_notifier_mode(&cpr_exec_mode_notifier, cpr_exec_notifier,
+                                MIG_MODE_CPR_EXEC);
+}
|
|
diff --git a/migration/cpr.c b/migration/cpr.c
|
|
index a995b349d9..0b87c2343f 100644
|
|
--- a/migration/cpr.c
|
|
+++ b/migration/cpr.c
|
|
@@ -6,6 +6,7 @@
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
+#include "qemu/error-report.h"
|
|
#include "qapi/error.h"
|
|
#include "hw/vfio/vfio-device.h"
|
|
#include "migration/cpr.h"
|
|
@@ -185,6 +186,8 @@ int cpr_state_save(MigrationChannel *channel, Error **errp)
|
|
if (mode == MIG_MODE_CPR_TRANSFER) {
|
|
g_assert(channel);
|
|
f = cpr_transfer_output(channel, errp);
|
|
+ } else if (mode == MIG_MODE_CPR_EXEC) {
|
|
+ f = cpr_exec_output(errp);
|
|
} else {
|
|
return 0;
|
|
}
|
|
@@ -201,6 +204,10 @@ int cpr_state_save(MigrationChannel *channel, Error **errp)
|
|
return ret;
|
|
}
|
|
|
|
+ if (migrate_mode() == MIG_MODE_CPR_EXEC) {
|
|
+ cpr_exec_persist_state(f);
|
|
+ }
|
|
+
|
|
/*
|
|
* Close the socket only partially so we can later detect when the other
|
|
* end closes by getting a HUP event.
|
|
@@ -219,7 +226,13 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
|
|
QEMUFile *f;
|
|
MigMode mode = 0;
|
|
|
|
- if (channel) {
|
|
+ if (cpr_exec_has_state()) {
|
|
+ mode = MIG_MODE_CPR_EXEC;
|
|
+ f = cpr_exec_input(errp);
|
|
+ if (channel) {
|
|
+ warn_report("ignoring cpr channel for migration mode cpr-exec");
|
|
+ }
|
|
+ } else if (channel) {
|
|
mode = MIG_MODE_CPR_TRANSFER;
|
|
cpr_set_incoming_mode(mode);
|
|
f = cpr_transfer_input(channel, errp);
|
|
@@ -231,6 +244,7 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
|
|
}
|
|
|
|
trace_cpr_state_load(MigMode_str(mode));
|
|
+ cpr_set_incoming_mode(mode);
|
|
|
|
v = qemu_get_be32(f);
|
|
if (v != QEMU_CPR_FILE_MAGIC) {
|
|
@@ -251,6 +265,11 @@ int cpr_state_load(MigrationChannel *channel, Error **errp)
|
|
return ret;
|
|
}
|
|
|
|
+ if (migrate_mode() == MIG_MODE_CPR_EXEC) {
|
|
+ /* Set cloexec to prevent fd leaks from fork until the next cpr-exec */
|
|
+ cpr_exec_unpreserve_fds();
|
|
+ }
|
|
+
|
|
/*
|
|
* Let the caller decide when to close the socket (and generate a HUP event
|
|
* for the sending side).
|
|
@@ -271,7 +290,7 @@ void cpr_state_close(void)
|
|
bool cpr_incoming_needed(void *opaque)
|
|
{
|
|
MigMode mode = migrate_mode();
|
|
- return mode == MIG_MODE_CPR_TRANSFER;
|
|
+ return mode == MIG_MODE_CPR_TRANSFER || mode == MIG_MODE_CPR_EXEC;
|
|
}
|
|
|
|
/*
|
|
diff --git a/migration/migration.c b/migration/migration.c
|
|
index 08a98f74ef..2515bec48f 100644
|
|
--- a/migration/migration.c
|
|
+++ b/migration/migration.c
|
|
@@ -333,6 +333,7 @@ void migration_object_init(void)
|
|
|
|
ram_mig_init();
|
|
dirty_bitmap_mig_init();
|
|
+ cpr_exec_init();
|
|
|
|
/* Initialize cpu throttle timers */
|
|
cpu_throttle_init();
|
|
@@ -1796,7 +1797,8 @@ bool migrate_mode_is_cpr(MigrationState *s)
|
|
{
|
|
MigMode mode = s->parameters.mode;
|
|
return mode == MIG_MODE_CPR_REBOOT ||
|
|
- mode == MIG_MODE_CPR_TRANSFER;
|
|
+ mode == MIG_MODE_CPR_TRANSFER ||
|
|
+ mode == MIG_MODE_CPR_EXEC;
|
|
}
|
|
|
|
int migrate_init(MigrationState *s, Error **errp)
|
|
@@ -2145,6 +2147,12 @@ static bool migrate_prepare(MigrationState *s, bool resume, Error **errp)
|
|
return false;
|
|
}
|
|
|
|
+ if (migrate_mode() == MIG_MODE_CPR_EXEC &&
|
|
+ !s->parameters.has_cpr_exec_command) {
|
|
+ error_setg(errp, "cpr-exec mode requires setting cpr-exec-command");
|
|
+ return false;
|
|
+ }
|
|
+
|
|
if (migration_is_blocked(errp)) {
|
|
return false;
|
|
}
|
|
diff --git a/migration/ram.c b/migration/ram.c
|
|
index 7208bc114f..6730a41ff5 100644
|
|
--- a/migration/ram.c
|
|
+++ b/migration/ram.c
|
|
@@ -228,6 +228,7 @@ bool migrate_ram_is_ignored(RAMBlock *block)
|
|
MigMode mode = migrate_mode();
|
|
return !qemu_ram_is_migratable(block) ||
|
|
mode == MIG_MODE_CPR_TRANSFER ||
|
|
+ mode == MIG_MODE_CPR_EXEC ||
|
|
(migrate_ignore_shared() && qemu_ram_is_shared(block)
|
|
&& qemu_ram_is_named_file(block));
|
|
}
|
|
diff --git a/migration/trace-events b/migration/trace-events
|
|
index 706db97def..e8edd1fbba 100644
|
|
--- a/migration/trace-events
|
|
+++ b/migration/trace-events
|
|
@@ -354,6 +354,7 @@ cpr_state_save(const char *mode) "%s mode"
|
|
cpr_state_load(const char *mode) "%s mode"
|
|
cpr_transfer_input(const char *path) "%s"
|
|
cpr_transfer_output(const char *path) "%s"
|
|
+cpr_exec(void) ""
|
|
|
|
# block-dirty-bitmap.c
|
|
send_bitmap_header_enter(void) ""
|
|
diff --git a/migration/vmstate-types.c b/migration/vmstate-types.c
|
|
index a1cd7a95fa..4b01dc19c2 100644
|
|
--- a/migration/vmstate-types.c
|
|
+++ b/migration/vmstate-types.c
|
|
@@ -322,6 +322,10 @@ static int get_fd(QEMUFile *f, void *pv, size_t size,
|
|
const VMStateField *field)
|
|
{
|
|
int32_t *v = pv;
|
|
+ if (migrate_mode() == MIG_MODE_CPR_EXEC) {
|
|
+ qemu_get_sbe32s(f, v);
|
|
+ return 0;
|
|
+ }
|
|
*v = qemu_file_get_fd(f);
|
|
return 0;
|
|
}
|
|
@@ -330,6 +334,10 @@ static int put_fd(QEMUFile *f, void *pv, size_t size,
|
|
const VMStateField *field, JSONWriter *vmdesc)
|
|
{
|
|
int32_t *v = pv;
|
|
+ if (migrate_mode() == MIG_MODE_CPR_EXEC) {
|
|
+ qemu_put_sbe32s(f, v);
|
|
+ return 0;
|
|
+ }
|
|
return qemu_file_put_fd(f, *v);
|
|
}
|
|
|
|
diff --git a/qapi/migration.json b/qapi/migration.json
|
|
index 2be8fa1d16..be0f3fcc12 100644
|
|
--- a/qapi/migration.json
|
|
+++ b/qapi/migration.json
|
|
@@ -694,9 +694,32 @@
|
|
# until you issue the `migrate-incoming` command.
|
|
#
|
|
# (since 10.0)
|
|
+#
|
|
+# @cpr-exec: The migrate command stops the VM, saves state to the
|
|
+# migration channel, directly exec's a new version of QEMU on the
|
|
+# same host, replacing the original process while retaining its
|
|
+# PID, and loads state from the channel. Guest RAM is preserved
|
|
+# in place. Devices and their pinned pages are also preserved for
|
|
+# VFIO and IOMMUFD.
|
|
+#
|
|
+# Old QEMU starts new QEMU by exec'ing the command specified by
|
|
+# the @cpr-exec-command parameter. The command may be a direct
|
|
+# invocation of new QEMU, or may be a wrapper that exec's the new
|
|
+# QEMU binary.
|
|
+#
|
|
+# Because old QEMU terminates when new QEMU starts, one cannot
|
|
+# stream data between the two, so the channel must be a type,
|
|
+# such as a file, that accepts all data before old QEMU exits.
|
|
+# Otherwise, old QEMU may quietly block writing to the channel.
|
|
+#
|
|
+# Memory-backend objects must have the share=on attribute, but
|
|
+# memory-backend-epc is not supported. The VM must be started
|
|
+# with the '-machine aux-ram-share=on' option.
|
|
+#
|
|
+# (since 10.2)
|
|
##
|
|
{ 'enum': 'MigMode',
|
|
- 'data': [ 'normal', 'cpr-reboot', 'cpr-transfer' ] }
|
|
+ 'data': [ 'normal', 'cpr-reboot', 'cpr-transfer', 'cpr-exec' ] }
|
|
|
|
##
|
|
# @ZeroPageDetection:
|
|
diff --git a/system/vl.c b/system/vl.c
|
|
index d3e6158753..7a32043625 100644
|
|
--- a/system/vl.c
|
|
+++ b/system/vl.c
|
|
@@ -3850,6 +3850,8 @@ void qemu_init(int argc, char **argv)
|
|
}
|
|
qemu_init_displays();
|
|
accel_setup_post(current_machine);
|
|
- os_setup_post();
|
|
+ if (migrate_mode() != MIG_MODE_CPR_EXEC) {
|
|
+ os_setup_post();
|
|
+ }
|
|
resume_mux_open();
|
|
}
|
|
--
|
|
2.52.0
|
|
|