diff --git a/kvm-block-blkio-fix-module_block.py-parsing.patch b/kvm-block-blkio-fix-module_block.py-parsing.patch new file mode 100644 index 0000000..1c89a0b --- /dev/null +++ b/kvm-block-blkio-fix-module_block.py-parsing.patch @@ -0,0 +1,205 @@ +From 545482400ea87d54b1b839587f8aaad41e30692f Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Tue, 4 Jul 2023 14:34:36 +0200 +Subject: [PATCH 36/37] block/blkio: fix module_block.py parsing + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 181: block/blkio: fix module_block.py parsing +RH-Bugzilla: 2213317 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Kevin Wolf +RH-Commit: [1/2] c85df95824f4889526a73527771dec9efcb06926 (stefanha/centos-stream-qemu-kvm) + +When QEMU is built with --enable-modules, the module_block.py script +parses block/*.c to find block drivers that are built as modules. The +script generates a table of block drivers called block_driver_modules[]. +This table is used for block driver module loading. + +The blkio.c driver uses macros to define its BlockDriver structs. This +was done to avoid code duplication but the module_block.py script is +unable to parse the macro. The result is that libblkio-based block +drivers can be built as modules but will not be found at runtime. + +One fix is to make the module_block.py script or build system fancier so +it can parse C macros (e.g. by parsing the preprocessed source code). I +chose not to do this because it raises the complexity of the build, +making future issues harder to debug. + +Keep things simple: use the macro to avoid duplicating BlockDriver +function pointers but define .format_name and .protocol_name manually +for each BlockDriver. This way the module_block.py is able to parse the +code. + +Also get rid of the block driver name macros (e.g. DRIVER_IO_URING) +because module_block.py cannot parse them either. + +Fixes: fd66dbd424f5 ("blkio: add libblkio block driver") +Reported-by: Qing Wang +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Stefano Garzarella +Message-id: 20230704123436.187761-1-stefanha@redhat.com +Cc: Stefano Garzarella +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit c21eae1ccc782440f320accb6f90c66cb8f45ee9) + +Conflicts: +- Downstream lacks commit 28ff7b4dfbb5 ("block/blkio: convert to + blk_io_plug_call() API") so keep the .bdrv_co_io_unplug callback. + +Signed-off-by: Stefan Hajnoczi +--- + block/blkio.c | 118 ++++++++++++++++++++++++++------------------------ + 1 file changed, 61 insertions(+), 57 deletions(-) + +diff --git a/block/blkio.c b/block/blkio.c +index 6a6f20f923..afcec359f2 100644 +--- a/block/blkio.c ++++ b/block/blkio.c +@@ -21,16 +21,6 @@ + + #include "block/block-io.h" + +-/* +- * Keep the QEMU BlockDriver names identical to the libblkio driver names. +- * Using macros instead of typing out the string literals avoids typos. +- */ +-#define DRIVER_IO_URING "io_uring" +-#define DRIVER_NVME_IO_URING "nvme-io_uring" +-#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci" +-#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user" +-#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa" +- + /* + * Allocated bounce buffers are kept in a list sorted by buffer address. 
+ */ +@@ -743,15 +733,15 @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags, + return ret; + } + +- if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) { ++ if (strcmp(blkio_driver, "io_uring") == 0) { + ret = blkio_io_uring_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) { ++ } else if (strcmp(blkio_driver, "nvme-io_uring") == 0) { + ret = blkio_nvme_io_uring(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vfio-pci") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-user") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); +- } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) { ++ } else if (strcmp(blkio_driver, "virtio-blk-vhost-vdpa") == 0) { + ret = blkio_virtio_blk_common_open(bs, options, flags, errp); + } else { + g_assert_not_reached(); +@@ -1027,50 +1017,64 @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp) + * - truncate + */ + +-#define BLKIO_DRIVER(name, ...) \ +- { \ +- .format_name = name, \ +- .protocol_name = name, \ +- .instance_size = sizeof(BDRVBlkioState), \ +- .bdrv_file_open = blkio_file_open, \ +- .bdrv_close = blkio_close, \ +- .bdrv_co_getlength = blkio_co_getlength, \ +- .bdrv_co_truncate = blkio_truncate, \ +- .bdrv_co_get_info = blkio_co_get_info, \ +- .bdrv_attach_aio_context = blkio_attach_aio_context, \ +- .bdrv_detach_aio_context = blkio_detach_aio_context, \ +- .bdrv_co_pdiscard = blkio_co_pdiscard, \ +- .bdrv_co_preadv = blkio_co_preadv, \ +- .bdrv_co_pwritev = blkio_co_pwritev, \ +- .bdrv_co_flush_to_disk = blkio_co_flush, \ +- .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ +- .bdrv_co_io_unplug = blkio_co_io_unplug, \ +- .bdrv_refresh_limits = blkio_refresh_limits, \ +- .bdrv_register_buf = blkio_register_buf, \ +- .bdrv_unregister_buf = blkio_unregister_buf, \ +- __VA_ARGS__ \ +- } +- +-static BlockDriver bdrv_io_uring = BLKIO_DRIVER( +- DRIVER_IO_URING, +- .bdrv_needs_filename = true, +-); +- +-static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER( +- DRIVER_NVME_IO_URING, +-); +- +-static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VFIO_PCI +-); ++/* ++ * Do not include .format_name and .protocol_name because module_block.py ++ * does not parse macros in the source code. 
++ */ ++#define BLKIO_DRIVER_COMMON \ ++ .instance_size = sizeof(BDRVBlkioState), \ ++ .bdrv_file_open = blkio_file_open, \ ++ .bdrv_close = blkio_close, \ ++ .bdrv_co_getlength = blkio_co_getlength, \ ++ .bdrv_co_truncate = blkio_truncate, \ ++ .bdrv_co_get_info = blkio_co_get_info, \ ++ .bdrv_attach_aio_context = blkio_attach_aio_context, \ ++ .bdrv_detach_aio_context = blkio_detach_aio_context, \ ++ .bdrv_co_pdiscard = blkio_co_pdiscard, \ ++ .bdrv_co_preadv = blkio_co_preadv, \ ++ .bdrv_co_pwritev = blkio_co_pwritev, \ ++ .bdrv_co_flush_to_disk = blkio_co_flush, \ ++ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \ ++ .bdrv_co_io_unplug = blkio_co_io_unplug, \ ++ .bdrv_refresh_limits = blkio_refresh_limits, \ ++ .bdrv_register_buf = blkio_register_buf, \ ++ .bdrv_unregister_buf = blkio_unregister_buf, + +-static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_USER +-); ++/* ++ * Use the same .format_name and .protocol_name as the libblkio driver name for ++ * consistency. ++ */ + +-static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER( +- DRIVER_VIRTIO_BLK_VHOST_VDPA +-); ++static BlockDriver bdrv_io_uring = { ++ .format_name = "io_uring", ++ .protocol_name = "io_uring", ++ .bdrv_needs_filename = true, ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_nvme_io_uring = { ++ .format_name = "nvme-io_uring", ++ .protocol_name = "nvme-io_uring", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vfio_pci = { ++ .format_name = "virtio-blk-vfio-pci", ++ .protocol_name = "virtio-blk-vfio-pci", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_user = { ++ .format_name = "virtio-blk-vhost-user", ++ .protocol_name = "virtio-blk-vhost-user", ++ BLKIO_DRIVER_COMMON ++}; ++ ++static BlockDriver bdrv_virtio_blk_vhost_vdpa = { ++ .format_name = "virtio-blk-vhost-vdpa", ++ .protocol_name = "virtio-blk-vhost-vdpa", ++ BLKIO_DRIVER_COMMON ++}; + + static void bdrv_blkio_init(void) + { +-- +2.39.3 + diff --git a/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch b/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch new file mode 100644 index 0000000..ffabd75 --- /dev/null +++ b/kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch @@ -0,0 +1,76 @@ +From fcd6219a95851d17fd8bde69d87e78c6533be990 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 24/37] hw/vfio/pci-quirks: Sanitize capability pointer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [22/28] cb080409c1912f4365f8e31cd23c914b48f91575 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0ddcb39c9357 +Author: Alex Williamson +Date: Fri Jun 30 16:36:08 2023 -0600 + + hw/vfio/pci-quirks: Sanitize capability pointer + + Coverity reports a tained scalar when traversing the capabilities + chain (CID 1516589). In practice I've never seen a device with a + chain so broken as to cause an issue, but it's also pretty easy to + sanitize. 
+ + Fixes: f6b30c1984f7 ("hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques") + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index 0ed2fcd531..f4ff836805 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1530,6 +1530,12 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + .set = set_nv_gpudirect_clique_id, + }; + ++static bool is_valid_std_cap_offset(uint8_t pos) ++{ ++ return (pos >= PCI_STD_HEADER_SIZEOF && ++ pos <= (PCI_CFG_SPACE_SIZE - PCI_CAP_SIZEOF)); ++} ++ + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +@@ -1563,7 +1569,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + */ + ret = pread(vdev->vbasedev.fd, &tmp, 1, + vdev->config_offset + PCI_CAPABILITY_LIST); +- if (ret != 1 || !tmp) { ++ if (ret != 1 || !is_valid_std_cap_offset(tmp)) { + error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); + return -EINVAL; + } +@@ -1575,7 +1581,7 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + d4_conflict = true; + } + tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; +- } while (tmp); ++ } while (is_valid_std_cap_offset(tmp)); + + if (!c8_conflict) { + pos = 0xC8; +-- +2.39.3 + diff --git a/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch b/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch new file mode 100644 index 0000000..99f5c75 --- /dev/null +++ b/kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch @@ -0,0 +1,110 @@ +From dd38230a0a375fb8427fa106ff79562e56c51b6c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 18/37] hw/vfio/pci-quirks: Support alternate offset for + GPUDirect Cliques +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [16/28] 9befb7c9adaeb58e9d0b49686cf54b751c742832 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f6b30c1984f7 +Author: Alex Williamson +Date: Thu Jun 8 12:05:07 2023 -0600 + + hw/vfio/pci-quirks: Support alternate offset for GPUDirect Cliques + + NVIDIA Turing and newer GPUs implement the MSI-X capability at the offset + previously reserved for use by hypervisors to implement the GPUDirect + Cliques capability. A revised specification provides an alternate + location. Add a config space walk to the quirk to check for conflicts, + allowing us to fall back to the new location or generate an error at the + quirk setup rather than when the real conflicting capability is added + should there be no available location. 
+ + Signed-off-by: Alex Williamson + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci-quirks.c | 41 ++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 40 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c +index f0147a050a..0ed2fcd531 100644 +--- a/hw/vfio/pci-quirks.c ++++ b/hw/vfio/pci-quirks.c +@@ -1490,6 +1490,9 @@ void vfio_setup_resetfn_quirk(VFIOPCIDevice *vdev) + * +---------------------------------+---------------------------------+ + * + * https://lists.gnu.org/archive/html/qemu-devel/2017-08/pdfUda5iEpgOS.pdf ++ * ++ * Specification for Turning and later GPU architectures: ++ * https://lists.gnu.org/archive/html/qemu-devel/2023-06/pdf142OR4O4c2.pdf + */ + static void get_nv_gpudirect_clique_id(Object *obj, Visitor *v, + const char *name, void *opaque, +@@ -1530,7 +1533,9 @@ const PropertyInfo qdev_prop_nv_gpudirect_clique = { + static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + { + PCIDevice *pdev = &vdev->pdev; +- int ret, pos = 0xC8; ++ int ret, pos; ++ bool c8_conflict = false, d4_conflict = false; ++ uint8_t tmp; + + if (vdev->nv_gpudirect_clique == 0xFF) { + return 0; +@@ -1547,6 +1552,40 @@ static int vfio_add_nv_gpudirect_cap(VFIOPCIDevice *vdev, Error **errp) + return -EINVAL; + } + ++ /* ++ * Per the updated specification above, it's recommended to use offset ++ * D4h for Turing and later GPU architectures due to a conflict of the ++ * MSI-X capability at C8h. We don't know how to determine the GPU ++ * architecture, instead we walk the capability chain to mark conflicts ++ * and choose one or error based on the result. ++ * ++ * NB. Cap list head in pdev->config is already cleared, read from device. 
++ */ ++ ret = pread(vdev->vbasedev.fd, &tmp, 1, ++ vdev->config_offset + PCI_CAPABILITY_LIST); ++ if (ret != 1 || !tmp) { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: error getting cap list"); ++ return -EINVAL; ++ } ++ ++ do { ++ if (tmp == 0xC8) { ++ c8_conflict = true; ++ } else if (tmp == 0xD4) { ++ d4_conflict = true; ++ } ++ tmp = pdev->config[tmp + PCI_CAP_LIST_NEXT]; ++ } while (tmp); ++ ++ if (!c8_conflict) { ++ pos = 0xC8; ++ } else if (!d4_conflict) { ++ pos = 0xD4; ++ } else { ++ error_setg(errp, "NVIDIA GPUDirect Clique ID: invalid config space"); ++ return -EINVAL; ++ } ++ + ret = pci_add_capability(pdev, PCI_CAP_ID_VNDR, pos, 8, errp); + if (ret < 0) { + error_prepend(errp, "Failed to add NVIDIA GPUDirect cap: "); +-- +2.39.3 + diff --git a/kvm-migration-Add-switchover-ack-capability.patch b/kvm-migration-Add-switchover-ack-capability.patch new file mode 100644 index 0000000..399c9ed --- /dev/null +++ b/kvm-migration-Add-switchover-ack-capability.patch @@ -0,0 +1,162 @@ +From 8f89d3bc8f226cd038bf88b9fb3ef43b0fb33034 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 10/37] migration: Add switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/28] 2f4ca020783bd617eca13b18289fce764279833b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6574232fff6a +Author: Avihai Horon +Date: Wed Jun 21 14:11:54 2023 +0300 + + migration: Add switchover ack capability + + Migration downtime estimation is calculated based on bandwidth and + remaining migration data. This assumes that loading of migration data in + the destination takes a negligible amount of time and that downtime + depends only on network speed. + + While this may be true for RAM, it's not necessarily true for other + migrated devices. For example, loading the data of a VFIO device in the + destination might require from the device to allocate resources, prepare + internal data structures and so on. These operations can take a + significant amount of time which can increase migration downtime. + + This patch adds a new capability "switchover ack" that prevents the + source from stopping the VM and completing the migration until an ACK + is received from the destination that it's OK to do so. + + This can be used by migrated devices in various ways to reduce downtime. + For example, a device can send initial precopy metadata to pre-allocate + resources in the destination and use this capability to make sure that + the pre-allocation is completed before the source VM is stopped, so it + will have full effect. + + This new capability relies on the return path capability to communicate + from the destination back to the source. + + The actual implementation of the capability will be added in the + following patches. 
+ + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Acked-by: Markus Armbruster + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - qapi/migration.json + re-indent of @switchover-ack to avoid ../qapi/migration.json:482:1: + unexpected de-indent (expected at least 17 spaces) + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 21 +++++++++++++++++++++ + migration/options.h | 1 + + qapi/migration.json | 14 +++++++++++++- + 3 files changed, 35 insertions(+), 1 deletion(-) + +diff --git a/migration/options.c b/migration/options.c +index a76984276d..c3df6c6dde 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -182,6 +182,8 @@ Property migration_properties[] = { + DEFINE_PROP_MIG_CAP("x-zero-copy-send", + MIGRATION_CAPABILITY_ZERO_COPY_SEND), + #endif ++ DEFINE_PROP_MIG_CAP("x-switchover-ack", ++ MIGRATION_CAPABILITY_SWITCHOVER_ACK), + + DEFINE_PROP_END_OF_LIST(), + }; +@@ -305,6 +307,13 @@ bool migrate_return_path(void) + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + ++bool migrate_switchover_ack(void) ++{ ++ MigrationState *s = migrate_get_current(); ++ ++ return s->capabilities[MIGRATION_CAPABILITY_SWITCHOVER_ACK]; ++} ++ + bool migrate_validate_uuid(void) + { + MigrationState *s = migrate_get_current(); +@@ -532,6 +541,18 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + } + } + ++ if (new_caps[MIGRATION_CAPABILITY_SWITCHOVER_ACK]) { ++ if (!new_caps[MIGRATION_CAPABILITY_RETURN_PATH]) { ++ error_setg(errp, "Capability 'switchover-ack' requires capability " ++ "'return-path'"); ++ return false; ++ } ++ ++ /* Disable this capability until it's implemented */ ++ error_setg(errp, "'switchover-ack' is not implemented yet"); ++ return false; ++ } ++ + return true; + } + +diff --git a/migration/options.h b/migration/options.h +index 7b0f7245ad..0fc7be6869 100644 +--- a/migration/options.h ++++ b/migration/options.h +@@ -47,6 +47,7 @@ bool migrate_postcopy_ram(void); + bool migrate_rdma_pin_all(void); + bool migrate_release_ram(void); + bool migrate_return_path(void); ++bool migrate_switchover_ack(void); + bool migrate_validate_uuid(void); + bool migrate_xbzrle(void); + bool migrate_zero_blocks(void); +diff --git a/qapi/migration.json b/qapi/migration.json +index 2c35b7b9cf..b6a58347cc 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -478,6 +478,18 @@ + # should not affect the correctness of postcopy migration. + # (since 7.1) + # ++# @switchover-ack: If enabled, migration will not stop the source VM ++# and complete the migration until an ACK is received ++# from the destination that it's OK to do so. ++# Exactly when this ACK is sent depends on the ++# migrated devices that use this feature. For ++# example, a device can use it to make sure some of ++# its data is sent and loaded in the destination ++# before doing switchover. This can reduce downtime ++# if devices that support this capability are ++# present. 'return-path' capability must be enabled ++# to use it. (since 8.1) ++# + # Features: + # @unstable: Members @x-colo and @x-ignore-shared are experimental. 
+ # +@@ -492,7 +504,7 @@ + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', + { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, + 'validate-uuid', 'background-snapshot', +- 'zero-copy-send', 'postcopy-preempt'] } ++ 'zero-copy-send', 'postcopy-preempt', 'switchover-ack'] } + + ## + # @MigrationCapabilityStatus: +-- +2.39.3 + diff --git a/kvm-migration-Enable-switchover-ack-capability.patch b/kvm-migration-Enable-switchover-ack-capability.patch new file mode 100644 index 0000000..e08e5df --- /dev/null +++ b/kvm-migration-Enable-switchover-ack-capability.patch @@ -0,0 +1,56 @@ +From bbe565f7d3b7fe46971e020e9bd8e79dc9ffa69c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 12/37] migration: Enable switchover ack capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/28] c4a7d7d26a97181c9516d133a6610bfa5dcb1d16 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 538ef4fe2f72 +Author: Avihai Horon +Date: Wed Jun 21 14:11:56 2023 +0300 + + migration: Enable switchover ack capability + + Now that switchover ack logic has been implemented, enable the + capability. + + Signed-off-by: Avihai Horon + Reviewed-by: Juan Quintela + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index c3df6c6dde..ccd7ef3907 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -547,10 +547,6 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp) + "'return-path'"); + return false; + } +- +- /* Disable this capability until it's implemented */ +- error_setg(errp, "'switchover-ack' is not implemented yet"); +- return false; + } + + return true; +-- +2.39.3 + diff --git a/kvm-migration-Implement-switchover-ack-logic.patch b/kvm-migration-Implement-switchover-ack-logic.patch new file mode 100644 index 0000000..49b9f12 --- /dev/null +++ b/kvm-migration-Implement-switchover-ack-logic.patch @@ -0,0 +1,339 @@ +From 387c39f198d94f600be525e363edc7ca916dc261 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 11/37] migration: Implement switchover ack logic +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/28] 853e1978f3b9f87942863bba894a0ed908bde6b1 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 1b4adb10f898 +Author: Avihai Horon +Date: Wed Jun 21 14:11:55 2023 +0300 + + migration: Implement switchover ack logic + + Implement switchover ack logic. This prevents the source from stopping + the VM and completing the migration until an ACK is received from the + destination that it's OK to do so. + + To achieve this, a new SaveVMHandlers handler switchover_ack_needed() + and a new return path message MIG_RP_MSG_SWITCHOVER_ACK are added. 
+ + The switchover_ack_needed() handler is called during migration setup in + the destination to check if switchover ack is used by the migrated + device. + + When switchover is approved by all migrated devices in the destination + that support this capability, the MIG_RP_MSG_SWITCHOVER_ACK return path + message is sent to the source to notify it that it's OK to do + switchover. + + Signed-off-by: Avihai Horon + Reviewed-by: Peter Xu + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + context changes due to commit f4584076fc31 ("migration: switch + from .vm_was_running to .vm_old_state") + +Signed-off-by: Cédric Le Goater +--- + include/migration/register.h | 2 ++ + migration/migration.c | 32 +++++++++++++++++++-- + migration/migration.h | 14 ++++++++++ + migration/savevm.c | 54 ++++++++++++++++++++++++++++++++++++ + migration/savevm.h | 1 + + migration/trace-events | 3 ++ + 6 files changed, 104 insertions(+), 2 deletions(-) + +diff --git a/include/migration/register.h b/include/migration/register.h +index a8dfd8fefd..90914f32f5 100644 +--- a/include/migration/register.h ++++ b/include/migration/register.h +@@ -71,6 +71,8 @@ typedef struct SaveVMHandlers { + int (*load_cleanup)(void *opaque); + /* Called when postcopy migration wants to resume from failure */ + int (*resume_prepare)(MigrationState *s, void *opaque); ++ /* Checks if switchover ack should be used. Called only in dest */ ++ bool (*switchover_ack_needed)(void *opaque); + } SaveVMHandlers; + + int register_savevm_live(const char *idstr, +diff --git a/migration/migration.c b/migration/migration.c +index 1ac5f19bc2..9bf1caee6c 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -76,6 +76,7 @@ enum mig_rp_message_type { + MIG_RP_MSG_REQ_PAGES, /* data (start: be64, len: be32) */ + MIG_RP_MSG_RECV_BITMAP, /* send recved_bitmap back to source */ + MIG_RP_MSG_RESUME_ACK, /* tell source that we are ready to resume */ ++ MIG_RP_MSG_SWITCHOVER_ACK, /* Tell source it's OK to do switchover */ + + MIG_RP_MSG_MAX + }; +@@ -756,6 +757,11 @@ bool migration_has_all_channels(void) + return true; + } + ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis) ++{ ++ return migrate_send_rp_message(mis, MIG_RP_MSG_SWITCHOVER_ACK, 0, NULL); ++} ++ + /* + * Send a 'SHUT' message on the return channel with the given value + * to indicate that we've finished with the RP. 
Non-0 value indicates +@@ -1415,6 +1421,7 @@ void migrate_init(MigrationState *s) + s->vm_was_running = false; + s->iteration_initial_bytes = 0; + s->threshold_size = 0; ++ s->switchover_acked = false; + } + + int migrate_add_blocker_internal(Error *reason, Error **errp) +@@ -1731,6 +1738,7 @@ static struct rp_cmd_args { + [MIG_RP_MSG_REQ_PAGES_ID] = { .len = -1, .name = "REQ_PAGES_ID" }, + [MIG_RP_MSG_RECV_BITMAP] = { .len = -1, .name = "RECV_BITMAP" }, + [MIG_RP_MSG_RESUME_ACK] = { .len = 4, .name = "RESUME_ACK" }, ++ [MIG_RP_MSG_SWITCHOVER_ACK] = { .len = 0, .name = "SWITCHOVER_ACK" }, + [MIG_RP_MSG_MAX] = { .len = -1, .name = "MAX" }, + }; + +@@ -1969,6 +1977,11 @@ retry: + } + break; + ++ case MIG_RP_MSG_SWITCHOVER_ACK: ++ ms->switchover_acked = true; ++ trace_source_return_path_thread_switchover_acked(); ++ break; ++ + default: + break; + } +@@ -2720,6 +2733,20 @@ static void migration_update_counters(MigrationState *s, + bandwidth, s->threshold_size); + } + ++static bool migration_can_switchover(MigrationState *s) ++{ ++ if (!migrate_switchover_ack()) { ++ return true; ++ } ++ ++ /* No reason to wait for switchover ACK if VM is stopped */ ++ if (!runstate_is_running()) { ++ return true; ++ } ++ ++ return s->switchover_acked; ++} ++ + /* Migration thread iteration status */ + typedef enum { + MIG_ITERATE_RESUME, /* Resume current iteration */ +@@ -2735,6 +2762,7 @@ static MigIterateState migration_iteration_run(MigrationState *s) + { + uint64_t must_precopy, can_postcopy; + bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE; ++ bool can_switchover = migration_can_switchover(s); + + qemu_savevm_state_pending_estimate(&must_precopy, &can_postcopy); + uint64_t pending_size = must_precopy + can_postcopy; +@@ -2747,14 +2775,14 @@ static MigIterateState migration_iteration_run(MigrationState *s) + trace_migrate_pending_exact(pending_size, must_precopy, can_postcopy); + } + +- if (!pending_size || pending_size < s->threshold_size) { ++ if ((!pending_size || pending_size < s->threshold_size) && can_switchover) { + trace_migration_thread_low_pending(pending_size); + migration_completion(s); + return MIG_ITERATE_BREAK; + } + + /* Still a significant amount to transfer */ +- if (!in_postcopy && must_precopy <= s->threshold_size && ++ if (!in_postcopy && must_precopy <= s->threshold_size && can_switchover && + qatomic_read(&s->start_postcopy)) { + if (postcopy_start(s)) { + error_report("%s: postcopy failed to start", __func__); +diff --git a/migration/migration.h b/migration/migration.h +index 2b71df8617..e9679f8029 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -204,6 +204,13 @@ struct MigrationIncomingState { + * contains valid information. + */ + QemuMutex page_request_mutex; ++ ++ /* ++ * Number of devices that have yet to approve switchover. When this reaches ++ * zero an ACK that it's OK to do switchover is sent to the source. No lock ++ * is needed as this field is updated serially. ++ */ ++ unsigned int switchover_ack_pending_num; + }; + + MigrationIncomingState *migration_incoming_get_current(void); +@@ -421,6 +428,12 @@ struct MigrationState { + + /* QEMU_VM_VMDESCRIPTION content filled for all non-iterable devices. */ + JSONWriter *vmdesc; ++ ++ /* ++ * Indicates whether an ACK from the destination that it's OK to do ++ * switchover has been received. 
++ */ ++ bool switchover_acked; + }; + + void migrate_set_state(int *state, int old_state, int new_state); +@@ -461,6 +474,7 @@ int migrate_send_rp_message_req_pages(MigrationIncomingState *mis, + void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis, + char *block_name); + void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value); ++int migrate_send_rp_switchover_ack(MigrationIncomingState *mis); + + void dirty_bitmap_mig_before_vm_start(void); + void dirty_bitmap_mig_cancel_outgoing(void); +diff --git a/migration/savevm.c b/migration/savevm.c +index 211eff3a8b..aff70e6263 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -2358,6 +2358,21 @@ static int loadvm_process_command(QEMUFile *f) + error_report("CMD_OPEN_RETURN_PATH failed"); + return -1; + } ++ ++ /* ++ * Switchover ack is enabled but no device uses it, so send an ACK to ++ * source that it's OK to switchover. Do it here, after return path has ++ * been created. ++ */ ++ if (migrate_switchover_ack() && !mis->switchover_ack_pending_num) { ++ int ret = migrate_send_rp_switchover_ack(mis); ++ if (ret) { ++ error_report( ++ "Could not send switchover ack RP MSG, err %d (%s)", ret, ++ strerror(-ret)); ++ return ret; ++ } ++ } + break; + + case MIG_CMD_PING: +@@ -2584,6 +2599,23 @@ static int qemu_loadvm_state_header(QEMUFile *f) + return 0; + } + ++static void qemu_loadvm_state_switchover_ack_needed(MigrationIncomingState *mis) ++{ ++ SaveStateEntry *se; ++ ++ QTAILQ_FOREACH(se, &savevm_state.handlers, entry) { ++ if (!se->ops || !se->ops->switchover_ack_needed) { ++ continue; ++ } ++ ++ if (se->ops->switchover_ack_needed(se->opaque)) { ++ mis->switchover_ack_pending_num++; ++ } ++ } ++ ++ trace_loadvm_state_switchover_ack_needed(mis->switchover_ack_pending_num); ++} ++ + static int qemu_loadvm_state_setup(QEMUFile *f) + { + SaveStateEntry *se; +@@ -2787,6 +2819,10 @@ int qemu_loadvm_state(QEMUFile *f) + return -EINVAL; + } + ++ if (migrate_switchover_ack()) { ++ qemu_loadvm_state_switchover_ack_needed(mis); ++ } ++ + cpu_synchronize_all_pre_loadvm(); + + ret = qemu_loadvm_state_main(f, mis); +@@ -2860,6 +2896,24 @@ int qemu_load_device_state(QEMUFile *f) + return 0; + } + ++int qemu_loadvm_approve_switchover(void) ++{ ++ MigrationIncomingState *mis = migration_incoming_get_current(); ++ ++ if (!mis->switchover_ack_pending_num) { ++ return -EINVAL; ++ } ++ ++ mis->switchover_ack_pending_num--; ++ trace_loadvm_approve_switchover(mis->switchover_ack_pending_num); ++ ++ if (mis->switchover_ack_pending_num) { ++ return 0; ++ } ++ ++ return migrate_send_rp_switchover_ack(mis); ++} ++ + bool save_snapshot(const char *name, bool overwrite, const char *vmstate, + bool has_devices, strList *devices, Error **errp) + { +diff --git a/migration/savevm.h b/migration/savevm.h +index fb636735f0..e894bbc143 100644 +--- a/migration/savevm.h ++++ b/migration/savevm.h +@@ -65,6 +65,7 @@ int qemu_loadvm_state(QEMUFile *f); + void qemu_loadvm_state_cleanup(void); + int qemu_loadvm_state_main(QEMUFile *f, MigrationIncomingState *mis); + int qemu_load_device_state(QEMUFile *f); ++int qemu_loadvm_approve_switchover(void); + int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f, + bool in_postcopy, bool inactivate_disks); + +diff --git a/migration/trace-events b/migration/trace-events +index 92161eeac5..cda807d271 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -7,6 +7,7 @@ qemu_loadvm_state_section_partend(uint32_t section_id) "%u" + qemu_loadvm_state_post_main(int ret) "%d" + 
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u" + qemu_savevm_send_packaged(void) "" ++loadvm_state_switchover_ack_needed(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + loadvm_state_setup(void) "" + loadvm_state_cleanup(void) "" + loadvm_handle_cmd_packaged(unsigned int length) "%u" +@@ -23,6 +24,7 @@ loadvm_postcopy_ram_handle_discard_end(void) "" + loadvm_postcopy_ram_handle_discard_header(const char *ramid, uint16_t len) "%s: %ud" + loadvm_process_command(const char *s, uint16_t len) "com=%s len=%d" + loadvm_process_command_ping(uint32_t val) "0x%x" ++loadvm_approve_switchover(unsigned int switchover_ack_pending_num) "Switchover ack pending num=%u" + postcopy_ram_listen_thread_exit(void) "" + postcopy_ram_listen_thread_start(void) "" + qemu_savevm_send_postcopy_advise(void) "" +@@ -180,6 +182,7 @@ source_return_path_thread_loop_top(void) "" + source_return_path_thread_pong(uint32_t val) "0x%x" + source_return_path_thread_shut(uint32_t val) "0x%x" + source_return_path_thread_resume_ack(uint32_t v) "%"PRIu32 ++source_return_path_thread_switchover_acked(void) "" + migration_thread_low_pending(uint64_t pending) "%" PRIu64 + migrate_transferred(uint64_t tranferred, uint64_t time_spent, uint64_t bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %" PRIu64 " max_size %" PRId64 + process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d" +-- +2.39.3 + diff --git a/kvm-migration-Make-all-functions-check-have-the-same-for.patch b/kvm-migration-Make-all-functions-check-have-the-same-for.patch new file mode 100644 index 0000000..f873f3f --- /dev/null +++ b/kvm-migration-Make-all-functions-check-have-the-same-for.patch @@ -0,0 +1,431 @@ +From eaccfc91b34f93dcaf597e6b39f78741da618ff3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 08/37] migration: Make all functions check have the same + format +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/28] 774df2a81502d3eab5d5b8f64fa9b69f8be43669 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8f9c532756c5 +Author: Juan Quintela +Date: Wed Mar 1 23:11:08 2023 +0100 + + migration: Make all functions check have the same format + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/options.c | 153 +++++++++++--------------------------------- + 1 file changed, 39 insertions(+), 114 deletions(-) + +diff --git a/migration/options.c b/migration/options.c +index e51d667e14..bcfe244fa9 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -33,27 +33,21 @@ + + bool migrate_auto_converge(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE]; + } + + bool migrate_background_snapshot(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]; + } + + bool migrate_block(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = 
migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_BLOCK]; + } +@@ -61,95 +55,76 @@ bool migrate_block(void) + bool migrate_colo(void) + { + MigrationState *s = migrate_get_current(); ++ + return s->capabilities[MIGRATION_CAPABILITY_X_COLO]; + } + + bool migrate_compress(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_COMPRESS]; + } + + bool migrate_dirty_bitmaps(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS]; + } + + bool migrate_events(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_EVENTS]; + } + + bool migrate_ignore_shared(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED]; + } + + bool migrate_late_block_activate(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE]; + } + + bool migrate_multifd(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_MULTIFD]; + } + + bool migrate_pause_before_switchover(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER]; + } + + bool migrate_postcopy_blocktime(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME]; + } + + bool migrate_postcopy_preempt(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_PREEMPT]; + } + + bool migrate_postcopy_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM]; + } +@@ -163,54 +138,42 @@ bool migrate_rdma_pin_all(void) + + bool migrate_release_ram(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + + bool migrate_return_path(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_RETURN_PATH]; + } + + bool migrate_validate_uuid(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID]; + } + + bool migrate_xbzrle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_XBZRLE]; + } + + bool migrate_zero_blocks(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS]; + } + + bool migrate_zero_copy_send(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); 
++ MigrationState *s = migrate_get_current(); + + return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND]; + } +@@ -224,9 +187,7 @@ bool migrate_postcopy(void) + + bool migrate_tls(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.tls_creds && *s->parameters.tls_creds; + } +@@ -491,126 +452,98 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, + + bool migrate_block_incremental(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.block_incremental; + } + + uint32_t migrate_checkpoint_delay(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.x_checkpoint_delay; + } + + int migrate_compress_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_level; + } + + int migrate_compress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_threads; + } + + int migrate_compress_wait_thread(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.compress_wait_thread; + } + + uint8_t migrate_cpu_throttle_increment(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_increment; + } + + uint8_t migrate_cpu_throttle_initial(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_initial; + } + + bool migrate_cpu_throttle_tailslow(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.cpu_throttle_tailslow; + } + + int migrate_decompress_threads(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.decompress_threads; + } + + uint8_t migrate_max_cpu_throttle(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_cpu_throttle; + } + + uint64_t migrate_max_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_bandwidth; + } + + int64_t migrate_max_postcopy_bandwidth(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.max_postcopy_bandwidth; + } + + int migrate_multifd_channels(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_channels; + } + + MultiFDCompression migrate_multifd_compression(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + assert(s->parameters.multifd_compression < MULTIFD_COMPRESSION__MAX); + return s->parameters.multifd_compression; +@@ -618,36 +551,28 @@ MultiFDCompression migrate_multifd_compression(void) + + int migrate_multifd_zlib_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return 
s->parameters.multifd_zlib_level; + } + + int migrate_multifd_zstd_level(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.multifd_zstd_level; + } + + uint8_t migrate_throttle_trigger_threshold(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.throttle_trigger_threshold; + } + + uint64_t migrate_xbzrle_cache_size(void) + { +- MigrationState *s; +- +- s = migrate_get_current(); ++ MigrationState *s = migrate_get_current(); + + return s->parameters.xbzrle_cache_size; + } +-- +2.39.3 + diff --git a/kvm-migration-Move-migration_properties-to-options.c.patch b/kvm-migration-Move-migration_properties-to-options.c.patch new file mode 100644 index 0000000..145b510 --- /dev/null +++ b/kvm-migration-Move-migration_properties-to-options.c.patch @@ -0,0 +1,409 @@ +From 0911e025a9dc8a0c85944ac11fb9df72e5ad0677 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 09/37] migration: Move migration_properties to options.c +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/28] ff07358afa0c90f13125b177b0e08c74ef1b9905 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit f9436522c8dd +Author: Juan Quintela +Date: Thu Mar 2 12:55:57 2023 +0100 + + migration: Move migration_properties to options.c + + Signed-off-by: Juan Quintela + Reviewed-by: Vladimir Sementsov-Ogievskiy + +Signed-off-by: Cédric Le Goater +--- + migration/migration.c | 157 ------------------------------------------ + migration/options.c | 155 +++++++++++++++++++++++++++++++++++++++++ + migration/options.h | 7 ++ + 3 files changed, 162 insertions(+), 157 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 08f87f2b0e..1ac5f19bc2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -52,8 +52,6 @@ + #include "io/channel-tls.h" + #include "migration/colo.h" + #include "hw/boards.h" +-#include "hw/qdev-properties.h" +-#include "hw/qdev-properties-system.h" + #include "monitor/monitor.h" + #include "net/announce.h" + #include "qemu/queue.h" +@@ -65,51 +63,6 @@ + #include "sysemu/qtest.h" + #include "options.h" + +-#define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ +- +-/* Time in milliseconds we are allowed to stop the source, +- * for sending the last part */ +-#define DEFAULT_MIGRATE_SET_DOWNTIME 300 +- +-/* Default compression thread count */ +-#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 +-/* Default decompression thread count, usually decompression is at +- * least 4 times as fast as compression.*/ +-#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 +-/*0: means nocompress, 1: best speed, ... 
9: best compress ratio */ +-#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 +-/* Define default autoconverge cpu throttle migration parameters */ +-#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 +-#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 +-#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 +- +-/* Migration XBZRLE default cache size */ +-#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) +- +-/* The delay time (in ms) between two COLO checkpoints */ +-#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) +-#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 +-#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE +-/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 +-/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ +-#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 +- +-/* Background transfer rate for postcopy, 0 means unlimited, note +- * that page requests can still exceed this limit. +- */ +-#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 +- +-/* +- * Parameters for self_announce_delay giving a stream of RARP/ARP +- * packets after migration. +- */ +-#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 +-#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 +-#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 +-#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 +- + static NotifierList migration_state_notifiers = + NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); + +@@ -3336,116 +3289,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) + s->migration_thread_running = true; + } + +-#define DEFINE_PROP_MIG_CAP(name, x) \ +- DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) +- +-static Property migration_properties[] = { +- DEFINE_PROP_BOOL("store-global-state", MigrationState, +- store_global_state, true), +- DEFINE_PROP_BOOL("send-configuration", MigrationState, +- send_configuration, true), +- DEFINE_PROP_BOOL("send-section-footer", MigrationState, +- send_section_footer, true), +- DEFINE_PROP_BOOL("decompress-error-check", MigrationState, +- decompress_error_check, true), +- DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, +- clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), +- DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, +- preempt_pre_7_2, false), +- +- /* Migration parameters */ +- DEFINE_PROP_UINT8("x-compress-level", MigrationState, +- parameters.compress_level, +- DEFAULT_MIGRATE_COMPRESS_LEVEL), +- DEFINE_PROP_UINT8("x-compress-threads", MigrationState, +- parameters.compress_threads, +- DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), +- DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, +- parameters.compress_wait_thread, true), +- DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, +- parameters.decompress_threads, +- DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), +- DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, +- parameters.throttle_trigger_threshold, +- DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), +- DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, +- parameters.cpu_throttle_initial, +- DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), +- DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, +- parameters.cpu_throttle_increment, +- DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), +- DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, +- parameters.cpu_throttle_tailslow, false), +- DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, +- parameters.max_bandwidth, MAX_THROTTLE), +- 
DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, +- parameters.downtime_limit, +- DEFAULT_MIGRATE_SET_DOWNTIME), +- DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, +- parameters.x_checkpoint_delay, +- DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), +- DEFINE_PROP_UINT8("multifd-channels", MigrationState, +- parameters.multifd_channels, +- DEFAULT_MIGRATE_MULTIFD_CHANNELS), +- DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, +- parameters.multifd_compression, +- DEFAULT_MIGRATE_MULTIFD_COMPRESSION), +- DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, +- parameters.multifd_zlib_level, +- DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), +- DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, +- parameters.multifd_zstd_level, +- DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), +- DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, +- parameters.xbzrle_cache_size, +- DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), +- DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, +- parameters.max_postcopy_bandwidth, +- DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), +- DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, +- parameters.max_cpu_throttle, +- DEFAULT_MIGRATE_MAX_CPU_THROTTLE), +- DEFINE_PROP_SIZE("announce-initial", MigrationState, +- parameters.announce_initial, +- DEFAULT_MIGRATE_ANNOUNCE_INITIAL), +- DEFINE_PROP_SIZE("announce-max", MigrationState, +- parameters.announce_max, +- DEFAULT_MIGRATE_ANNOUNCE_MAX), +- DEFINE_PROP_SIZE("announce-rounds", MigrationState, +- parameters.announce_rounds, +- DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), +- DEFINE_PROP_SIZE("announce-step", MigrationState, +- parameters.announce_step, +- DEFAULT_MIGRATE_ANNOUNCE_STEP), +- DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), +- DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), +- DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), +- +- /* Migration capabilities */ +- DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), +- DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), +- DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), +- DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), +- DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), +- DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), +- DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), +- DEFINE_PROP_MIG_CAP("x-postcopy-preempt", +- MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), +- DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), +- DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), +- DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), +- DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), +- DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), +- DEFINE_PROP_MIG_CAP("x-background-snapshot", +- MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), +-#ifdef CONFIG_LINUX +- DEFINE_PROP_MIG_CAP("x-zero-copy-send", +- MIGRATION_CAPABILITY_ZERO_COPY_SEND), +-#endif +- +- DEFINE_PROP_END_OF_LIST(), +-}; +- + static void migration_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); +diff --git a/migration/options.c b/migration/options.c +index bcfe244fa9..a76984276d 100644 +--- a/migration/options.c ++++ b/migration/options.c +@@ -31,6 +31,161 @@ + #define MAX_MIGRATE_DOWNTIME_SECONDS 2000 + #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000) + ++#define MAX_THROTTLE (128 << 
20) /* Migration transfer speed throttling */ ++ ++/* Time in milliseconds we are allowed to stop the source, ++ * for sending the last part */ ++#define DEFAULT_MIGRATE_SET_DOWNTIME 300 ++ ++/* Default compression thread count */ ++#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8 ++/* Default decompression thread count, usually decompression is at ++ * least 4 times as fast as compression.*/ ++#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2 ++/*0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1 ++/* Define default autoconverge cpu throttle migration parameters */ ++#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20 ++#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10 ++#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99 ++ ++/* Migration XBZRLE default cache size */ ++#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024) ++ ++/* The delay time (in ms) between two COLO checkpoints */ ++#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100) ++#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2 ++#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE ++/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1 ++/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */ ++#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1 ++ ++/* Background transfer rate for postcopy, 0 means unlimited, note ++ * that page requests can still exceed this limit. ++ */ ++#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0 ++ ++/* ++ * Parameters for self_announce_delay giving a stream of RARP/ARP ++ * packets after migration. ++ */ ++#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL 50 ++#define DEFAULT_MIGRATE_ANNOUNCE_MAX 550 ++#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS 5 ++#define DEFAULT_MIGRATE_ANNOUNCE_STEP 100 ++ ++#define DEFINE_PROP_MIG_CAP(name, x) \ ++ DEFINE_PROP_BOOL(name, MigrationState, capabilities[x], false) ++ ++Property migration_properties[] = { ++ DEFINE_PROP_BOOL("store-global-state", MigrationState, ++ store_global_state, true), ++ DEFINE_PROP_BOOL("send-configuration", MigrationState, ++ send_configuration, true), ++ DEFINE_PROP_BOOL("send-section-footer", MigrationState, ++ send_section_footer, true), ++ DEFINE_PROP_BOOL("decompress-error-check", MigrationState, ++ decompress_error_check, true), ++ DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, ++ clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), ++ DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, ++ preempt_pre_7_2, false), ++ ++ /* Migration parameters */ ++ DEFINE_PROP_UINT8("x-compress-level", MigrationState, ++ parameters.compress_level, ++ DEFAULT_MIGRATE_COMPRESS_LEVEL), ++ DEFINE_PROP_UINT8("x-compress-threads", MigrationState, ++ parameters.compress_threads, ++ DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), ++ DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState, ++ parameters.compress_wait_thread, true), ++ DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, ++ parameters.decompress_threads, ++ DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), ++ DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState, ++ parameters.throttle_trigger_threshold, ++ DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD), ++ DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, ++ parameters.cpu_throttle_initial, ++ DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), ++ DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, ++ parameters.cpu_throttle_increment, ++ 
DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), ++ DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState, ++ parameters.cpu_throttle_tailslow, false), ++ DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, ++ parameters.max_bandwidth, MAX_THROTTLE), ++ DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, ++ parameters.downtime_limit, ++ DEFAULT_MIGRATE_SET_DOWNTIME), ++ DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, ++ parameters.x_checkpoint_delay, ++ DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), ++ DEFINE_PROP_UINT8("multifd-channels", MigrationState, ++ parameters.multifd_channels, ++ DEFAULT_MIGRATE_MULTIFD_CHANNELS), ++ DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState, ++ parameters.multifd_compression, ++ DEFAULT_MIGRATE_MULTIFD_COMPRESSION), ++ DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState, ++ parameters.multifd_zlib_level, ++ DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL), ++ DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, ++ parameters.multifd_zstd_level, ++ DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), ++ DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, ++ parameters.xbzrle_cache_size, ++ DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), ++ DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState, ++ parameters.max_postcopy_bandwidth, ++ DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH), ++ DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState, ++ parameters.max_cpu_throttle, ++ DEFAULT_MIGRATE_MAX_CPU_THROTTLE), ++ DEFINE_PROP_SIZE("announce-initial", MigrationState, ++ parameters.announce_initial, ++ DEFAULT_MIGRATE_ANNOUNCE_INITIAL), ++ DEFINE_PROP_SIZE("announce-max", MigrationState, ++ parameters.announce_max, ++ DEFAULT_MIGRATE_ANNOUNCE_MAX), ++ DEFINE_PROP_SIZE("announce-rounds", MigrationState, ++ parameters.announce_rounds, ++ DEFAULT_MIGRATE_ANNOUNCE_ROUNDS), ++ DEFINE_PROP_SIZE("announce-step", MigrationState, ++ parameters.announce_step, ++ DEFAULT_MIGRATE_ANNOUNCE_STEP), ++ DEFINE_PROP_STRING("tls-creds", MigrationState, parameters.tls_creds), ++ DEFINE_PROP_STRING("tls-hostname", MigrationState, parameters.tls_hostname), ++ DEFINE_PROP_STRING("tls-authz", MigrationState, parameters.tls_authz), ++ ++ /* Migration capabilities */ ++ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), ++ DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL), ++ DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE), ++ DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS), ++ DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS), ++ DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS), ++ DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM), ++ DEFINE_PROP_MIG_CAP("x-postcopy-preempt", ++ MIGRATION_CAPABILITY_POSTCOPY_PREEMPT), ++ DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO), ++ DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), ++ DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), ++ DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), ++ DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), ++ DEFINE_PROP_MIG_CAP("x-background-snapshot", ++ MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), ++#ifdef CONFIG_LINUX ++ DEFINE_PROP_MIG_CAP("x-zero-copy-send", ++ MIGRATION_CAPABILITY_ZERO_COPY_SEND), ++#endif ++ ++ DEFINE_PROP_END_OF_LIST(), ++}; ++ + bool migrate_auto_converge(void) + { + MigrationState *s = migrate_get_current(); +diff --git a/migration/options.h b/migration/options.h +index 89067e59a0..7b0f7245ad 100644 
+--- a/migration/options.h ++++ b/migration/options.h +@@ -14,6 +14,9 @@ + #ifndef QEMU_MIGRATION_OPTIONS_H + #define QEMU_MIGRATION_OPTIONS_H + ++#include "hw/qdev-properties.h" ++#include "hw/qdev-properties-system.h" ++ + /* constants */ + + /* Amount of time to allocate to each "chunk" of bandwidth-throttled +@@ -21,6 +24,10 @@ + #define BUFFER_DELAY 100 + #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY) + ++/* migration properties */ ++ ++extern Property migration_properties[]; ++ + /* capabilities */ + + bool migrate_auto_converge(void); +-- +2.39.3 + diff --git a/1005-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch b/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch similarity index 79% rename from 1005-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch rename to kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch index ae3bac6..312af68 100644 --- a/1005-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +++ b/kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch @@ -1,8 +1,18 @@ -From ee8a96338078126e17d6aa8569b4638fdfa9423b Mon Sep 17 00:00:00 2001 +From 7495a51c586818925470fb247882f5ba0f7b0ffd Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 27 Jun 2023 09:47:03 +0200 -Subject: [PATCH 4/4] pc-bios/s390-ccw: Don't use __bss_start with the "larl" +Subject: [PATCH 34/37] pc-bios/s390-ccw: Don't use __bss_start with the "larl" instruction +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/4] 2483a50c0ed37fa29db649ec44220ac83c215698 (thuth/qemu-kvm-cs9) start.S currently cannot be compiled with Clang 16 and binutils 2.40: diff --git a/1003-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch b/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch similarity index 92% rename from 1003-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch rename to kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch index 25e037e..bd13187 100644 --- a/1003-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +++ b/kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch @@ -1,11 +1,18 @@ -From ef50e8f94f0467d7f55745be9f58c7cd369e57c6 Mon Sep 17 00:00:00 2001 +From 24bc8fc932ae1c88cc2e97f0f90786a7be411bb2 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 27 Jun 2023 09:47:00 +0200 -Subject: [PATCH 2/4] pc-bios/s390-ccw: Fix indentation in start.S +Subject: [PATCH 32/37] pc-bios/s390-ccw: Fix indentation in start.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/4] cf8fa053602ce1cfac0b6efa67f491688d4f9348 (thuth/qemu-kvm-cs9) + start.S is currently indented with a mixture of spaces and tabs, which is quite ugly. 
QEMU coding style says indentation should be 4 spaces, and this is also what we are using in the assembler files in the diff --git a/1002-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch b/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch similarity index 66% rename from 1002-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch rename to kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch index 434942c..907fe43 100644 --- a/1002-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +++ b/kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch @@ -1,8 +1,18 @@ -From d8048e4eb5efcc52c4c77c3443a1cabbf12d087c Mon Sep 17 00:00:00 2001 +From b5b243cbbb897b236c08699529e13457e1e49924 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 22 Jun 2023 15:08:22 +0200 -Subject: [PATCH 1/4] pc-bios/s390-ccw/Makefile: Use -z noexecstack to silence - linker warning +Subject: [PATCH 31/37] pc-bios/s390-ccw/Makefile: Use -z noexecstack to + silence linker warning +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/4] 04f6f83169f1c5545a0e2772b4babfc6a50bd5bf (thuth/qemu-kvm-cs9) Recent versions of ld complain when linking the s390-ccw bios: diff --git a/1004-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch b/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch similarity index 76% rename from 1004-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch rename to kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch index 896dc6e..0c4ce6f 100644 --- a/1004-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +++ b/kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch @@ -1,12 +1,19 @@ -From 5db331dc14cea38ceb96f7cd55365f7c7d326c29 Mon Sep 17 00:00:00 2001 +From 2c52aebf90f28121a3e46a9305304406023b9747 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 27 Jun 2023 09:47:01 +0200 -Subject: [PATCH 3/4] pc-bios/s390-ccw: Provide space for initial stack frame +Subject: [PATCH 33/37] pc-bios/s390-ccw: Provide space for initial stack frame in start.S MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit +RH-Author: Thomas Huth +RH-MergeRequest: 180: Fix misaligned symbol error in the s390-ccw image during qemu-kvm build with binutils 2.40 +RH-Bugzilla: 2220866 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/4] c2f69ce5998861fe20b799bf0113def8cf0cd128 (thuth/qemu-kvm-cs9) + Providing the space of a stack frame is the duty of the caller, so we should reserve 160 bytes before jumping into the main function. Otherwise the main() function might write past the stack array. 
diff --git a/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch b/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch new file mode 100644 index 0000000..ef99b30 --- /dev/null +++ b/kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch @@ -0,0 +1,88 @@ +From b998f8474846886fa1e0428fe79fe2a79231cc05 Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Fri, 12 May 2023 15:43:38 +0100 +Subject: [PATCH 35/37] ui: Fix pixel colour channel order for PNG screenshots +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Marc-André Lureau +RH-MergeRequest: 183: ui: Fix pixel colour channel order for PNG screenshots +RH-Bugzilla: 2222579 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 76acd3c5526639e70bc2998f584503c78fc9bc56 (marcandre.lureau-rh/qemu-kvm-centos) + +When we take a PNG screenshot the ordering of the colour channels in +the data is not correct, resulting in the image having weird +colouring compared to the actual display. (Specifically, on a +little-endian host the blue and red channels are swapped; on +big-endian everything is wrong.) + +This happens because the pixman idea of the pixel data and the libpng +idea differ. PIXMAN_a8r8g8b8 defines that pixels are 32-bit values, +with A in bits 24-31, R in bits 16-23, G in bits 8-15 and B in bits +0-7. This means that on little-endian systems the bytes in memory +are + B G R A +and on big-endian systems they are + A R G B + +libpng, on the other hand, thinks of pixels as being a series of +values for each channel, so its format PNG_COLOR_TYPE_RGB_ALPHA +always wants bytes in the order + R G B A + +This isn't the same as the pixman order for either big or little +endian hosts. + +The alpha channel is also unnecessary bulk in the output PNG file, +because there is no alpha information in a screenshot. + +To handle the endianness issue, we already define in ui/qemu-pixman.h +various PIXMAN_BE_* and PIXMAN_LE_* values that give consistent +byte-order pixel channel formats. So we can use PIXMAN_BE_r8g8b8 and +PNG_COLOR_TYPE_RGB, which both have an in-memory byte order of + R G B +and 3 bytes per pixel. + +(PPM format screenshots get this right; they already use the +PIXMAN_BE_r8g8b8 format.) 
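As an aside (not part of the patch, which only switches the linebuf format to PIXMAN_BE_r8g8b8 and the PNG colour type to PNG_COLOR_TYPE_RGB), a minimal standalone C sketch of the byte-order mismatch described above; the sample pixel value and the manual repacking are illustrative assumptions only:

    /* One a8r8g8b8 pixel as pixman defines it, dumped as raw bytes and
     * repacked into the R G B stream libpng expects for each pixel. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint32_t argb = 0xFF112233;        /* A=ff R=11 G=22 B=33 */
        uint8_t mem[4];

        memcpy(mem, &argb, sizeof(mem));   /* on a little-endian host: 33 22 11 ff, i.e. B G R A */

        uint8_t rgb[3] = { mem[2], mem[1], mem[0] };   /* reorder to R G B */

        printf("in memory: %02x %02x %02x %02x\n", mem[0], mem[1], mem[2], mem[3]);
        printf("PNG bytes: %02x %02x %02x\n", rgb[0], rgb[1], rgb[2]);
        return 0;
    }

On a big-endian host the memcpy() would instead yield ff 11 22 33 (A R G B), so the rgb[] indices above only hold for little-endian, which is exactly why the fix uses the endian-stable PIXMAN_BE_r8g8b8 layout rather than swizzling bytes by hand.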
+ +Cc: qemu-stable@nongnu.org +Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1622 +Fixes: 9a0a119a382867 ("Added parameter to take screenshot with screendump as PNG") +Signed-off-by: Peter Maydell +Reviewed-by: Marc-André Lureau +Message-id: 20230502135548.2451309-1-peter.maydell@linaro.org + +(cherry picked from commit cd22a0f520f471e3bd33bc19cf3b2fa772cdb2a8) +Signed-off-by: Marc-André Lureau +--- + ui/console.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/console.c b/ui/console.c +index 6e8a3cdc62..e173731e20 100644 +--- a/ui/console.c ++++ b/ui/console.c +@@ -311,7 +311,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_struct *png_ptr; + png_info *info_ptr; + g_autoptr(pixman_image_t) linebuf = +- qemu_pixman_linebuf_create(PIXMAN_a8r8g8b8, width); ++ qemu_pixman_linebuf_create(PIXMAN_BE_r8g8b8, width); + uint8_t *buf = (uint8_t *)pixman_image_get_data(linebuf); + FILE *f = fdopen(fd, "wb"); + int y; +@@ -341,7 +341,7 @@ static bool png_save(int fd, pixman_image_t *image, Error **errp) + png_init_io(png_ptr, f); + + png_set_IHDR(png_ptr, info_ptr, width, height, 8, +- PNG_COLOR_TYPE_RGB_ALPHA, PNG_INTERLACE_NONE, ++ PNG_COLOR_TYPE_RGB, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_write_info(png_ptr, info_ptr); +-- +2.39.3 + diff --git a/kvm-util-vfio-helpers-Use-g_file_read_link.patch b/kvm-util-vfio-helpers-Use-g_file_read_link.patch new file mode 100644 index 0000000..4e492d9 --- /dev/null +++ b/kvm-util-vfio-helpers-Use-g_file_read_link.patch @@ -0,0 +1,82 @@ +From fb2d40cc84f689e46138a81c57ccd1f234dbbb7c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 07/37] util/vfio-helpers: Use g_file_read_link() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/28] 3545a07c967782dba8dd081415232f91d3f600a9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit dbdea0dbfe2c +Author: Akihiko Odaki +Date: Tue May 23 11:39:12 2023 +0900 + + util/vfio-helpers: Use g_file_read_link() + + When _FORTIFY_SOURCE=2, glibc version is 2.35, and GCC version is + 12.1.0, the compiler complains as follows: + + In file included from /usr/include/features.h:490, + from /usr/include/bits/libc-header-start.h:33, + from /usr/include/stdint.h:26, + from /usr/lib/gcc/aarch64-unknown-linux-gnu/12.1.0/include/stdint.h:9, + from /home/alarm/q/var/qemu/include/qemu/osdep.h:94, + from ../util/vfio-helpers.c:13: + In function 'readlink', + inlined from 'sysfs_find_group_file' at ../util/vfio-helpers.c:116:9, + inlined from 'qemu_vfio_init_pci' at ../util/vfio-helpers.c:326:18, + inlined from 'qemu_vfio_open_pci' at ../util/vfio-helpers.c:517:9: + /usr/include/bits/unistd.h:119:10: error: argument 2 is null but the corresponding size argument 3 value is 4095 [-Werror=nonnull] + 119 | return __glibc_fortify (readlink, __len, sizeof (char), + | ^~~~~~~~~~~~~~~ + + This error implies the allocated buffer can be NULL. Use + g_file_read_link(), which allocates buffer automatically to avoid the + error. 
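To make the GLib call concrete, here is a small self-contained sketch (not the patch's code: the patch keeps QEMU's error_setg() reporting and resolves /sys/bus/pci/devices/<dev>/iommu_group; the path below is just an example) showing that g_file_read_link() allocates the target string itself and reports failure through a GError:

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        g_autoptr(GError) gerr = NULL;
        g_autofree char *target = g_file_read_link("/proc/self/exe", &gerr);

        if (gerr) {
            fprintf(stderr, "readlink failed: %s\n", gerr->message);
            return 1;
        }
        printf("link target: %s\n", target);
        return 0;
    }

Build with, for example, gcc demo.c $(pkg-config --cflags --libs glib-2.0). Because GLib sizes the buffer itself, there is no fixed PATH_MAX argument left for _FORTIFY_SOURCE to complain about.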
+ + Signed-off-by: Akihiko Odaki + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + util/vfio-helpers.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c +index 2d8af38f88..f8bab46c68 100644 +--- a/util/vfio-helpers.c ++++ b/util/vfio-helpers.c +@@ -106,15 +106,17 @@ struct QEMUVFIOState { + */ + static char *sysfs_find_group_file(const char *device, Error **errp) + { ++ g_autoptr(GError) gerr = NULL; + char *sysfs_link; + char *sysfs_group; + char *p; + char *path = NULL; + + sysfs_link = g_strdup_printf("/sys/bus/pci/devices/%s/iommu_group", device); +- sysfs_group = g_malloc0(PATH_MAX); +- if (readlink(sysfs_link, sysfs_group, PATH_MAX - 1) == -1) { +- error_setg_errno(errp, errno, "Failed to find iommu group sysfs path"); ++ sysfs_group = g_file_read_link(sysfs_link, &gerr); ++ if (gerr) { ++ error_setg(errp, "Failed to find iommu group sysfs path: %s", ++ gerr->message); + goto out; + } + p = strrchr(sysfs_group, '/'); +-- +2.39.3 + diff --git a/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch b/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch new file mode 100644 index 0000000..1e00427 --- /dev/null +++ b/kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch @@ -0,0 +1,72 @@ +From 97124d4f2afbc8e65a3ecf76096e6b34a9b71541 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 30/37] vfio: Fix null pointer dereference bug in + vfio_bars_finalize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [28/28] 4bbdf7f9c5595897244c6cc3d88d487dd5f99bf0 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8af87a3ec7e4 +Author: Avihai Horon +Date: Tue Jul 4 16:39:27 2023 +0300 + + vfio: Fix null pointer dereference bug in vfio_bars_finalize() + + vfio_realize() has the following flow: + 1. vfio_bars_prepare() -- sets VFIOBAR->size. + 2. msix_early_setup(). + 3. vfio_bars_register() -- allocates VFIOBAR->mr. + + After vfio_bars_prepare() is called msix_early_setup() can fail. If it + does fail, vfio_bars_register() is never called and VFIOBAR->mr is not + allocated. + + In this case, vfio_bars_finalize() is called as part of the error flow + to free the bars' resources. However, vfio_bars_finalize() calls + object_unparent() for VFIOBAR->mr after checking only VFIOBAR->size, and + thus we get a null pointer dereference. + + Fix it by checking VFIOBAR->mr in vfio_bars_finalize(). 
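In general terms the fix follows the usual teardown rule: finalize only what a pointer proves was actually allocated, rather than keying on a size field that an earlier, always-run step fills in. A minimal sketch with a hypothetical struct (not QEMU's VFIOBAR):

    #include <assert.h>
    #include <stddef.h>
    #include <stdlib.h>

    typedef struct {
        size_t size;   /* set by the early "prepare" step */
        void *mr;      /* set only by the later "register" step */
    } Bar;

    static void bar_finalize(Bar *bar)
    {
        if (bar->mr) {          /* not: if (bar->size) */
            assert(bar->size);  /* a registered bar always has a size */
            free(bar->mr);
            bar->mr = NULL;
        }
    }

    int main(void)
    {
        Bar early_failure = { .size = 4096, .mr = NULL };
        bar_finalize(&early_failure);   /* safe: the register step never ran */
        return 0;
    }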
+ + Fixes: 89d5202edc50 ("vfio/pci: Allow relocating MSI-X MMIO") + Signed-off-by: Avihai Horon + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index ba40ca8784..9189459a38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -1755,9 +1755,11 @@ static void vfio_bars_finalize(VFIOPCIDevice *vdev) + + vfio_bar_quirk_finalize(vdev, i); + vfio_region_finalize(&bar->region); +- if (bar->size) { ++ if (bar->mr) { ++ assert(bar->size); + object_unparent(OBJECT(bar->mr)); + g_free(bar->mr); ++ bar->mr = NULL; + } + } + +-- +2.39.3 + diff --git a/kvm-vfio-Implement-a-common-device-info-helper.patch b/kvm-vfio-Implement-a-common-device-info-helper.patch new file mode 100644 index 0000000..78a554d --- /dev/null +++ b/kvm-vfio-Implement-a-common-device-info-helper.patch @@ -0,0 +1,196 @@ +From f68e8c5d841cd7fc785cc3d15b3c280211bfb4c3 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 17/37] vfio: Implement a common device info helper +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [15/28] 9cfd233ab1b95dc7de776e8ef901823bd37c5a6b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 634f38f0f73f +Author: Alex Williamson +Date: Thu Jun 1 08:45:06 2023 -0600 + + vfio: Implement a common device info helper + + A common helper implementing the realloc algorithm for handling + capabilities. + + Reviewed-by: Philippe Mathieu-Daudé + Reviewed-by: Cédric Le Goater + Signed-off-by: Alex Williamson + Reviewed-by: Robin Voetter + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 37 ++++------------------------ + hw/vfio/common.c | 46 ++++++++++++++++++++++++++--------- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 41 insertions(+), 43 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index f51190d466..59a2e03873 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -289,38 +289,11 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, +- uint32_t argsz) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev) + { +- struct vfio_device_info *info = g_malloc0(argsz); +- VFIOPCIDevice *vfio_pci; +- int fd; ++ VFIOPCIDevice *vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + +- vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); +- fd = vfio_pci->vbasedev.fd; +- +- /* +- * If the specified argsz is not large enough to contain all capabilities +- * it will be updated upon return from the ioctl. Retry until we have +- * a big enough buffer to hold the entire capability chain. On error, +- * just exit and rely on CLP defaults. 
+- */ +-retry: +- info->argsz = argsz; +- +- if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { +- trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- g_free(info); +- return NULL; +- } +- +- if (info->argsz > argsz) { +- argsz = info->argsz; +- info = g_realloc(info, argsz); +- goto retry; +- } +- +- return info; ++ return vfio_get_device_info(vfio_pci->vbasedev.fd); + } + + /* +@@ -335,7 +308,7 @@ bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) + + assert(fh); + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return false; + } +@@ -356,7 +329,7 @@ void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) + { + g_autofree struct vfio_device_info *info = NULL; + +- info = get_device_info(pbdev, sizeof(*info)); ++ info = get_device_info(pbdev); + if (!info) { + return; + } +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index b73086e17a..3b4ac53f15 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -2845,11 +2845,35 @@ void vfio_put_group(VFIOGroup *group) + } + } + ++struct vfio_device_info *vfio_get_device_info(int fd) ++{ ++ struct vfio_device_info *info; ++ uint32_t argsz = sizeof(*info); ++ ++ info = g_malloc0(argsz); ++ ++retry: ++ info->argsz = argsz; ++ ++ if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { ++ g_free(info); ++ return NULL; ++ } ++ ++ if (info->argsz > argsz) { ++ argsz = info->argsz; ++ info = g_realloc(info, argsz); ++ goto retry; ++ } ++ ++ return info; ++} ++ + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp) + { +- struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) }; +- int ret, fd; ++ g_autofree struct vfio_device_info *info = NULL; ++ int fd; + + fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name); + if (fd < 0) { +@@ -2861,11 +2885,11 @@ int vfio_get_device(VFIOGroup *group, const char *name, + return fd; + } + +- ret = ioctl(fd, VFIO_DEVICE_GET_INFO, &dev_info); +- if (ret) { ++ info = vfio_get_device_info(fd); ++ if (!info) { + error_setg_errno(errp, errno, "error getting device info"); + close(fd); +- return ret; ++ return -1; + } + + /* +@@ -2893,14 +2917,14 @@ int vfio_get_device(VFIOGroup *group, const char *name, + vbasedev->group = group; + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next); + +- vbasedev->num_irqs = dev_info.num_irqs; +- vbasedev->num_regions = dev_info.num_regions; +- vbasedev->flags = dev_info.flags; ++ vbasedev->num_irqs = info->num_irqs; ++ vbasedev->num_regions = info->num_regions; ++ vbasedev->flags = info->flags; ++ ++ trace_vfio_get_device(name, info->flags, info->num_regions, info->num_irqs); + +- trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions, +- dev_info.num_irqs); ++ vbasedev->reset_works = !!(info->flags & VFIO_DEVICE_FLAGS_RESET); + +- vbasedev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET); + return 0; + } + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 3dc5f2104c..6d1b8487c3 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -216,6 +216,7 @@ void vfio_region_finalize(VFIORegion *region); + void vfio_reset_handler(void *opaque); + VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp); + void vfio_put_group(VFIOGroup *group); ++struct vfio_device_info *vfio_get_device_info(int fd); + int vfio_get_device(VFIOGroup *group, const char *name, + VFIODevice *vbasedev, Error **errp); + +-- +2.39.3 + diff --git a/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch 
b/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch new file mode 100644 index 0000000..b8e72e6 --- /dev/null +++ b/kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch @@ -0,0 +1,438 @@ +From 080d28c191b7d951f1f4596dcaa13d590c07d886 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 15/37] vfio/migration: Add VFIO migration pre-copy support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [13/28] 7b2ea1471440d47e5aed1211c96942ca7bface96 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit eda7362af959 +Author: Avihai Horon +Date: Wed Jun 21 14:12:00 2023 +0300 + + vfio/migration: Add VFIO migration pre-copy support + + Pre-copy support allows the VFIO device data to be transferred while the + VM is running. This helps to accommodate VFIO devices that have a large + amount of data that needs to be transferred, and it can reduce migration + downtime. + + Pre-copy support is optional in VFIO migration protocol v2. + Implement pre-copy of VFIO migration protocol v2 and use it for devices + that support it. Full description of it can be found in the following + Linux commit: 4db52602a607 ("vfio: Extend the device migration protocol + with PRE_COPY"). + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 35 +++++--- + hw/vfio/common.c | 6 +- + hw/vfio/migration.c | 165 ++++++++++++++++++++++++++++++++-- + hw/vfio/trace-events | 4 +- + include/hw/vfio/vfio-common.h | 2 + + 5 files changed, 190 insertions(+), 22 deletions(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index 1b68ccf115..e896b2a673 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -7,12 +7,14 @@ the guest is running on source host and restoring this saved state on the + destination host. This document details how saving and restoring of VFIO + devices is done in QEMU. + +-Migration of VFIO devices currently consists of a single stop-and-copy phase. +-During the stop-and-copy phase the guest is stopped and the entire VFIO device +-data is transferred to the destination. +- +-The pre-copy phase of migration is currently not supported for VFIO devices. +-Support for VFIO pre-copy will be added later on. ++Migration of VFIO devices consists of two phases: the optional pre-copy phase, ++and the stop-and-copy phase. The pre-copy phase is iterative and allows to ++accommodate VFIO devices that have a large amount of data that needs to be ++transferred. The iterative pre-copy phase of migration allows for the guest to ++continue whilst the VFIO device state is transferred to the destination, this ++helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy ++support by reporting the VFIO_MIGRATION_PRE_COPY flag in the ++VFIO_DEVICE_FEATURE_MIGRATION ioctl. + + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. 
However, P2P support is planned +@@ -29,10 +31,20 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``load_setup`` function that sets the VFIO device on the destination in + _RESUMING state. + ++* A ``state_pending_estimate`` function that reports an estimate of the ++ remaining pre-copy data that the vendor driver has yet to save for the VFIO ++ device. ++ + * A ``state_pending_exact`` function that reads pending_bytes from the vendor + driver, which indicates the amount of data that the vendor driver has yet to + save for the VFIO device. + ++* An ``is_active_iterate`` function that indicates ``save_live_iterate`` is ++ active only when the VFIO device is in pre-copy states. ++ ++* A ``save_live_iterate`` function that reads the VFIO device's data from the ++ vendor driver during iterative pre-copy phase. ++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +@@ -111,8 +123,10 @@ Flow of state changes during Live migration + =========================================== + + Below is the flow of state change during live migration. +-The values in the brackets represent the VM state, the migration state, and ++The values in the parentheses represent the VM state, the migration state, and + the VFIO device state, respectively. ++The text in the square brackets represents the flow if the VFIO device supports ++pre-copy. + + Live migration save path + ------------------------ +@@ -124,11 +138,12 @@ Live migration save path + | + migrate_init spawns migration_thread + Migration thread then calls each device's .save_setup() +- (RUNNING, _SETUP, _RUNNING) ++ (RUNNING, _SETUP, _RUNNING [_PRE_COPY]) + | +- (RUNNING, _ACTIVE, _RUNNING) +- If device is active, get pending_bytes by .state_pending_exact() ++ (RUNNING, _ACTIVE, _RUNNING [_PRE_COPY]) ++ If device is active, get pending_bytes by .state_pending_{estimate,exact}() + If total pending_bytes >= threshold_size, call .save_live_iterate() ++ [Data of VFIO device for pre-copy phase is copied] + Iterate till total pending bytes converge and are less than threshold + | + On migration completion, vCPU stops and calls .save_live_complete_precopy for +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 78358ede27..b73086e17a 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -492,7 +492,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + } + + if (vbasedev->pre_copy_dirty_page_tracking == ON_OFF_AUTO_OFF && +- migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY)) { + return false; + } + } +@@ -537,7 +538,8 @@ static bool vfio_devices_all_running_and_mig_active(VFIOContainer *container) + return false; + } + +- if (migration->device_state == VFIO_DEVICE_STATE_RUNNING) { ++ if (migration->device_state == VFIO_DEVICE_STATE_RUNNING || ++ migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { + continue; + } else { + return false; +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 8d33414379..d8f6a22ae1 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -68,6 +68,8 @@ static const char *mig_state_to_str(enum vfio_device_mig_state state) + return "STOP_COPY"; + case VFIO_DEVICE_STATE_RESUMING: + return "RESUMING"; ++ case VFIO_DEVICE_STATE_PRE_COPY: ++ return "PRE_COPY"; + default: + return "UNKNOWN STATE"; + } +@@ -241,6 +243,25 @@ static 
int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + ++static int vfio_query_precopy_size(VFIOMigration *migration) ++{ ++ struct vfio_precopy_info precopy = { ++ .argsz = sizeof(precopy), ++ }; ++ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ if (ioctl(migration->data_fd, VFIO_MIG_GET_PRECOPY_INFO, &precopy)) { ++ return -errno; ++ } ++ ++ migration->precopy_init_size = precopy.initial_bytes; ++ migration->precopy_dirty_size = precopy.dirty_bytes; ++ ++ return 0; ++} ++ + /* Returns the size of saved data on success and -errno on error */ + static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { +@@ -249,6 +270,14 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + data_size = read(migration->data_fd, migration->data_buffer, + migration->data_buffer_size); + if (data_size < 0) { ++ /* ++ * Pre-copy emptied all the device state for now. For more information, ++ * please refer to the Linux kernel VFIO uAPI. ++ */ ++ if (errno == ENOMSG) { ++ return 0; ++ } ++ + return -errno; + } + if (data_size == 0) { +@@ -265,6 +294,38 @@ static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return qemu_file_get_error(f) ?: data_size; + } + ++static void vfio_update_estimated_pending_data(VFIOMigration *migration, ++ uint64_t data_size) ++{ ++ if (!data_size) { ++ /* ++ * Pre-copy emptied all the device state for now, update estimated sizes ++ * accordingly. ++ */ ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; ++ ++ return; ++ } ++ ++ if (migration->precopy_init_size) { ++ uint64_t init_size = MIN(migration->precopy_init_size, data_size); ++ ++ migration->precopy_init_size -= init_size; ++ data_size -= init_size; ++ } ++ ++ migration->precopy_dirty_size -= MIN(migration->precopy_dirty_size, ++ data_size); ++} ++ ++static bool vfio_precopy_supported(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->mig_flags & VFIO_MIGRATION_PRE_COPY; ++} ++ + /* ---------------------------------------------------------------------- */ + + static int vfio_save_setup(QEMUFile *f, void *opaque) +@@ -285,6 +346,28 @@ static int vfio_save_setup(QEMUFile *f, void *opaque) + return -ENOMEM; + } + ++ if (vfio_precopy_supported(vbasedev)) { ++ int ret; ++ ++ switch (migration->device_state) { ++ case VFIO_DEVICE_STATE_RUNNING: ++ ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_PRE_COPY, ++ VFIO_DEVICE_STATE_RUNNING); ++ if (ret) { ++ return ret; ++ } ++ ++ vfio_query_precopy_size(migration); ++ ++ break; ++ case VFIO_DEVICE_STATE_STOP: ++ /* vfio_save_complete_precopy() will go to STOP_COPY */ ++ break; ++ default: ++ return -EINVAL; ++ } ++ } ++ + trace_vfio_save_setup(vbasedev->name, migration->data_buffer_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); +@@ -299,26 +382,42 @@ static void vfio_save_cleanup(void *opaque) + + g_free(migration->data_buffer); + migration->data_buffer = NULL; ++ migration->precopy_init_size = 0; ++ migration->precopy_dirty_size = 0; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } + ++static void vfio_state_pending_estimate(void *opaque, uint64_t *must_precopy, ++ uint64_t *can_postcopy) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ if (migration->device_state != VFIO_DEVICE_STATE_PRE_COPY) { ++ return; ++ } ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ ++ 
trace_vfio_state_pending_estimate(vbasedev->name, *must_precopy, ++ *can_postcopy, ++ migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ + /* + * Migration size of VFIO devices can be as little as a few KBs or as big as + * many GBs. This value should be big enough to cover the worst case. + */ + #define VFIO_MIG_STOP_COPY_SIZE (100 * GiB) + +-/* +- * Only exact function is implemented and not estimate function. The reason is +- * that during pre-copy phase of migration the estimate function is called +- * repeatedly while pending RAM size is over the threshold, thus migration +- * can't converge and querying the VFIO device pending data size is useless. +- */ + static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + uint64_t *can_postcopy) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + uint64_t stop_copy_size = VFIO_MIG_STOP_COPY_SIZE; + + /* +@@ -328,8 +427,48 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + vfio_query_stop_copy_size(vbasedev, &stop_copy_size); + *must_precopy += stop_copy_size; + ++ if (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY) { ++ vfio_query_precopy_size(migration); ++ ++ *must_precopy += ++ migration->precopy_init_size + migration->precopy_dirty_size; ++ } ++ + trace_vfio_state_pending_exact(vbasedev->name, *must_precopy, *can_postcopy, +- stop_copy_size); ++ stop_copy_size, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++} ++ ++static bool vfio_is_active_iterate(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ++ return migration->device_state == VFIO_DEVICE_STATE_PRE_COPY; ++} ++ ++static int vfio_save_iterate(QEMUFile *f, void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; ++ ssize_t data_size; ++ ++ data_size = vfio_save_block(f, migration); ++ if (data_size < 0) { ++ return data_size; ++ } ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ ++ vfio_update_estimated_pending_data(migration, data_size); ++ ++ trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, ++ migration->precopy_dirty_size); ++ ++ /* ++ * A VFIO device's pre-copy dirty_bytes is not guaranteed to reach zero. ++ * Return 1 so following handlers will not be potentially blocked. 
++ */ ++ return 1; + } + + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) +@@ -338,7 +477,7 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + ssize_t data_size; + int ret; + +- /* We reach here with device state STOP only */ ++ /* We reach here with device state STOP or STOP_COPY only */ + ret = vfio_migration_set_state(vbasedev, VFIO_DEVICE_STATE_STOP_COPY, + VFIO_DEVICE_STATE_STOP); + if (ret) { +@@ -457,7 +596,10 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, ++ .state_pending_estimate = vfio_state_pending_estimate, + .state_pending_exact = vfio_state_pending_exact, ++ .is_active_iterate = vfio_is_active_iterate, ++ .save_live_iterate = vfio_save_iterate, + .save_live_complete_precopy = vfio_save_complete_precopy, + .save_state = vfio_save_state, + .load_setup = vfio_load_setup, +@@ -470,13 +612,18 @@ static const SaveVMHandlers savevm_vfio_handlers = { + static void vfio_vmstate_change(void *opaque, bool running, RunState state) + { + VFIODevice *vbasedev = opaque; ++ VFIOMigration *migration = vbasedev->migration; + enum vfio_device_mig_state new_state; + int ret; + + if (running) { + new_state = VFIO_DEVICE_STATE_RUNNING; + } else { +- new_state = VFIO_DEVICE_STATE_STOP; ++ new_state = ++ (migration->device_state == VFIO_DEVICE_STATE_PRE_COPY && ++ (state == RUN_STATE_FINISH_MIGRATE || state == RUN_STATE_PAUSED)) ? ++ VFIO_DEVICE_STATE_STOP_COPY : ++ VFIO_DEVICE_STATE_STOP; + } + + /* +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 646e42fd27..4150b59e58 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -162,6 +162,8 @@ vfio_save_block(const char *name, int data_size) " (%s) data_size %d" + vfio_save_cleanup(const char *name) " (%s)" + vfio_save_complete_precopy(const char *name, int ret) " (%s) ret %d" + vfio_save_device_config_state(const char *name) " (%s)" ++vfio_save_iterate(const char *name, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_save_setup(const char *name, uint64_t data_buffer_size) " (%s) data buffer size 0x%"PRIx64 +-vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64 ++vfio_state_pending_estimate(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 ++vfio_state_pending_exact(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t stopcopy_size, uint64_t precopy_init_size, uint64_t precopy_dirty_size) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" stopcopy size 0x%"PRIx64" precopy initial size 0x%"PRIx64" precopy dirty size 0x%"PRIx64 + vfio_vmstate_change(const char *name, int running, const char *reason, const char *dev_state) " (%s) running %d reason %s device state %s" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 5f29dab839..1db901c194 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -67,6 +67,8 @@ typedef struct VFIOMigration { + void *data_buffer; + size_t data_buffer_size; + uint64_t mig_flags; ++ uint64_t precopy_init_size; ++ uint64_t precopy_dirty_size; + } VFIOMigration; + + 
typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch b/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch new file mode 100644 index 0000000..d87680d --- /dev/null +++ b/kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch @@ -0,0 +1,192 @@ +From 169dc1bb051b3aebc571936d956b49ba0621ae43 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 16/37] vfio/migration: Add support for switchover ack + capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [14/28] b3bd2eb2d0ca49ff05a0a82ae5bb956a354aed47 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 745c42912a04 +Author: Avihai Horon +Date: Wed Jun 21 14:12:01 2023 +0300 + + vfio/migration: Add support for switchover ack capability + + Loading of a VFIO device's data can take a substantial amount of time as + the device may need to allocate resources, prepare internal data + structures, etc. This can increase migration downtime, especially for + VFIO devices with a lot of resources. + + To solve this, VFIO migration uAPI defines "initial bytes" as part of + its precopy data stream. Initial bytes can be used in various ways to + improve VFIO migration performance. For example, it can be used to + transfer device metadata to pre-allocate resources in the destination. + However, for this to work we need to make sure that all initial bytes + are sent and loaded in the destination before the source VM is stopped. + + Use migration switchover ack capability to make sure a VFIO device's + initial bytes are sent and loaded in the destination before the source + stops the VM and attempts to complete the migration. + This can significantly reduce migration downtime for some devices. + + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + docs/devel/vfio-migration.rst | 10 +++++++++ + hw/vfio/migration.c | 39 ++++++++++++++++++++++++++++++++++- + include/hw/vfio/vfio-common.h | 1 + + 3 files changed, 49 insertions(+), 1 deletion(-) + +diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst +index e896b2a673..b433cb5bb2 100644 +--- a/docs/devel/vfio-migration.rst ++++ b/docs/devel/vfio-migration.rst +@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices opt-in to pre-copy + support by reporting the VFIO_MIGRATION_PRE_COPY flag in the + VFIO_DEVICE_FEATURE_MIGRATION ioctl. + ++When pre-copy is supported, it's possible to further reduce downtime by ++enabling "switchover-ack" migration capability. ++VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream ++and recommends that the initial bytes are sent and loaded in the destination ++before stopping the source VM. Enabling this migration capability will ++guarantee that and thus, can potentially reduce downtime even further. ++ + Note that currently VFIO migration is supported only for a single device. This + is due to VFIO migration's lack of P2P support. However, P2P support is planned + to be added later on. 
+@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach as follows: + * A ``save_live_iterate`` function that reads the VFIO device's data from the + vendor driver during iterative pre-copy phase. + ++* A ``switchover_ack_needed`` function that checks if the VFIO device uses ++ "switchover-ack" migration capability when this capability is enabled. ++ + * A ``save_state`` function to save the device config space if it is present. + + * A ``save_live_complete_precopy`` function that sets the VFIO device in +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index d8f6a22ae1..acbf0bb7ab 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -18,6 +18,8 @@ + #include "sysemu/runstate.h" + #include "hw/vfio/vfio-common.h" + #include "migration/migration.h" ++#include "migration/options.h" ++#include "migration/savevm.h" + #include "migration/vmstate.h" + #include "migration/qemu-file.h" + #include "migration/register.h" +@@ -45,6 +47,7 @@ + #define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL) + #define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL) + #define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL) ++#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL) + + /* + * This is an arbitrary size based on migration of mlx5 devices, where typically +@@ -384,6 +387,7 @@ static void vfio_save_cleanup(void *opaque) + migration->data_buffer = NULL; + migration->precopy_init_size = 0; + migration->precopy_dirty_size = 0; ++ migration->initial_data_sent = false; + vfio_migration_cleanup(vbasedev); + trace_vfio_save_cleanup(vbasedev->name); + } +@@ -457,10 +461,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque) + if (data_size < 0) { + return data_size; + } +- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + + vfio_update_estimated_pending_data(migration, data_size); + ++ if (migrate_switchover_ack() && !migration->precopy_init_size && ++ !migration->initial_data_sent) { ++ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT); ++ migration->initial_data_sent = true; ++ } else { ++ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); ++ } ++ + trace_vfio_save_iterate(vbasedev->name, migration->precopy_init_size, + migration->precopy_dirty_size); + +@@ -579,6 +590,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + } + break; + } ++ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT: ++ { ++ if (!vfio_precopy_supported(vbasedev) || ++ !migrate_switchover_ack()) { ++ error_report("%s: Received INIT_DATA_SENT but switchover ack " ++ "is not used", vbasedev->name); ++ return -EINVAL; ++ } ++ ++ ret = qemu_loadvm_approve_switchover(); ++ if (ret) { ++ error_report( ++ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return ret; ++ } + default: + error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data); + return -EINVAL; +@@ -593,6 +622,13 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id) + return ret; + } + ++static bool vfio_switchover_ack_needed(void *opaque) ++{ ++ VFIODevice *vbasedev = opaque; ++ ++ return vfio_precopy_supported(vbasedev); ++} ++ + static const SaveVMHandlers savevm_vfio_handlers = { + .save_setup = vfio_save_setup, + .save_cleanup = vfio_save_cleanup, +@@ -605,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = { + .load_setup = vfio_load_setup, + .load_cleanup = vfio_load_cleanup, + .load_state = vfio_load_state, ++ .switchover_ack_needed = vfio_switchover_ack_needed, + }; + + /* 
---------------------------------------------------------------------- */ +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1db901c194..3dc5f2104c 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -69,6 +69,7 @@ typedef struct VFIOMigration { + uint64_t mig_flags; + uint64_t precopy_init_size; + uint64_t precopy_dirty_size; ++ bool initial_data_sent; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch b/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch new file mode 100644 index 0000000..dde2e24 --- /dev/null +++ b/kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch @@ -0,0 +1,171 @@ +From 35c7d0d3b02d61d6f29afae74bd83edd70a6a1b4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 26/37] vfio/migration: Change vIOMMU blocker from global to + per device +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [24/28] 8fda1c82a81fadd4f38e6a5e878c9228a81c0f6e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 3c26c80a0a26 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:07 2023 +0800 + + vfio/migration: Change vIOMMU blocker from global to per device + + Contrary to multiple device blocker which needs to consider already-attached + devices to unblock/block dynamically, the vIOMMU migration blocker is a device + specific config. Meaning it only needs to know whether the device is bypassing + or not the vIOMMU (via machine property, or per pxb-pcie::bypass_iommu), and + does not need the state of currently present devices. For this reason, the + vIOMMU global migration blocker can be consolidated into the per-device + migration blocker, allowing us to remove some unnecessary code. + + This change also makes vfio_mig_active() more accurate as it doesn't check for + global blocker. 
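In effect the check collapses to a per-device predicate on the device's DMA address space. A conceptual sketch with hypothetical types (QEMU's real chain is vbasedev->group->container->space->as compared against address_space_memory, as the hunk below shows):

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { const char *name; } AddressSpace;

    static AddressSpace address_space_memory = { "memory" };

    typedef struct { AddressSpace *as; } Device;

    /* true when the device's DMA goes through a vIOMMU-managed address
     * space instead of plain system memory */
    static bool viommu_preset(const Device *dev)
    {
        return dev->as != &address_space_memory;
    }

    int main(void)
    {
        AddressSpace iommu_as = { "iommu" };
        Device direct = { &address_space_memory };
        Device translated = { &iommu_as };

        printf("direct: %d, translated: %d\n",
               viommu_preset(&direct), viommu_preset(&translated));
        return 0;
    }

Being per device, only devices whose DMA actually passes through the vIOMMU pick up the migration blocker; devices on a bypass_iommu bus do not.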
+ + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 51 ++--------------------------------- + hw/vfio/migration.c | 7 ++--- + hw/vfio/pci.c | 1 - + include/hw/vfio/vfio-common.h | 3 +-- + 4 files changed, 7 insertions(+), 55 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 136d8243d6..e815f6ba30 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -362,7 +362,6 @@ bool vfio_mig_active(void) + } + + static Error *multiple_devices_migration_blocker; +-static Error *giommu_migration_blocker; + + static unsigned int vfio_migratable_device_num(void) + { +@@ -420,55 +419,9 @@ void vfio_unblock_multiple_devices_migration(void) + multiple_devices_migration_blocker = NULL; + } + +-static bool vfio_viommu_preset(void) ++bool vfio_viommu_preset(VFIODevice *vbasedev) + { +- VFIOAddressSpace *space; +- +- QLIST_FOREACH(space, &vfio_address_spaces, list) { +- if (space->as != &address_space_memory) { +- return true; +- } +- } +- +- return false; +-} +- +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) +-{ +- int ret; +- +- if (giommu_migration_blocker || +- !vfio_viommu_preset()) { +- return 0; +- } +- +- if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { +- error_setg(errp, +- "Migration is currently not supported with vIOMMU enabled"); +- return -EINVAL; +- } +- +- error_setg(&giommu_migration_blocker, +- "Migration is currently not supported with vIOMMU enabled"); +- ret = migrate_add_blocker(giommu_migration_blocker, errp); +- if (ret < 0) { +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; +- } +- +- return ret; +-} +- +-void vfio_migration_finalize(void) +-{ +- if (!giommu_migration_blocker || +- vfio_viommu_preset()) { +- return; +- } +- +- migrate_del_blocker(giommu_migration_blocker); +- error_free(giommu_migration_blocker); +- giommu_migration_blocker = NULL; ++ return vbasedev->group->container->space->as != &address_space_memory; + } + + static void vfio_set_migration_error(int err) +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 1db7d52ab2..e6e5e85f75 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -878,9 +878,10 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + return ret; + } + +- ret = vfio_block_giommu_migration(vbasedev, errp); +- if (ret) { +- return ret; ++ if (vfio_viommu_preset(vbasedev)) { ++ error_setg(&err, "%s: Migration is currently not supported " ++ "with vIOMMU enabled", vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + trace_vfio_migration_realize(vbasedev->name); +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 2d059832a4..922c81872c 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3279,7 +3279,6 @@ static void vfio_instance_finalize(Object *obj) + */ + vfio_put_device(vdev); + vfio_put_group(group); +- vfio_migration_finalize(); + } + + static void vfio_exitfn(PCIDevice *pdev) +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 93429b9abb..45167c8a8a 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -227,7 +227,7 @@ extern VFIOGroupList vfio_group_list; + bool vfio_mig_active(void); + int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); ++bool 
vfio_viommu_preset(VFIODevice *vbasedev); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +@@ -254,6 +254,5 @@ int vfio_spapr_remove_window(VFIOContainer *container, + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); +-void vfio_migration_finalize(void); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch b/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch new file mode 100644 index 0000000..9deaf1a --- /dev/null +++ b/kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch @@ -0,0 +1,145 @@ +From a36fa46369fe9bf2a2174e9ed6ab83042e904066 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 27/37] vfio/migration: Free resources when + vfio_migration_realize fails +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [25/28] b3ab8d3443d4bc12a689dc7d88a94da315814bb7 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2b43b2995b02 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:08 2023 +0800 + + vfio/migration: Free resources when vfio_migration_realize fails + + When vfio_realize() succeeds, hot unplug will call vfio_exitfn() + to free resources allocated in vfio_realize(); when vfio_realize() + fails, vfio_exitfn() is never called and we need to free resources + in vfio_realize(). + + In the case that vfio_migration_realize() fails, + e.g: with -only-migratable & enable-migration=off, we see below: + + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + 0000:81:11.1: Migration disabled + Error: disallowing migration blocker (--only-migratable) for: 0000:81:11.1: Migration is disabled for VFIO device + + If we hotplug again we should see same log as above, but we see: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,enable-migration=off + Error: vfio 0000:81:11.1: device is already attached + + That's because some references to VFIO device isn't released. + For resources allocated in vfio_migration_realize(), free them by + jumping to out_deinit path with calling a new function + vfio_migration_deinit(). For resources allocated in vfio_realize(), + free them by jumping to de-register path in vfio_realize(). 
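The shape of the fix is the goto-based unwind used throughout QEMU realize paths: each failure jumps to a label that tears down only what earlier steps already set up. A stripped-down sketch with hypothetical names (the real code uses vfio_migration_deinit() and the add_blocker/out_deinit labels shown in the hunks below):

    #include <stdio.h>

    static int setup_core(void)        { return 0; }
    static void teardown_core(void)    { puts("core torn down"); }
    static int setup_blocker(int fail) { return fail ? -1 : 0; }

    static int realize(int fail_blocker)
    {
        int ret = setup_core();
        if (ret) {
            return ret;                 /* nothing to undo yet */
        }

        ret = setup_blocker(fail_blocker);
        if (ret) {
            goto out_deinit;            /* undo setup_core() before bailing */
        }
        return 0;

    out_deinit:
        teardown_core();
        return ret;
    }

    int main(void)
    {
        printf("success path: %d\n", realize(0));
        printf("failure path: %d\n", realize(1));
        return 0;
    }

Without the unwind, a failed realize leaves earlier registrations dangling, which is the "device is already attached" symptom the commit message describes on the second hotplug attempt.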
+ + Signed-off-by: Zhenzhong Duan + Fixes: a22651053b59 ("vfio: Make vfio-pci device migration capable") + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 33 +++++++++++++++++++++++---------- + hw/vfio/pci.c | 1 + + 2 files changed, 24 insertions(+), 10 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e6e5e85f75..e3954570c8 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -802,6 +802,17 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static void vfio_migration_deinit(VFIODevice *vbasedev) ++{ ++ VFIOMigration *migration = vbasedev->migration; ++ ++ remove_migration_state_change_notifier(&migration->migration_state); ++ qemu_del_vm_change_state_handler(migration->vm_state); ++ unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); ++ vfio_migration_free(vbasedev); ++ vfio_unblock_multiple_devices_migration(); ++} ++ + static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) + { + int ret; +@@ -866,7 +877,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + error_setg(&err, + "%s: VFIO device doesn't support device dirty tracking", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + warn_report("%s: VFIO device doesn't support device dirty tracking", +@@ -875,29 +886,31 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + + ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { +- return ret; ++ goto out_deinit; + } + + if (vfio_viommu_preset(vbasedev)) { + error_setg(&err, "%s: Migration is currently not supported " + "with vIOMMU enabled", vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ goto add_blocker; + } + + trace_vfio_migration_realize(vbasedev->name); + return 0; ++ ++add_blocker: ++ ret = vfio_block_migration(vbasedev, err, errp); ++out_deinit: ++ if (ret) { ++ vfio_migration_deinit(vbasedev); ++ } ++ return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) + { + if (vbasedev->migration) { +- VFIOMigration *migration = vbasedev->migration; +- +- remove_migration_state_change_notifier(&migration->migration_state); +- qemu_del_vm_change_state_handler(migration->vm_state); +- unregister_savevm(VMSTATE_IF(vbasedev->dev), "vfio", vbasedev); +- vfio_migration_free(vbasedev); +- vfio_unblock_multiple_devices_migration(); ++ vfio_migration_deinit(vbasedev); + } + + if (vbasedev->migration_blocker) { +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 922c81872c..037b7d4176 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3234,6 +3234,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { + error_report("%s: Migration disabled", vbasedev->name); ++ goto out_deregister; + } + } + +-- +2.39.3 + diff --git a/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch b/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch new file mode 100644 index 0000000..3258541 --- /dev/null +++ b/kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch @@ -0,0 +1,283 @@ +From 747c34c0a3b8048ebdab387d22f2b922c81d572a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 21/37] vfio/migration: Make VFIO migration non-experimental +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [19/28] 2f457c1c0de95a3fced0270f2edbbc5193cc4de9 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 8bbcb64a71d8 +Author: Avihai Horon +Date: Wed Jun 28 10:31:12 2023 +0300 + + vfio/migration: Make VFIO migration non-experimental + + The major parts of VFIO migration are supported today in QEMU. This + includes basic VFIO migration, device dirty page tracking and precopy + support. + + Thus, at this point in time, it seems appropriate to make VFIO migration + non-experimental: remove the x prefix from enable_migration property, + change it to ON_OFF_AUTO and let the default value be AUTO. + + In addition, make the following adjustments: + 1. When enable_migration is ON and migration is not supported, fail VFIO + device realization. + 2. When enable_migration is AUTO (i.e., not explicitly enabled), require + device dirty tracking support. This is because device dirty tracking + is currently the only method to do dirty page tracking, which is + essential for migrating in a reasonable downtime. Setting + enable_migration to ON will not require device dirty tracking. + 3. Make migration error and blocker messages more elaborate. + 4. Remove error prints in vfio_migration_query_flags(). + 5. Rename trace_vfio_migration_probe() to + trace_vfio_migration_realize(). + + Signed-off-by: Avihai Horon + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 16 ++++++- + hw/vfio/migration.c | 79 +++++++++++++++++++++++------------ + hw/vfio/pci.c | 4 +- + hw/vfio/trace-events | 2 +- + include/hw/vfio/vfio-common.h | 6 +-- + 5 files changed, 73 insertions(+), 34 deletions(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 3b4ac53f15..136d8243d6 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -381,7 +381,7 @@ static unsigned int vfio_migratable_device_num(void) + return device_num; + } + +-int vfio_block_multiple_devices_migration(Error **errp) ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -390,6 +390,12 @@ int vfio_block_multiple_devices_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, "Migration is currently not supported with multiple " ++ "VFIO devices"); ++ return -EINVAL; ++ } ++ + error_setg(&multiple_devices_migration_blocker, + "Migration is currently not supported with multiple " + "VFIO devices"); +@@ -427,7 +433,7 @@ static bool vfio_viommu_preset(void) + return false; + } + +-int vfio_block_giommu_migration(Error **errp) ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp) + { + int ret; + +@@ -436,6 +442,12 @@ int vfio_block_giommu_migration(Error **errp) + return 0; + } + ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_setg(errp, ++ "Migration is currently not supported with vIOMMU enabled"); ++ return -EINVAL; ++ } ++ + error_setg(&giommu_migration_blocker, + "Migration is currently not supported with vIOMMU enabled"); + ret = migrate_add_blocker(giommu_migration_blocker, errp); +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 7cf143926c..1db7d52ab2 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -724,14 +724,6 @@ static 
int vfio_migration_query_flags(VFIODevice *vbasedev, uint64_t *mig_flags) + feature->argsz = sizeof(buf); + feature->flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_MIGRATION; + if (ioctl(vbasedev->fd, VFIO_DEVICE_FEATURE, feature)) { +- if (errno == ENOTTY) { +- error_report("%s: VFIO migration is not supported in kernel", +- vbasedev->name); +- } else { +- error_report("%s: Failed to query VFIO migration support, err: %s", +- vbasedev->name, strerror(errno)); +- } +- + return -errno; + } + +@@ -810,6 +802,27 @@ static int vfio_migration_init(VFIODevice *vbasedev) + return 0; + } + ++static int vfio_block_migration(VFIODevice *vbasedev, Error *err, Error **errp) ++{ ++ int ret; ++ ++ if (vbasedev->enable_migration == ON_OFF_AUTO_ON) { ++ error_propagate(errp, err); ++ return -EINVAL; ++ } ++ ++ vbasedev->migration_blocker = error_copy(err); ++ error_free(err); ++ ++ ret = migrate_add_blocker(vbasedev->migration_blocker, errp); ++ if (ret < 0) { ++ error_free(vbasedev->migration_blocker); ++ vbasedev->migration_blocker = NULL; ++ } ++ ++ return ret; ++} ++ + /* ---------------------------------------------------------------------- */ + + int64_t vfio_mig_bytes_transferred(void) +@@ -824,40 +837,54 @@ void vfio_reset_bytes_transferred(void) + + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { +- int ret = -ENOTSUP; ++ Error *err = NULL; ++ int ret; + +- if (!vbasedev->enable_migration) { +- goto add_blocker; ++ if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { ++ error_setg(&err, "%s: Migration is disabled for VFIO device", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); + if (ret) { +- goto add_blocker; ++ if (ret == -ENOTTY) { ++ error_setg(&err, "%s: VFIO migration is not supported in kernel", ++ vbasedev->name); ++ } else { ++ error_setg(&err, ++ "%s: Migration couldn't be initialized for VFIO device, " ++ "err: %d (%s)", ++ vbasedev->name, ret, strerror(-ret)); ++ } ++ ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ if (!vbasedev->dirty_pages_supported) { ++ if (vbasedev->enable_migration == ON_OFF_AUTO_AUTO) { ++ error_setg(&err, ++ "%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); ++ return vfio_block_migration(vbasedev, err, errp); ++ } ++ ++ warn_report("%s: VFIO device doesn't support device dirty tracking", ++ vbasedev->name); + } + +- ret = vfio_block_multiple_devices_migration(errp); ++ ret = vfio_block_multiple_devices_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- ret = vfio_block_giommu_migration(errp); ++ ret = vfio_block_giommu_migration(vbasedev, errp); + if (ret) { + return ret; + } + +- trace_vfio_migration_probe(vbasedev->name); ++ trace_vfio_migration_realize(vbasedev->name); + return 0; +- +-add_blocker: +- error_setg(&vbasedev->migration_blocker, +- "VFIO device doesn't support migration"); +- +- ret = migrate_add_blocker(vbasedev->migration_blocker, errp); +- if (ret < 0) { +- error_free(vbasedev->migration_blocker); +- vbasedev->migration_blocker = NULL; +- } +- return ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 15e7554954..6634945a70 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3371,8 +3371,8 @@ static Property vfio_pci_dev_properties[] = { + VFIO_FEATURE_ENABLE_REQ_BIT, true), + DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features, + VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false), +- DEFINE_PROP_BOOL("x-enable-migration", 
VFIOPCIDevice, +- vbasedev.enable_migration, false), ++ DEFINE_PROP_ON_OFF_AUTO("enable-migration", VFIOPCIDevice, ++ vbasedev.enable_migration, ON_OFF_AUTO_AUTO), + DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false), + DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice, + vbasedev.ram_block_discard_allowed, false), +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 4150b59e58..0391bd583b 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -155,7 +155,7 @@ vfio_load_cleanup(const char *name) " (%s)" + vfio_load_device_config_state(const char *name) " (%s)" + vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64 + vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) " (%s) size 0x%"PRIx64" ret %d" +-vfio_migration_probe(const char *name) " (%s)" ++vfio_migration_realize(const char *name) " (%s)" + vfio_migration_set_state(const char *name, const char *state) " (%s) state %s" + vfio_migration_state_notifier(const char *name, const char *state) " (%s) state %s" + vfio_save_block(const char *name, int data_size) " (%s) data_size %d" +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 1d19c6f251..93429b9abb 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -139,7 +139,7 @@ typedef struct VFIODevice { + bool needs_reset; + bool no_mmap; + bool ram_block_discard_allowed; +- bool enable_migration; ++ OnOffAuto enable_migration; + VFIODeviceOps *ops; + unsigned int num_irqs; + unsigned int num_regions; +@@ -225,9 +225,9 @@ typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList; + extern VFIOGroupList vfio_group_list; + + bool vfio_mig_active(void); +-int vfio_block_multiple_devices_migration(Error **errp); ++int vfio_block_multiple_devices_migration(VFIODevice *vbasedev, Error **errp); + void vfio_unblock_multiple_devices_migration(void); +-int vfio_block_giommu_migration(Error **errp); ++int vfio_block_giommu_migration(VFIODevice *vbasedev, Error **errp); + int64_t vfio_mig_bytes_transferred(void); + void vfio_reset_bytes_transferred(void); + +-- +2.39.3 + diff --git a/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch b/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch new file mode 100644 index 0000000..3b61c5d --- /dev/null +++ b/kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch @@ -0,0 +1,102 @@ +From edcf24a08d66d620a10c746824e31d230c8516ce Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 13/37] vfio/migration: Refactor vfio_save_block() to return + saved data size +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [11/28] b4aed6ddcbde159e98275a0675dcdf45d644673b (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit cf53efbbda2e +Author: Avihai Horon +Date: Wed Jun 21 14:11:58 2023 +0300 + + vfio/migration: Refactor vfio_save_block() to return saved data size + + Refactor vfio_save_block() to return the size of saved data on success + and -errno on error. + + This will be used in next patch to implement VFIO migration pre-copy + support. 
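The refactor replaces the old tri-state return (1 for end-of-stream, 0 for more data, -errno on error) with the more conventional "negative errno on error, otherwise the number of bytes saved", so the caller can loop directly on the returned size. Below is a tiny self-contained illustration of that calling convention; the chunk source and sizes are invented and have nothing to do with the real VFIO data stream.

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/types.h>

    /* Toy data source: hand out three 16-byte chunks, then end-of-stream. */
    static int chunks_left = 3;

    /* Returns the saved size on success, 0 at end-of-stream, -errno on error. */
    static ssize_t save_block(void)
    {
        if (chunks_left < 0) {
            return -EINVAL;
        }
        if (chunks_left == 0) {
            return 0;               /* end-of-stream */
        }
        chunks_left--;
        return 16;                  /* bytes "saved" in this block */
    }

    int main(void)
    {
        ssize_t data_size;

        /* Caller loop mirrors the do/while in the patch: stop on 0 or error. */
        do {
            data_size = save_block();
            if (data_size < 0) {
                fprintf(stderr, "save failed: %s\n", strerror((int)-data_size));
                return 1;
            }
        } while (data_size);

        return 0;
    }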
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Juan Quintela + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 6b58dddb88..235978fd68 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -241,8 +241,8 @@ static int vfio_query_stop_copy_size(VFIODevice *vbasedev, + return 0; + } + +-/* Returns 1 if end-of-stream is reached, 0 if more data and -errno if error */ +-static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) ++/* Returns the size of saved data on success and -errno on error */ ++static ssize_t vfio_save_block(QEMUFile *f, VFIOMigration *migration) + { + ssize_t data_size; + +@@ -252,7 +252,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + return -errno; + } + if (data_size == 0) { +- return 1; ++ return 0; + } + + qemu_put_be64(f, VFIO_MIG_FLAG_DEV_DATA_STATE); +@@ -262,7 +262,7 @@ static int vfio_save_block(QEMUFile *f, VFIOMigration *migration) + + trace_vfio_save_block(migration->vbasedev->name, data_size); + +- return qemu_file_get_error(f); ++ return qemu_file_get_error(f) ?: data_size; + } + + /* ---------------------------------------------------------------------- */ +@@ -335,6 +335,7 @@ static void vfio_state_pending_exact(void *opaque, uint64_t *must_precopy, + static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + { + VFIODevice *vbasedev = opaque; ++ ssize_t data_size; + int ret; + + /* We reach here with device state STOP only */ +@@ -345,11 +346,11 @@ static int vfio_save_complete_precopy(QEMUFile *f, void *opaque) + } + + do { +- ret = vfio_save_block(f, vbasedev->migration); +- if (ret < 0) { +- return ret; ++ data_size = vfio_save_block(f, vbasedev->migration); ++ if (data_size < 0) { ++ return data_size; + } +- } while (!ret); ++ } while (data_size); + + qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE); + ret = qemu_file_get_error(f); +-- +2.39.3 + diff --git a/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch b/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch new file mode 100644 index 0000000..ad3c6ca --- /dev/null +++ b/kvm-vfio-migration-Remove-print-of-Migration-disabled.patch @@ -0,0 +1,56 @@ +From 5bb94c4eaeb94f0b41a57660098a4c12a295b725 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 28/37] vfio/migration: Remove print of "Migration disabled" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [26/28] c7ff1f9c90b4cfcb327ef474042ea71ea577a94d (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0520d63c7701 +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:09 2023 +0800 + + vfio/migration: Remove print of "Migration disabled" + + Property enable_migration supports [on/off/auto]. + In ON mode, error pointer is passed to errp and logged. + In OFF mode, we doesn't need to log "Migration disabled" as it's intentional. + In AUTO mode, we should only ever see errors or warnings if the device + supports migration and an error or incompatibility occurs while further + probing or configuring it. 
Lack of support for migration shoundn't + generate an error or warning. + + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 037b7d4176..a60b868c38 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3233,7 +3233,6 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + if (!pdev->failover_pair_id) { + ret = vfio_migration_realize(vbasedev, errp); + if (ret) { +- error_report("%s: Migration disabled", vbasedev->name); + goto out_deregister; + } + } +-- +2.39.3 + diff --git a/kvm-vfio-migration-Reset-bytes_transferred-properly.patch b/kvm-vfio-migration-Reset-bytes_transferred-properly.patch new file mode 100644 index 0000000..2666460 --- /dev/null +++ b/kvm-vfio-migration-Reset-bytes_transferred-properly.patch @@ -0,0 +1,165 @@ +From a63b4010ba4f491c9144afff363bebcf35ecf496 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 20/37] vfio/migration: Reset bytes_transferred properly +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [18/28] e9a70faeca4fd5aa7ef36502cf76bf0b62f65057 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 808642a2f640 +Author: Avihai Horon +Date: Wed Jun 28 10:31:11 2023 +0300 + + vfio/migration: Reset bytes_transferred properly + + Currently, VFIO bytes_transferred is not reset properly: + 1. bytes_transferred is not reset after a VM snapshot (so a migration + following a snapshot will report incorrect value). + 2. bytes_transferred is a single counter for all VFIO devices, however + upon migration failure it is reset multiple times, by each VFIO + device. + + Fix it by introducing a new function vfio_reset_bytes_transferred() and + calling it during migration and snapshot start. + + Remove existing bytes_transferred reset in VFIO migration state + notifier, which is not needed anymore. + + Fixes: 3710586caa5d ("qapi: Add VFIO devices migration stats in Migration stats") + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Reviewed-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Conflicts: + - migration/migration.c + migration/savevm.c + context changes due to commit aff3f6606d14 ("migration: Rename + ram_counters to mig_stats") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 6 +++++- + include/hw/vfio/vfio-common.h | 1 + + migration/migration.c | 1 + + migration/migration.h | 1 + + migration/savevm.c | 1 + + migration/target.c | 17 +++++++++++++++-- + 6 files changed, 24 insertions(+), 3 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index acbf0bb7ab..7cf143926c 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -697,7 +697,6 @@ static void vfio_migration_state_notifier(Notifier *notifier, void *data) + case MIGRATION_STATUS_CANCELLING: + case MIGRATION_STATUS_CANCELLED: + case MIGRATION_STATUS_FAILED: +- bytes_transferred = 0; + /* + * If setting the device in RUNNING state fails, the device should + * be reset. To do so, use ERROR state as a recover state. 
+@@ -818,6 +817,11 @@ int64_t vfio_mig_bytes_transferred(void) + return bytes_transferred; + } + ++void vfio_reset_bytes_transferred(void) ++{ ++ bytes_transferred = 0; ++} ++ + int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + int ret = -ENOTSUP; +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 6d1b8487c3..1d19c6f251 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -229,6 +229,7 @@ int vfio_block_multiple_devices_migration(Error **errp); + void vfio_unblock_multiple_devices_migration(void); + int vfio_block_giommu_migration(Error **errp); + int64_t vfio_mig_bytes_transferred(void); ++void vfio_reset_bytes_transferred(void); + + #ifdef CONFIG_LINUX + int vfio_get_region_info(VFIODevice *vbasedev, int index, +diff --git a/migration/migration.c b/migration/migration.c +index 9bf1caee6c..47ad6c43cb 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1638,6 +1638,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, + */ + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + + return true; + } +diff --git a/migration/migration.h b/migration/migration.h +index e9679f8029..7ccf460aa2 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -495,6 +495,7 @@ bool migration_rate_limit(void); + void migration_cancel(const Error *error); + + void populate_vfio_info(MigrationInfo *info); ++void reset_vfio_bytes_transferred(void); + void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); + + #endif +diff --git a/migration/savevm.c b/migration/savevm.c +index aff70e6263..83088fc3f8 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1620,6 +1620,7 @@ static int qemu_savevm_state(QEMUFile *f, Error **errp) + migrate_init(ms); + memset(&ram_counters, 0, sizeof(ram_counters)); + memset(&compression_counters, 0, sizeof(compression_counters)); ++ reset_vfio_bytes_transferred(); + ms->to_dst_file = f; + + qemu_mutex_unlock_iothread(); +diff --git a/migration/target.c b/migration/target.c +index 00ca007f97..f39c9a8d88 100644 +--- a/migration/target.c ++++ b/migration/target.c +@@ -14,12 +14,25 @@ + #include "hw/vfio/vfio-common.h" + #endif + ++#ifdef CONFIG_VFIO + void populate_vfio_info(MigrationInfo *info) + { +-#ifdef CONFIG_VFIO + if (vfio_mig_active()) { + info->vfio = g_malloc0(sizeof(*info->vfio)); + info->vfio->transferred = vfio_mig_bytes_transferred(); + } +-#endif + } ++ ++void reset_vfio_bytes_transferred(void) ++{ ++ vfio_reset_bytes_transferred(); ++} ++#else ++void populate_vfio_info(MigrationInfo *info) ++{ ++} ++ ++void reset_vfio_bytes_transferred(void) ++{ ++} ++#endif +-- +2.39.3 + diff --git a/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch b/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch new file mode 100644 index 0000000..efd42a9 --- /dev/null +++ b/kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch @@ -0,0 +1,125 @@ +From 223eef8363c9ba58514b2d4f93e5ff015d111ff2 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 29/37] vfio/migration: Return bool type for + vfio_migration_realize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav 
Rezanina +RH-Commit: [27/28] d5aea3ea4c53e4573076cbacbbe3134f9f0f9e53 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit d4a2af747d5a +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:10 2023 +0800 + + vfio/migration: Return bool type for vfio_migration_realize() + + Make vfio_migration_realize() adhere to the convention of other realize() + callbacks(like qdev_realize) by returning bool instead of int. + + Suggested-by: Cédric Le Goater + Suggested-by: Joao Martins + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 15 ++++++++++----- + hw/vfio/pci.c | 3 +-- + include/hw/vfio/vfio-common.h | 2 +- + 3 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index e3954570c8..2674f4bc47 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -846,7 +846,12 @@ void vfio_reset_bytes_transferred(void) + bytes_transferred = 0; + } + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) ++/* ++ * Return true when either migration initialized or blocker registered. ++ * Currently only return false when adding blocker fails which will ++ * de-register vfio device. ++ */ ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + { + Error *err = NULL; + int ret; +@@ -854,7 +859,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + if (vbasedev->enable_migration == ON_OFF_AUTO_OFF) { + error_setg(&err, "%s: Migration is disabled for VFIO device", + vbasedev->name); +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + ret = vfio_migration_init(vbasedev); +@@ -869,7 +874,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + vbasedev->name, ret, strerror(-ret)); + } + +- return vfio_block_migration(vbasedev, err, errp); ++ return !vfio_block_migration(vbasedev, err, errp); + } + + if (!vbasedev->dirty_pages_supported) { +@@ -896,7 +901,7 @@ int vfio_migration_realize(VFIODevice *vbasedev, Error **errp) + } + + trace_vfio_migration_realize(vbasedev->name); +- return 0; ++ return true; + + add_blocker: + ret = vfio_block_migration(vbasedev, err, errp); +@@ -904,7 +909,7 @@ out_deinit: + if (ret) { + vfio_migration_deinit(vbasedev); + } +- return ret; ++ return !ret; + } + + void vfio_migration_exit(VFIODevice *vbasedev) +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a60b868c38..ba40ca8784 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3231,8 +3231,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + } + + if (!pdev->failover_pair_id) { +- ret = vfio_migration_realize(vbasedev, errp); +- if (ret) { ++ if (!vfio_migration_realize(vbasedev, errp)) { + goto out_deregister; + } + } +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index 45167c8a8a..da43d27352 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -252,7 +252,7 @@ int vfio_spapr_create_window(VFIOContainer *container, + int vfio_spapr_remove_window(VFIOContainer *container, + hwaddr offset_within_address_space); + +-int vfio_migration_realize(VFIODevice *vbasedev, Error **errp); ++bool vfio_migration_realize(VFIODevice *vbasedev, Error **errp); + void vfio_migration_exit(VFIODevice *vbasedev); + + #endif /* HW_VFIO_VFIO_COMMON_H */ +-- +2.39.3 + diff --git a/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch 
b/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch new file mode 100644 index 0000000..6211db7 --- /dev/null +++ b/kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch @@ -0,0 +1,68 @@ +From 76208f7824d5139ac8d86140b0e01031b67638cc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 04/37] vfio/migration: Skip log_sync during migration SETUP + state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/28] 4c340992b472ac4627b57705f4e971f14bbb0846 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit ff180c6bd7a8 +Author: Avihai Horon +Date: Mon Apr 3 16:00:00 2023 +0300 + + vfio/migration: Skip log_sync during migration SETUP state + + Currently, VFIO log_sync can be issued while migration is in SETUP + state. However, doing this log_sync is at best redundant and at worst + can fail. + + Redundant -- all RAM is marked dirty in migration SETUP state and is + transferred only after migration is set to ACTIVE state, so doing + log_sync during migration SETUP is pointless. + + Can fail -- there is a time window, between setting migration state to + SETUP and starting dirty tracking by RAM save_live_setup handler, during + which dirty tracking is still not started. Any VFIO log_sync call that + is issued during this time window will fail. For example, this error can + be triggered by migrating a VM when a GUI is active, which constantly + calls log_sync. + + Fix it by skipping VFIO log_sync while migration is in SETUP state. 
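The fix narrows the old migration_is_setup_or_active() test down to an explicit allow-list of states in which syncing the dirty bitmap is meaningful. A standalone sketch of that gating idea follows; the enum is invented for illustration and is not QEMU's MigrationStatus definition.

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical migration phases, loosely modeled on the states named in
     * the commit message. */
    typedef enum {
        MIG_SETUP,      /* RAM all dirty, tracking may not be started yet */
        MIG_ACTIVE,     /* RAM transfer in progress */
        MIG_DEVICE,     /* device state transfer */
        MIG_COMPLETED,
    } MigState;

    /* Dirty-page sync only makes sense once RAM transfer has actually begun. */
    static bool dirty_sync_allowed(MigState s)
    {
        return s == MIG_ACTIVE || s == MIG_DEVICE;
    }

    int main(void)
    {
        printf("SETUP:  %d\n", dirty_sync_allowed(MIG_SETUP));   /* 0: skip */
        printf("ACTIVE: %d\n", dirty_sync_allowed(MIG_ACTIVE));  /* 1: sync */
        printf("DEVICE: %d\n", dirty_sync_allowed(MIG_DEVICE));  /* 1: sync */
        return 0;
    }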
+ + Fixes: 758b96b61d5c ("vfio/migrate: Move switch of dirty tracking into vfio_memory_listener") + Signed-off-by: Avihai Horon + Link: https://lore.kernel.org/r/20230403130000.6422-1-avihaih@nvidia.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/common.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 4d01ea3515..78358ede27 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -478,7 +478,8 @@ static bool vfio_devices_all_dirty_tracking(VFIOContainer *container) + VFIODevice *vbasedev; + MigrationState *ms = migrate_get_current(); + +- if (!migration_is_setup_or_active(ms->state)) { ++ if (ms->state != MIGRATION_STATUS_ACTIVE && ++ ms->state != MIGRATION_STATUS_DEVICE) { + return false; + } + +-- +2.39.3 + diff --git a/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch b/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch new file mode 100644 index 0000000..2db8511 --- /dev/null +++ b/kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch @@ -0,0 +1,70 @@ +From 77353cdafd08562dff9c99e9f3984d12224bee52 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 14/37] vfio/migration: Store VFIO migration flags in + VFIOMigration +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [12/28] 31a9c39e6ee6338a35dc08c3e7f5c1a204166249 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 6cd1fe11598a +Author: Avihai Horon +Date: Wed Jun 21 14:11:59 2023 +0300 + + vfio/migration: Store VFIO migration flags in VFIOMigration + + VFIO migration flags are queried once in vfio_migration_init(). Store + them in VFIOMigration so they can be used later to check the device's + migration capabilities without re-querying them. + + This will be used in the next patch to check if the device supports + precopy migration. 
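The change boils down to querying the migration feature flags once and caching them in the per-device migration state, so later capability checks (like the pre-copy check in the next patch) do not need another ioctl round-trip. A minimal sketch of that query-once, check-later pattern; the CAP_* names are invented and are not the real VFIO_MIGRATION_* flags.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Invented capability bits standing in for the kernel-reported flags. */
    #define CAP_BASIC    (1u << 0)
    #define CAP_PRECOPY  (1u << 1)

    typedef struct {
        uint64_t mig_flags;          /* cached once at init, consulted later */
    } MigCtx;

    /* Pretend kernel query: report the device's capabilities exactly once. */
    static uint64_t query_flags(void)
    {
        return CAP_BASIC | CAP_PRECOPY;
    }

    static void mig_init(MigCtx *ctx)
    {
        ctx->mig_flags = query_flags();          /* single query, then cache */
    }

    static bool mig_supports_precopy(const MigCtx *ctx)
    {
        return ctx->mig_flags & CAP_PRECOPY;     /* no re-query needed */
    }

    int main(void)
    {
        MigCtx ctx;

        mig_init(&ctx);
        printf("precopy: %s\n", mig_supports_precopy(&ctx) ? "yes" : "no");
        return 0;
    }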
+ + Signed-off-by: Avihai Horon + Reviewed-by: Cédric Le Goater + Tested-by: YangHang Liu + Acked-by: Alex Williamson + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/migration.c | 1 + + include/hw/vfio/vfio-common.h | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c +index 235978fd68..8d33414379 100644 +--- a/hw/vfio/migration.c ++++ b/hw/vfio/migration.c +@@ -603,6 +603,7 @@ static int vfio_migration_init(VFIODevice *vbasedev) + migration->vbasedev = vbasedev; + migration->device_state = VFIO_DEVICE_STATE_RUNNING; + migration->data_fd = -1; ++ migration->mig_flags = mig_flags; + + vbasedev->dirty_pages_supported = vfio_dma_logging_supported(vbasedev); + +diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h +index eed244f25f..5f29dab839 100644 +--- a/include/hw/vfio/vfio-common.h ++++ b/include/hw/vfio/vfio-common.h +@@ -66,6 +66,7 @@ typedef struct VFIOMigration { + int data_fd; + void *data_buffer; + size_t data_buffer_size; ++ uint64_t mig_flags; + } VFIOMigration; + + typedef struct VFIOAddressSpace { +-- +2.39.3 + diff --git a/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch b/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch new file mode 100644 index 0000000..b5d9d37 --- /dev/null +++ b/kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch @@ -0,0 +1,67 @@ +From b5a69101abac153c9c9be7f539d810e3e4af3bdf Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 19/37] vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI + retry path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [17/28] 2067bb58f3a2c1a793e5566cee3c78a8299c9c1c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit c17408892319 +Author: Shameer Kolothum +Date: Tue Jun 13 15:09:43 2023 +0100 + + vfio/pci: Call vfio_prepare_kvm_msi_virq_batch() in MSI retry path + + When vfio_enable_vectors() returns with less than requested nr_vectors + we retry with what kernel reported back. But the retry path doesn't + call vfio_prepare_kvm_msi_virq_batch() and this results in, + + qemu-system-aarch64: vfio: Error: Failed to enable 4 MSI vectors, retry with 1 + qemu-system-aarch64: ../hw/vfio/pci.c:602: vfio_commit_kvm_msi_virq_batch: Assertion `vdev->defer_kvm_irq_routing' failed + + Fixes: dc580d51f7dd ("vfio: defer to commit kvm irq routing when enable msi/msix") + Reviewed-by: Longpeng + Signed-off-by: Shameer Kolothum + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 7c5e2b5996..15e7554954 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -666,6 +666,8 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + + vfio_disable_interrupts(vdev); + ++ vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); ++retry: + /* + * Setting vector notifiers needs to enable route for each vector. 
+ * Deferring to commit the KVM routes once rather than per vector +@@ -673,8 +675,6 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev) + */ + vfio_prepare_kvm_msi_virq_batch(vdev); + +- vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev); +-retry: + vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors); + + for (i = 0; i < vdev->nr_vectors; i++) { +-- +2.39.3 + diff --git a/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch b/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch new file mode 100644 index 0000000..0aca4ef --- /dev/null +++ b/kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch @@ -0,0 +1,54 @@ +From 816c20b23546d31316c9ca450db8a6668ac6216c Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 25/37] vfio/pci: Disable INTx in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [23/28] 2fde4bad00c4286e6bbe24947c2bfd6468fc0ff3 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit adee0da0368f +Author: Zhenzhong Duan +Date: Mon Jul 3 15:15:06 2023 +0800 + + vfio/pci: Disable INTx in vfio_realize error path + + When vfio realize fails, INTx isn't disabled if it has been enabled. + This may confuse host side with unhandled interrupt report. + + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Joao Martins + Reviewed-by: Cédric Le Goater + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 87bd440504..2d059832a4 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3244,6 +3244,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + return; + + out_deregister: ++ if (vdev->interrupt == VFIO_INT_INTx) { ++ vfio_intx_disable(vdev); ++ } + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); +-- +2.39.3 + diff --git a/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch b/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch new file mode 100644 index 0000000..d05d114 --- /dev/null +++ b/kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch @@ -0,0 +1,67 @@ +From 0b1ab3aacc02e70bfe8440236eb9def426bbe10e Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 22/37] vfio/pci: Fix a segfault in vfio_realize +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [20/28] 48b9c1efe295c2672693d9c99f6d11738d2b98d1 (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 357bd7932a13 +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:38 2023 +0800 + + vfio/pci: Fix a segfault in vfio_realize + + The kvm irqchip notifier is only registered if the device supports + INTx, however it's unconditionally removed in vfio realize error + path. 
If the assigned device does not support INTx, this will cause + QEMU to crash when vfio realize fails. Change it to conditionally + remove the notifier only if the notify hook is setup. + + Before fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Connection closed by foreign host. + + After fix: + (qemu) device_add vfio-pci,host=81:11.1,id=vfio1,bus=root1,xres=1 + Error: vfio 0000:81:11.1: xres and yres properties require display=on + (qemu) + + Fixes: c5478fea27ac ("vfio/pci: Respond to KVM irqchip change notifier") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6634945a70..d08e6c1a20 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3245,7 +3245,9 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + + out_deregister: + pci_device_set_intx_routing_notifier(&vdev->pdev, NULL); +- kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ if (vdev->irqchip_change_notifier.notify) { ++ kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/kvm-vfio-pci-Fix-a-use-after-free-issue.patch b/kvm-vfio-pci-Fix-a-use-after-free-issue.patch new file mode 100644 index 0000000..1fa725f --- /dev/null +++ b/kvm-vfio-pci-Fix-a-use-after-free-issue.patch @@ -0,0 +1,56 @@ +From 2437a06ff137c4bc856df096e42407c1f50b25b0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 06/37] vfio/pci: Fix a use-after-free issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/28] eca69a7e0a6fb8c1c70be8b91209a53b040e30ba (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b83b40b61484 +Author: Zhenzhong Duan +Date: Wed May 17 10:46:51 2023 +0800 + + vfio/pci: Fix a use-after-free issue + + vbasedev->name is freed wrongly which leads to garbage VFIO trace log. + Fix it by allocating a dup of vbasedev->name and then free the dup. 
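The original bug: the vf_token branch returned freshly allocated memory while the plain branch aliased vbasedev->name, so the unconditional free afterwards released a string the device still used. The sketch below shows the fixed shape only; it uses plain libc strdup()/free() rather than glib's g_strdup()/g_free(), and the device string is made up.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Both branches hand back heap memory, so the single free() at the end
     * never touches caller-owned storage. */
    static void open_device(const char *base_name, const char *token)
    {
        char *name;

        if (token) {
            size_t len = strlen(base_name) + strlen(" vf_token=") +
                         strlen(token) + 1;

            name = malloc(len);
            if (!name) {
                return;
            }
            snprintf(name, len, "%s vf_token=%s", base_name, token);
        } else {
            name = strdup(base_name);       /* duplicate instead of aliasing */
            if (!name) {
                return;
            }
        }

        printf("opening %s\n", name);
        free(name);                          /* now safe in both branches */
    }

    int main(void)
    {
        const char *base = "0000:81:11.1";

        open_device(base, NULL);
        open_device(base, "deadbeef");
        printf("base name still valid: %s\n", base);   /* no use-after-free */
        return 0;
    }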
+ + Fixes: 2dca1b37a760 ("vfio/pci: add support for VF token") + Suggested-by: Alex Williamson + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Matthew Rosato + Acked-by: Alex Williamson + Reviewed-by: Philippe Mathieu-Daudé + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 6cd3a98c39..7c5e2b5996 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3018,7 +3018,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + qemu_uuid_unparse(&vdev->vf_token, uuid); + name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); + } else { +- name = vbasedev->name; ++ name = g_strdup(vbasedev->name); + } + + ret = vfio_get_device(group, name, vbasedev, errp); +-- +2.39.3 + diff --git a/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch b/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch new file mode 100644 index 0000000..3978b96 --- /dev/null +++ b/kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch @@ -0,0 +1,55 @@ +From 9c5016c9b3f9cf66d1b531de829e8b5010962695 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:57 +0200 +Subject: [PATCH 23/37] vfio/pci: Free leaked timer in vfio_realize error path +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [21/28] dbaae4e484de4613f7f7735be519b7357627326e (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 0cc889c8826c +Author: Zhenzhong Duan +Date: Thu Jun 29 16:40:39 2023 +0800 + + vfio/pci: Free leaked timer in vfio_realize error path + + When vfio_realize fails, the mmap_timer used for INTx optimization + isn't freed. As this timer isn't activated yet, the potential impact + is just a piece of leaked memory. 
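The leaked mmap_timer is an optional object that only exists for the slow INTx path, which is why the added error-path cleanup checks for it and frees it only when it was created. A trivial standalone sketch of that NULL-guarded teardown idea; the struct and field below are made up and do not reflect the real VFIOPCIDevice layout.

    #include <stdlib.h>

    /* Hypothetical device with a lazily created helper object. */
    typedef struct {
        int *mmap_timer;            /* may or may not have been allocated */
    } Dev;

    static void dev_error_teardown(Dev *dev)
    {
        if (dev->mmap_timer) {      /* free only what was actually created */
            free(dev->mmap_timer);
            dev->mmap_timer = NULL; /* keep a second call harmless */
        }
    }

    int main(void)
    {
        Dev dev = { .mmap_timer = malloc(sizeof(int)) };

        dev_error_teardown(&dev);   /* error-path cleanup */
        dev_error_teardown(&dev);   /* no double free */
        return 0;
    }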
+ + Fixes: ea486926b07d ("vfio-pci: Update slow path INTx algorithm timer related") + Signed-off-by: Zhenzhong Duan + Reviewed-by: Cédric Le Goater + Reviewed-by: Joao Martins + Signed-off-by: Cédric Le Goater + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index d08e6c1a20..87bd440504 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -3248,6 +3248,9 @@ out_deregister: + if (vdev->irqchip_change_notifier.notify) { + kvm_irqchip_remove_change_notifier(&vdev->irqchip_change_notifier); + } ++ if (vdev->intx.mmap_timer) { ++ timer_free(vdev->intx.mmap_timer); ++ } + out_teardown: + vfio_teardown_msi(vdev); + vfio_bars_exit(vdev); +-- +2.39.3 + diff --git a/kvm-vfio-pci-Static-Resizable-BAR-capability.patch b/kvm-vfio-pci-Static-Resizable-BAR-capability.patch new file mode 100644 index 0000000..d937140 --- /dev/null +++ b/kvm-vfio-pci-Static-Resizable-BAR-capability.patch @@ -0,0 +1,141 @@ +From db53345dba5682c3ba0bc3fc596b30a98dadb88f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 05/37] vfio/pci: Static Resizable BAR capability +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/28] 42e9f4b517eb919c77c6fdbe771d9d05a91955bd (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit b5048a4cbfa0 +Author: Alex Williamson +Date: Thu May 4 14:42:48 2023 -0600 + + vfio/pci: Static Resizable BAR capability + + The PCI Resizable BAR (ReBAR) capability is currently hidden from the + VM because the protocol for interacting with the capability does not + support a mechanism for the device to reject an advertised supported + BAR size. However, when assigned to a VM, the act of resizing the + BAR requires adjustment of host resources for the device, which + absolutely can fail. Linux does not currently allow us to reserve + resources for the device independent of the current usage. + + The only writable field within the ReBAR capability is the BAR Size + register. The PCIe spec indicates that when written, the device + should immediately begin to operate with the provided BAR size. The + spec however also notes that software must only write values + corresponding to supported sizes as indicated in the capability and + control registers. Writing unsupported sizes produces undefined + results. Therefore, if the hypervisor were to virtualize the + capability and control registers such that the current size is the + only indicated available size, then a write of anything other than + the current size falls into the category of undefined behavior, + where we can essentially expose the modified ReBAR capability as + read-only. + + This may seem pointless, but users have reported that virtualizing + the capability in this way not only allows guest software to expose + related features as available (even if only cosmetic), but in some + scenarios can resolve guest driver issues. Additionally, no + regressions in behavior have been reported for this change. + + A caveat here is that the PCIe spec requires for compatibility that + devices report support for a size in the range of 1MB to 512GB, + therefore if the current BAR size falls outside that range we revert + to hiding the capability. 
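The arithmetic the capability virtualization relies on: the BAR Size field encodes log2(size in bytes) minus 20 (so value 0 is 1MB, 19 is 512GB, 27 is 128TB), and a supported size shows up in the capability register as bit (value + 4). A quick demo of that mapping follows; it is just an illustration, not QEMU code.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* Size-field values to decode: 1MB, 256MB, 512GB, 128TB, 256TB. */
        static const unsigned sizes[] = { 0, 8, 19, 27, 28 };

        for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
            unsigned s = sizes[i];
            uint64_t bytes = 1ULL << (s + 20);
            /* Values above 27 have no corresponding capability bit. */
            uint32_t cap_bit = s <= 27 ? 1U << (s + 4) : 0;

            printf("size field %2u -> %15llu bytes, cap bit 0x%08x\n",
                   s, (unsigned long long)bytes, (unsigned)cap_bit);
        }
        return 0;
    }

Since the compatibility range the commit message cites (1MB to 512GB) corresponds to values 0 through 19, a current BAR size that decodes outside that range is exactly the case where the capability ends up hidden.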
+ + Reviewed-by: Cédric Le Goater + Link: https://lore.kernel.org/r/20230505232308.2869912-1-alex.williamson@redhat.com + Signed-off-by: Alex Williamson + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 53 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 579b92a6ed..6cd3a98c39 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2069,6 +2069,54 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) + return 0; + } + ++static int vfio_setup_rebar_ecap(VFIOPCIDevice *vdev, uint16_t pos) ++{ ++ uint32_t ctrl; ++ int i, nbar; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL); ++ nbar = (ctrl & PCI_REBAR_CTRL_NBAR_MASK) >> PCI_REBAR_CTRL_NBAR_SHIFT; ++ ++ for (i = 0; i < nbar; i++) { ++ uint32_t cap; ++ int size; ++ ++ ctrl = pci_get_long(vdev->pdev.config + pos + PCI_REBAR_CTRL + (i * 8)); ++ size = (ctrl & PCI_REBAR_CTRL_BAR_SIZE) >> PCI_REBAR_CTRL_BAR_SHIFT; ++ ++ /* The cap register reports sizes 1MB to 128TB, with 4 reserved bits */ ++ cap = size <= 27 ? 1U << (size + 4) : 0; ++ ++ /* ++ * The PCIe spec (v6.0.1, 7.8.6) requires HW to support at least one ++ * size in the range 1MB to 512GB. We intend to mask all sizes except ++ * the one currently enabled in the size field, therefore if it's ++ * outside the range, hide the whole capability as this virtualization ++ * trick won't work. If >512GB resizable BARs start to appear, we ++ * might need an opt-in or reservation scheme in the kernel. ++ */ ++ if (!(cap & PCI_REBAR_CAP_SIZES)) { ++ return -EINVAL; ++ } ++ ++ /* Hide all sizes reported in the ctrl reg per above requirement. */ ++ ctrl &= (PCI_REBAR_CTRL_BAR_SIZE | ++ PCI_REBAR_CTRL_NBAR_MASK | ++ PCI_REBAR_CTRL_BAR_IDX); ++ ++ /* ++ * The BAR size field is RW, however we've mangled the capability ++ * register such that we only report a single size, ie. the current ++ * BAR size. A write of an unsupported value is undefined, therefore ++ * the register field is essentially RO. 
++ */ ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CAP + (i * 8), cap, ~0); ++ vfio_add_emulated_long(vdev, pos + PCI_REBAR_CTRL + (i * 8), ctrl, ~0); ++ } ++ ++ return 0; ++} ++ + static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + { + PCIDevice *pdev = &vdev->pdev; +@@ -2142,9 +2190,13 @@ static void vfio_add_ext_cap(VFIOPCIDevice *vdev) + case 0: /* kernel masked capability */ + case PCI_EXT_CAP_ID_SRIOV: /* Read-only VF BARs confuse OVMF */ + case PCI_EXT_CAP_ID_ARI: /* XXX Needs next function virtualization */ +- case PCI_EXT_CAP_ID_REBAR: /* Can't expose read-only */ + trace_vfio_add_ext_cap_dropped(vdev->vbasedev.name, cap_id, next); + break; ++ case PCI_EXT_CAP_ID_REBAR: ++ if (!vfio_setup_rebar_ecap(vdev, next)) { ++ pcie_add_capability(pdev, cap_id, cap_ver, next, size); ++ } ++ break; + default: + pcie_add_capability(pdev, cap_id, cap_ver, next, size); + } +-- +2.39.3 + diff --git a/kvm-vfio-pci-add-support-for-VF-token.patch b/kvm-vfio-pci-add-support-for-VF-token.patch new file mode 100644 index 0000000..7b40e5e --- /dev/null +++ b/kvm-vfio-pci-add-support-for-VF-token.patch @@ -0,0 +1,104 @@ +From 3022cc31bca5a5441e285c971eaf72b7643b9be0 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Wed, 12 Jul 2023 17:46:56 +0200 +Subject: [PATCH 03/37] vfio/pci: add support for VF token +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 179: vfio: live migration support +RH-Bugzilla: 2192818 +RH-Acked-by: Eric Auger +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/28] ff24284ede2806e21f4f6709d8abd4c4029b7d5c (clegoate/qemu-kvm-c9s) + +Bugzilla: https://bugzilla.redhat.com/2192818 + +commit 2dca1b37a760 +Author: Minwoo Im +Date: Mon Mar 20 16:35:22 2023 +0900 + + vfio/pci: add support for VF token + + VF token was introduced [1] to kernel vfio-pci along with SR-IOV + support [2]. This patch adds support VF token among PF and VF(s). To + passthu PCIe VF to a VM, kernel >= v5.7 needs this. + + It can be configured with UUID like: + + -device vfio-pci,host=DDDD:BB:DD:F,vf-token=,... + + [1] https://lore.kernel.org/linux-pci/158396393244.5601.10297430724964025753.stgit@gimli.home/ + [2] https://lore.kernel.org/linux-pci/158396044753.5601.14804870681174789709.stgit@gimli.home/ + + Cc: Alex Williamson + Signed-off-by: Minwoo Im + Reviewed-by: Klaus Jensen + Link: https://lore.kernel.org/r/20230320073522epcms2p48f682ecdb73e0ae1a4850ad0712fd780@epcms2p4 + Signed-off-by: Alex Williamson + +Conflicts: + - hw/vfio/pci.c + context changes in vfio_realize () due to redhat commit 267071d16b23 + ("vfio: cap number of devices that can be assigned") + +Signed-off-by: Cédric Le Goater +--- + hw/vfio/pci.c | 13 ++++++++++++- + hw/vfio/pci.h | 1 + + 2 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index a779053be3..579b92a6ed 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2859,6 +2859,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + int groupid; + int ret, i = 0; + bool is_mdev; ++ char uuid[UUID_FMT_LEN]; ++ char *name; + + if (device_limit && device_limit != vdev->assigned_device_limit) { + error_setg(errp, "Assigned device limit has been redefined. 
" +@@ -2960,7 +2962,15 @@ static void vfio_realize(PCIDevice *pdev, Error **errp) + goto error; + } + +- ret = vfio_get_device(group, vbasedev->name, vbasedev, errp); ++ if (!qemu_uuid_is_null(&vdev->vf_token)) { ++ qemu_uuid_unparse(&vdev->vf_token, uuid); ++ name = g_strdup_printf("%s vf_token=%s", vbasedev->name, uuid); ++ } else { ++ name = vbasedev->name; ++ } ++ ++ ret = vfio_get_device(group, name, vbasedev, errp); ++ g_free(name); + if (ret) { + vfio_put_group(group); + goto error; +@@ -3292,6 +3302,7 @@ static void vfio_instance_init(Object *obj) + + static Property vfio_pci_dev_properties[] = { + DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIOPCIDevice, host), ++ DEFINE_PROP_UUID_NODEFAULT("vf-token", VFIOPCIDevice, vf_token), + DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev), + DEFINE_PROP_ON_OFF_AUTO("x-pre-copy-dirty-page-tracking", VFIOPCIDevice, + vbasedev.pre_copy_dirty_page_tracking, +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 45235d38ba..10530743ad 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -137,6 +137,7 @@ struct VFIOPCIDevice { + VFIOVGA *vga; /* 0xa0000, 0x3b0, 0x3c0 */ + void *igd_opregion; + PCIHostDeviceAddress host; ++ QemuUUID vf_token; + EventNotifier err_notifier; + EventNotifier req_notifier; + int (*resetfn)(struct VFIOPCIDevice *); +-- +2.39.3 + diff --git a/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch b/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch new file mode 100644 index 0000000..acfb3ae --- /dev/null +++ b/kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch @@ -0,0 +1,151 @@ +From 08c8af80dbd03b46a6a8397ef0c41cda3e6de22c Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:17 +0200 +Subject: [PATCH 01/37] virtio-iommu: Fix 64kB host page size VFIO device + assignment + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [1/2] b48db1c964559505dda4c6c9a3b79d68207b25eb (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211634 + +When running on a 64kB page size host and protecting a VFIO device +with the virtio-iommu, qemu crashes with this kind of message: + +qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible +with mask 0x20010000 +qemu: hardware error: vfio: DMA mapping failed, unable to continue + +This is due to the fact the IOMMU MR corresponding to the VFIO device +is enabled very late on domain attach, after the machine init. +The device reports a minimal 64kB page size but it is too late to be +applied. virtio_iommu_set_page_size_mask() fails and this causes +vfio_listener_region_add() to end up with hw_error(); + +To work around this issue, we transiently enable the IOMMU MR on +machine init to collect the page size requirements and then restore +the bypass state. + +Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") +Signed-off-by: Eric Auger + +Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +Reviewed-by: Zhenzhong Duan +(cherry picked from commit 94df5b2180d61fb2ee2b04cc007981e58b6479a9) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 1 + + hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- + include/hw/virtio/virtio-iommu.h | 2 ++ + 3 files changed, 32 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 8f8d05cf9b..68b752e304 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -131,6 +131,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m + virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" + virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" + virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" ++virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 + + # virtio-mem.c + virtio_mem_send_response(uint16_t type) "type=%" PRIu16 +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 1cd258135d..542679b321 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -24,6 +24,7 @@ + #include "hw/virtio/virtio.h" + #include "sysemu/kvm.h" + #include "sysemu/reset.h" ++#include "sysemu/sysemu.h" + #include "qapi/error.h" + #include "qemu/error-report.h" + #include "trace.h" +@@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + } + + /* +- * After the machine is finalized, we can't change the mask anymore. If by ++ * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still + * accept it. Having a different masks is possible but the guest will use + * sub-optimal block sizes, so warn about it. 
+ */ +- if (phase_check(PHASE_MACHINE_READY)) { ++ if (s->granule_frozen) { + int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +@@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) + + } + ++static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) ++{ ++ VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); ++ int granule; ++ ++ if (likely(s->config.bypass)) { ++ /* ++ * Transient IOMMU MR enable to collect page_size_mask requirements ++ * through memory_region_iommu_set_page_size_mask() called by ++ * VFIO region_add() callback ++ */ ++ s->config.bypass = false; ++ virtio_iommu_switch_address_space_all(s); ++ /* restore default */ ++ s->config.bypass = true; ++ virtio_iommu_switch_address_space_all(s); ++ } ++ s->granule_frozen = true; ++ granule = ctz64(s->config.page_size_mask); ++ trace_virtio_iommu_freeze_granule(BIT(granule)); ++} ++ + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + { + VirtIODevice *vdev = VIRTIO_DEVICE(dev); +@@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); + } + ++ s->machine_done.notify = virtio_iommu_freeze_granule; ++ qemu_add_machine_init_done_notifier(&s->machine_done); ++ + qemu_register_reset(virtio_iommu_system_reset, s); + } + +@@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + qemu_unregister_reset(virtio_iommu_system_reset, s); ++ qemu_remove_machine_init_done_notifier(&s->machine_done); + + g_hash_table_destroy(s->as_by_busptr); + if (s->domains) { +diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h +index 2ad5ee320b..a93fc5383e 100644 +--- a/include/hw/virtio/virtio-iommu.h ++++ b/include/hw/virtio/virtio-iommu.h +@@ -61,6 +61,8 @@ struct VirtIOIOMMU { + QemuRecMutex mutex; + GTree *endpoints; + bool boot_bypass; ++ Notifier machine_done; ++ bool granule_frozen; + }; + + #endif +-- +2.39.3 + diff --git a/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch b/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch new file mode 100644 index 0000000..7934a12 --- /dev/null +++ b/kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch @@ -0,0 +1,83 @@ +From 643d93343759a350fe0f6327d308bf6a93c79d25 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Wed, 5 Jul 2023 18:51:18 +0200 +Subject: [PATCH 02/37] virtio-iommu: Rework the traces in + virtio_iommu_set_page_size_mask() + +RH-Author: Eric Auger +RH-MergeRequest: 182: VIRTIO-IOMMU/VFIO page size related fixes +RH-Bugzilla: 2211609 2211634 +RH-Acked-by: Gavin Shan +RH-Acked-by: Sebastian Ott +RH-Commit: [2/2] 0af7078dde158f07c83e2b293adc5d9d475688ae (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2211609 + +The current error messages in virtio_iommu_set_page_size_mask() +sound quite similar for different situations and miss the IOMMU +memory region that causes the issue. + +Clarify them and rework the comment. + +Also remove the trace when the new page_size_mask is not applied as +the current frozen granule is kept. This message is rather confusing +for the end user and anyway the current granule would have been used +by the driver. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Jean-Philippe Brucker +Tested-by: Jean-Philippe Brucker +(cherry picked from commit 587a7641d53055054d68d67d94c9408ef808f127) +Signed-off-by: Eric Auger +--- + hw/virtio/virtio-iommu.c | 19 +++++++------------ + 1 file changed, 7 insertions(+), 12 deletions(-) + +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 542679b321..421e2a944f 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, + new_mask); + + if ((cur_mask & new_mask) == 0) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); ++ error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 ++ " incompatible with currently supported mask 0x%"PRIx64, ++ mr->parent_obj.name, new_mask, cur_mask); + return -1; + } + + /* + * Once the granule is frozen we can't change the mask anymore. If by + * chance the hotplugged device supports the same granule, we can still +- * accept it. Having a different masks is possible but the guest will use +- * sub-optimal block sizes, so warn about it. ++ * accept it. + */ + if (s->granule_frozen) { +- int new_granule = ctz64(new_mask); + int cur_granule = ctz64(cur_mask); + +- if (new_granule != cur_granule) { +- error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 +- " is incompatible with mask 0x%"PRIx64, cur_mask, +- new_mask); ++ if (!(BIT(cur_granule) & new_mask)) { ++ error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", ++ mr->parent_obj.name, BIT_ULL(cur_granule)); + return -1; +- } else if (new_mask != cur_mask) { +- warn_report("virtio-iommu page mask 0x%"PRIx64 +- " does not match 0x%"PRIx64, cur_mask, new_mask); + } + return 0; + } +-- +2.39.3 + diff --git a/qemu.spec b/qemu.spec index 8bc652f..ed91c06 100644 --- a/qemu.spec +++ b/qemu.spec @@ -100,7 +100,7 @@ %endif %global target_list %{kvm_target}-softmmu -%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vdpa-blk,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring +%global block_drivers_rw_list qcow2,raw,file,host_device,nbd,iscsi,rbd,blkdebug,luks,null-co,nvme,copy-on-read,throttle,compress,virtio-blk-vhost-vdpa,virtio-blk-vfio-pci,virtio-blk-vhost-user,io_uring,nvme-io_uring %global block_drivers_ro_list vdi,vmdk,vhdx,vpc,https %define qemudocdir %{_docdir}/%{name} %global firmwaredirs "%{_datadir}/qemu-firmware:%{_datadir}/ipxe/qemu:%{_datadir}/seavgabios:%{_datadir}/seabios" @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 7%{?rcrel}%{?dist}%{?cc_suffix} +Release: 8%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -372,13 +372,83 @@ Patch108: kvm-vhost-fix-vhost_dev_enable_notifiers-error-case.patch Patch109: kvm-kvm-reuse-per-vcpu-stats-fd-to-avoid-vcpu-interrupti.patch # For bz#2128929 - [rhel9.2] hotplug/hotunplug mlx vdpa device to the occupied addr port, then qemu core dump occurs after shutdown guest Patch110: kvm-vhost-vdpa-do-not-cleanup-the-vdpa-vhost-net-structu.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch111: kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch +# For bz#2211609 - With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000" +# For bz#2211634 - [aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package +Patch112: kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch113: kvm-vfio-pci-add-support-for-VF-token.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch114: kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch115: kvm-vfio-pci-Static-Resizable-BAR-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch116: kvm-vfio-pci-Fix-a-use-after-free-issue.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch117: kvm-util-vfio-helpers-Use-g_file_read_link.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch118: kvm-migration-Make-all-functions-check-have-the-same-for.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch119: kvm-migration-Move-migration_properties-to-options.c.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch120: kvm-migration-Add-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch121: kvm-migration-Implement-switchover-ack-logic.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch122: kvm-migration-Enable-switchover-ack-capability.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch123: kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch124: kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch125: kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch126: kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch127: kvm-vfio-Implement-a-common-device-info-helper.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch128: kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch129: kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch130: kvm-vfio-migration-Reset-bytes_transferred-properly.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch131: kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch132: kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch133: kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch +# For bz#2192818 - 
[VFIO LM] Live migration +Patch134: kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch135: kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch136: kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch137: kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch138: kvm-vfio-migration-Remove-print-of-Migration-disabled.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch139: kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch +# For bz#2192818 - [VFIO LM] Live migration +Patch140: kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch141: kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch142: kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch143: kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch +# For bz#2220866 - Misaligned symbol for s390-ccw image during qemu-kvm build +Patch144: kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch +# For bz#2222579 - PNG screendump doesn't save screen correctly +Patch145: kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch +# For bz#2213317 - Enable libblkio-based block drivers in QEMU +Patch146: kvm-block-blkio-fix-module_block.py-parsing.patch # ELN specific changes Patch1001: 1001-keymaps-use-ara-X11-keyboard-map-instead-of-ar.patch -Patch1002: 1002-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch -Patch1003: 1003-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch -Patch1004: 1004-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch -Patch1005: 1005-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch %if %{have_clang} BuildRequires: clang @@ -1419,6 +1489,57 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jul 17 2023 Miroslav Rezanina - 8.0.0-8 +- kvm-virtio-iommu-Fix-64kB-host-page-size-VFIO-device-ass.patch [bz#2211609 bz#2211634] +- kvm-virtio-iommu-Rework-the-traces-in-virtio_iommu_set_p.patch [bz#2211609 bz#2211634] +- kvm-vfio-pci-add-support-for-VF-token.patch [bz#2192818] +- kvm-vfio-migration-Skip-log_sync-during-migration-SETUP-.patch [bz#2192818] +- kvm-vfio-pci-Static-Resizable-BAR-capability.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-use-after-free-issue.patch [bz#2192818] +- kvm-util-vfio-helpers-Use-g_file_read_link.patch [bz#2192818] +- kvm-migration-Make-all-functions-check-have-the-same-for.patch [bz#2192818] +- kvm-migration-Move-migration_properties-to-options.c.patch [bz#2192818] +- kvm-migration-Add-switchover-ack-capability.patch [bz#2192818] +- kvm-migration-Implement-switchover-ack-logic.patch [bz#2192818] +- kvm-migration-Enable-switchover-ack-capability.patch [bz#2192818] +- kvm-vfio-migration-Refactor-vfio_save_block-to-return-sa.patch [bz#2192818] +- kvm-vfio-migration-Store-VFIO-migration-flags-in-VFIOMig.patch [bz#2192818] +- kvm-vfio-migration-Add-VFIO-migration-pre-copy-support.patch [bz#2192818] +- kvm-vfio-migration-Add-support-for-switchover-ack-capabi.patch [bz#2192818] +- kvm-vfio-Implement-a-common-device-info-helper.patch [bz#2192818] +- 
kvm-hw-vfio-pci-quirks-Support-alternate-offset-for-GPUD.patch [bz#2192818] +- kvm-vfio-pci-Call-vfio_prepare_kvm_msi_virq_batch-in-MSI.patch [bz#2192818] +- kvm-vfio-migration-Reset-bytes_transferred-properly.patch [bz#2192818] +- kvm-vfio-migration-Make-VFIO-migration-non-experimental.patch [bz#2192818] +- kvm-vfio-pci-Fix-a-segfault-in-vfio_realize.patch [bz#2192818] +- kvm-vfio-pci-Free-leaked-timer-in-vfio_realize-error-pat.patch [bz#2192818] +- kvm-hw-vfio-pci-quirks-Sanitize-capability-pointer.patch [bz#2192818] +- kvm-vfio-pci-Disable-INTx-in-vfio_realize-error-path.patch [bz#2192818] +- kvm-vfio-migration-Change-vIOMMU-blocker-from-global-to-.patch [bz#2192818] +- kvm-vfio-migration-Free-resources-when-vfio_migration_re.patch [bz#2192818] +- kvm-vfio-migration-Remove-print-of-Migration-disabled.patch [bz#2192818] +- kvm-vfio-migration-Return-bool-type-for-vfio_migration_r.patch [bz#2192818] +- kvm-vfio-Fix-null-pointer-dereference-bug-in-vfio_bars_f.patch [bz#2192818] +- kvm-pc-bios-s390-ccw-Makefile-Use-z-noexecstack-to-silen.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Fix-indentation-in-start.S.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Provide-space-for-initial-stack-fra.patch [bz#2220866] +- kvm-pc-bios-s390-ccw-Don-t-use-__bss_start-with-the-larl.patch [bz#2220866] +- kvm-ui-Fix-pixel-colour-channel-order-for-PNG-screenshot.patch [bz#2222579] +- kvm-block-blkio-fix-module_block.py-parsing.patch [bz#2213317] +- kvm-Fix-virtio-blk-vhost-vdpa-typo-in-spec-file.patch [bz#2213317] +- Resolves: bz#2211609 + (With virtio-iommu and vfio-pci, qemu reports "warning: virtio-iommu page mask 0xfffffffffffff000 does not match 0x40201000") +- Resolves: bz#2211634 + ([aarch64] With virtio-iommu and vfio-pci, qemu coredump when host using kernel-64k package) +- Resolves: bz#2192818 + ([VFIO LM] Live migration) +- Resolves: bz#2220866 + (Misaligned symbol for s390-ccw image during qemu-kvm build) +- Resolves: bz#2222579 + (PNG screendump doesn't save screen correctly) +- Resolves: bz#2213317 + (Enable libblkio-based block drivers in QEMU) + * Mon Jul 10 2023 Miroslav Rezanina - 8.0.0-7 - kvm-numa-Validate-cluster-and-NUMA-node-boundary-if-requ.patch [bz#2171363] - kvm-hw-arm-Validate-cluster-and-NUMA-node-boundary.patch [bz#2171363]