From 48e88e8e194ef8735aad9235b3ef75d214db0a25 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Thu, 20 Mar 2025 18:33:43 -0400 Subject: [PATCH] * Thu Mar 20 2025 Jon Maloy - 9.1.0-16 - kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188] - kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188] - kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch [RHEL-7188] - kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch [RHEL-7188] - kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch [RHEL-7188] - kvm-qga-implement-a-guest-get-load-command.patch [RHEL-69622] - kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch [RHEL-69775] - kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch [RHEL-47340] - kvm-Recommend-systemtap-client-from-qemu-tools.patch [RHEL-47340] - Resolves: RHEL-7188 ([intel iommu][PF] DMAR: DRHD: handling fault status reg) - Resolves: RHEL-69622 ([qemu-guest-agent][RFE] Report CPU load average) - Resolves: RHEL-69775 (Guest crashed on the target host when the migration was canceled) - Resolves: RHEL-47340 ([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully) --- ...-Document-reset-expectations-for-DMA.patch | 53 ++++++ ...-arm-smmuv3-Move-reset-to-exit-phase.patch | 123 ++++++++++++ ...intel-iommu-Migrate-to-3-phase-reset.patch | 96 ++++++++++ ...dd-a-trace-point-in-vfio_reset_handl.patch | 61 ++++++ ...irtio-iommu-Migrate-to-3-phase-reset.patch | 96 ++++++++++ ...F-for-incoming-migration-on-Migratio.patch | 180 ++++++++++++++++++ ...a-implement-a-guest-get-load-command.patch | 139 ++++++++++++++ ...error-from-qemu-trace-stap-on-missin.patch | 90 +++++++++ qemu-kvm.spec | 38 +++- 9 files changed, 875 insertions(+), 1 deletion(-) create mode 100644 kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch create mode 100644 kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch create mode 100644 kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch create mode 100644 
kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch create mode 100644 kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch create mode 100644 kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch create mode 100644 kvm-qga-implement-a-guest-get-load-command.patch create mode 100644 kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch diff --git a/kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch b/kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch new file mode 100644 index 0000000..fcd1246 --- /dev/null +++ b/kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch @@ -0,0 +1,53 @@ +From 389c3c6b4215c9be3fd784c73af0e9795e796380 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Feb 2025 19:25:35 +0100 +Subject: [PATCH 5/9] docs/devel/reset: Document reset expectations for DMA and + IOMMU +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 341: Fix vIOMMU reset order +RH-Jira: RHEL-7188 +RH-Acked-by: Peter Xu +RH-Acked-by: Donald Dutile +RH-Acked-by: Cédric Le Goater +RH-Commit: [5/5] be8b9d9e34a2b301430dfa229c6785ab17d3fb16 (eauger1/centos-qemu-kvm) + +To avoid any translation faults, the IOMMUs are expected to be +reset after the devices they protect. Document that we expect +DMA requests to be stopped during the 'enter' or 'hold' phase +while IOMMUs should be reset during the 'exit' phase. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan +Message-Id: <20250218182737.76722-6-eric.auger@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit dd6d545e8f2d9a0e8a8c287ec16469f03ef5c198) +Signed-off-by: Eric Auger +--- + docs/devel/reset.rst | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/docs/devel/reset.rst b/docs/devel/reset.rst +index 9746a4e8a0..24ab630465 100644 +--- a/docs/devel/reset.rst ++++ b/docs/devel/reset.rst +@@ -123,6 +123,11 @@ The *exit* phase is executed only when the last reset operation ends. Therefore + the object does not need to care how many of reset controllers it has and how + many of them have started a reset. + ++DMA capable devices are expected to cancel all outstanding DMA operations ++during either 'enter' or 'hold' phases. IOMMUs are expected to reset during ++the 'exit' phase and this sequencing makes sure no outstanding DMA request ++will fault. ++ + + Handling reset in a resettable object + ------------------------------------- +-- +2.48.1 + diff --git a/kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch b/kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch new file mode 100644 index 0000000..689a6f5 --- /dev/null +++ b/kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch @@ -0,0 +1,123 @@ +From a3dfbe30e930c8d794057e45fffd91a9b0e6afd0 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Feb 2025 19:25:33 +0100 +Subject: [PATCH 3/9] hw/arm/smmuv3: Move reset to exit phase +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 341: Fix vIOMMU reset order +RH-Jira: RHEL-7188 +RH-Acked-by: Peter Xu +RH-Acked-by: Donald Dutile +RH-Acked-by: Cédric Le Goater +RH-Commit: [3/5] e291cb45c32e0fab49b200c275553bbe76b97264 (eauger1/centos-qemu-kvm) + +Currently the iommu may be reset before the devices +it protects. For example this happens with virtio-scsi-pci +when system_reset is issued from qmp monitor: spurious +"virtio: zero sized buffers are not allowed" warnings can +be observed.
This happens because outstanding DMA requests +are still happening while the SMMU gets reset. + +This can also happen with VFIO devices. In that case +spurious DMA translation faults can be observed on host. + +Make sure the SMMU is reset in the 'exit' phase after +all DMA capable devices have been reset during the 'enter' +or 'hold' phase. + +Signed-off-by: Eric Auger +Reviewed-by: Zhenzhong Duan + +Message-Id: <20250218182737.76722-4-eric.auger@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit e39e3f8b8dea856f141e9945167d2b18021ef445) +Signed-off-by: Eric Auger +--- + hw/arm/smmu-common.c | 9 +++++++-- + hw/arm/smmuv3.c | 14 ++++++++++---- + hw/arm/trace-events | 1 + + 3 files changed, 18 insertions(+), 6 deletions(-) + +diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c +index 3f82728758..f4210fcbc1 100644 +--- a/hw/arm/smmu-common.c ++++ b/hw/arm/smmu-common.c +@@ -924,7 +924,12 @@ static void smmu_base_realize(DeviceState *dev, Error **errp) + } + } + +-static void smmu_base_reset_hold(Object *obj, ResetType type) ++/* ++ * Make sure the IOMMU is reset in 'exit' phase after ++ * all outstanding DMA requests have been quiesced during ++ * the 'enter' or 'hold' reset phases ++ */ ++static void smmu_base_reset_exit(Object *obj, ResetType type) + { + SMMUState *s = ARM_SMMU(obj); + +@@ -950,7 +955,7 @@ static void smmu_base_class_init(ObjectClass *klass, void *data) + device_class_set_props(dc, smmu_dev_properties); + device_class_set_parent_realize(dc, smmu_base_realize, + &sbc->parent_realize); +- rc->phases.hold = smmu_base_reset_hold; ++ rc->phases.exit = smmu_base_reset_exit; + } + + static const TypeInfo smmu_base_info = { +diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c +index 3971976389..2e90570915 100644 +--- a/hw/arm/smmuv3.c ++++ b/hw/arm/smmuv3.c +@@ -1870,13 +1870,19 @@ static void smmu_init_irq(SMMUv3State *s, SysBusDevice *dev) + } + } + +-static void 
smmu_reset_hold(Object *obj, ResetType type) ++/* ++ * Make sure the IOMMU is reset in 'exit' phase after ++ * all outstanding DMA requests have been quiesced during ++ * the 'enter' or 'hold' reset phases ++ */ ++static void smmu_reset_exit(Object *obj, ResetType type) + { + SMMUv3State *s = ARM_SMMUV3(obj); + SMMUv3Class *c = ARM_SMMUV3_GET_CLASS(s); + +- if (c->parent_phases.hold) { +- c->parent_phases.hold(obj, type); ++ trace_smmu_reset_exit(); ++ if (c->parent_phases.exit) { ++ c->parent_phases.exit(obj, type); + } + + smmuv3_init_regs(s); +@@ -1999,7 +2005,7 @@ static void smmuv3_class_init(ObjectClass *klass, void *data) + SMMUv3Class *c = ARM_SMMUV3_CLASS(klass); + + dc->vmsd = &vmstate_smmuv3; +- resettable_class_set_parent_phases(rc, NULL, smmu_reset_hold, NULL, ++ resettable_class_set_parent_phases(rc, NULL, NULL, smmu_reset_exit, + &c->parent_phases); + device_class_set_parent_realize(dc, smmu_realize, + &c->parent_realize); +diff --git a/hw/arm/trace-events b/hw/arm/trace-events +index be6c8f720b..79ef347e3e 100644 +--- a/hw/arm/trace-events ++++ b/hw/arm/trace-events +@@ -56,6 +56,7 @@ smmuv3_config_cache_inv(uint32_t sid) "Config cache INV for sid=0x%x" + smmuv3_notify_flag_add(const char *iommu) "ADD SMMUNotifier node for iommu mr=%s" + smmuv3_notify_flag_del(const char *iommu) "DEL SMMUNotifier node for iommu mr=%s" + smmuv3_inv_notifiers_iova(const char *name, int asid, int vmid, uint64_t iova, uint8_t tg, uint64_t num_pages, int stage) "iommu mr=%s asid=%d vmid=%d iova=0x%"PRIx64" tg=%d num_pages=0x%"PRIx64" stage=%d" ++smmu_reset_exit(void) "" + + # strongarm.c + strongarm_uart_update_parameters(const char *label, int speed, char parity, int data_bits, int stop_bits) "%s speed=%d parity=%c data=%d stop=%d" +-- +2.48.1 + diff --git a/kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch b/kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch new file mode 100644 index 0000000..827c43c --- /dev/null +++ 
b/kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch @@ -0,0 +1,96 @@ +From 67b281dc1ccdae05da6c6052c264ecd94723c0b2 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Feb 2025 19:25:32 +0100 +Subject: [PATCH 2/9] hw/i386/intel-iommu: Migrate to 3-phase reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 341: Fix vIOMMU reset order +RH-Jira: RHEL-7188 +RH-Acked-by: Peter Xu +RH-Acked-by: Donald Dutile +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/5] 5b9b60b2b796529db10b846881e82e7df4626ec1 (eauger1/centos-qemu-kvm) + +Currently the IOMMU may be reset before the devices +it protects. For example this happens with virtio devices +but also with VFIO devices. In this latter case this +produces spurious translation faults on host. + +Let's use 3-phase reset mechanism and reset the IOMMU on +exit phase after all DMA capable devices have been reset +on 'enter' or 'hold' phase. + +Signed-off-by: Eric Auger +Acked-by: Michael S. Tsirkin +Acked-by: Jason Wang +Zhenzhong Duan + +Message-Id: <20250218182737.76722-3-eric.auger@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 2aaf48bcf27d8b3da5b30af6c1ced464d3df30f7) +Signed-off-by: Eric Auger + +Conflicts: Code change + hw/i386/intel_iommu.c +We miss e3d0814368d0 ("hw: Use device_class_set_legacy_reset() instead +of opencoding") meaning that instead of removing +device_class_set_legacy_reset(dc, vtd_reset) we remove +dc->reset = vtd_reset; +--- + hw/i386/intel_iommu.c | 12 +++++++++--- + hw/i386/trace-events | 1 + + 2 files changed, 10 insertions(+), 3 deletions(-) + +diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c +index 16d2885fcc..4acefcf5c8 100644 +--- a/hw/i386/intel_iommu.c ++++ b/hw/i386/intel_iommu.c +@@ -4212,10 +4212,11 @@ static void vtd_init(IntelIOMMUState *s) + /* Should not reset address_spaces when reset because devices will still use + * the address space they got at first (won't ask the bus again). + */ +-static void vtd_reset(DeviceState *dev) ++static void vtd_reset_exit(Object *obj, ResetType type) + { +- IntelIOMMUState *s = INTEL_IOMMU_DEVICE(dev); ++ IntelIOMMUState *s = INTEL_IOMMU_DEVICE(obj); + ++ trace_vtd_reset_exit(); + vtd_init(s); + vtd_address_space_refresh_all(s); + } +@@ -4367,8 +4368,13 @@ static void vtd_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + X86IOMMUClass *x86_class = X86_IOMMU_DEVICE_CLASS(klass); ++ ResettableClass *rc = RESETTABLE_CLASS(klass); + +- dc->reset = vtd_reset; ++ /* ++ * Use 'exit' reset phase to make sure all DMA requests ++ * have been quiesced during 'enter' or 'hold' phase ++ */ ++ rc->phases.exit = vtd_reset_exit; + dc->vmsd = &vtd_vmstate; + device_class_set_props(dc, vtd_properties); + dc->hotpluggable = false; +diff --git a/hw/i386/trace-events b/hw/i386/trace-events +index 53c02d7ac8..ac9e1a10aa 100644 +--- a/hw/i386/trace-events ++++ b/hw/i386/trace-events +@@ -68,6 +68,7 @@ vtd_frr_new(int index, uint64_t hi, uint64_t lo) "index %d high 0x%"PRIx64" low + vtd_warn_invalid_qi_tail(uint16_t tail) "tail 0x%"PRIx16 + 
vtd_warn_ir_vector(uint16_t sid, int index, int vec, int target) "sid 0x%"PRIx16" index %d vec %d (should be: %d)" + vtd_warn_ir_trigger(uint16_t sid, int index, int trig, int target) "sid 0x%"PRIx16" index %d trigger %d (should be: %d)" ++vtd_reset_exit(void) "" + + # amd_iommu.c + amdvi_evntlog_fail(uint64_t addr, uint32_t head) "error: fail to write at addr 0x%"PRIx64" + offset 0x%"PRIx32 +-- +2.48.1 + diff --git a/kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch b/kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch new file mode 100644 index 0000000..0c06398 --- /dev/null +++ b/kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch @@ -0,0 +1,61 @@ +From 04f11749dd21b4df1ea2818785d650dd6eee2cbe Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Feb 2025 19:25:34 +0100 +Subject: [PATCH 4/9] hw/vfio/common: Add a trace point in vfio_reset_handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 341: Fix vIOMMU reset order +RH-Jira: RHEL-7188 +RH-Acked-by: Peter Xu +RH-Acked-by: Donald Dutile +RH-Acked-by: Cédric Le Goater +RH-Commit: [4/5] 46878ffdc96997d1f6d09bde3fce350564e499fd (eauger1/centos-qemu-kvm) + +To ease the debug of reset sequence, let's add a trace point +in vfio_reset_handler() + +Signed-off-by: Eric Auger +Reviewed-by: Cédric Le Goater +Acked-by: Michael S. Tsirkin +Reviewed-by: Zhenzhong Duan +Message-Id: <20250218182737.76722-5-eric.auger@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit d410e709526d1cd4aa9085c6e254a622594a02a5) +Signed-off-by: Eric Auger +--- + hw/vfio/common.c | 1 + + hw/vfio/trace-events | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/hw/vfio/common.c b/hw/vfio/common.c +index 36d0cf6585..6982f88fc8 100644 +--- a/hw/vfio/common.c ++++ b/hw/vfio/common.c +@@ -1395,6 +1395,7 @@ void vfio_reset_handler(void *opaque) + { + VFIODevice *vbasedev; + ++ trace_vfio_reset_handler(); + QLIST_FOREACH(vbasedev, &vfio_device_list, global_next) { + if (vbasedev->dev->realized) { + vbasedev->ops->vfio_compute_needs_reset(vbasedev); +diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events +index 3756ff660e..9523a9ccb0 100644 +--- a/hw/vfio/trace-events ++++ b/hw/vfio/trace-events +@@ -120,6 +120,7 @@ vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t subtype + vfio_legacy_dma_unmap_overflow_workaround(void) "" + vfio_get_dirty_bitmap(uint64_t iova, uint64_t size, uint64_t bitmap_size, uint64_t start, uint64_t dirty_pages) "iova=0x%"PRIx64" size= 0x%"PRIx64" bitmap_size=0x%"PRIx64" start=0x%"PRIx64" dirty_pages=%"PRIu64 + vfio_iommu_map_dirty_notify(uint64_t iova_start, uint64_t iova_end) "iommu dirty @ 0x%"PRIx64" - 0x%"PRIx64 ++vfio_reset_handler(void) "" + + # platform.c + vfio_platform_realize(char *name, char *compat) "vfio device %s, compat = %s" +-- +2.48.1 + diff --git a/kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch b/kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch new file mode 100644 index 0000000..3922e9b --- /dev/null +++ b/kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch @@ -0,0 +1,96 @@ +From 9ca5d7ac4f0ff5f10bf424df8104fe5abe01e431 Mon Sep 17 00:00:00 2001 +From: Eric Auger +Date: Tue, 18 Feb 2025 19:25:31 +0100 +Subject: [PATCH 1/9] hw/virtio/virtio-iommu: Migrate to 3-phase reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 341: Fix vIOMMU 
reset order +RH-Jira: RHEL-7188 +RH-Acked-by: Peter Xu +RH-Acked-by: Donald Dutile +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/5] 32bf47497d5d4817a448d07ffa7a844aee82ae3c (eauger1/centos-qemu-kvm) + +Currently the iommu may be reset before the devices +it protects. For example this happens with virtio-net. + +Let's use 3-phase reset mechanism and reset the IOMMU on +exit phase after all DMA capable devices have been +reset during the 'enter' or 'hold' phase. + +Signed-off-by: Eric Auger +Acked-by: Michael S. Tsirkin +Reviewed-by: Zhenzhong Duan +Acked-by: Jason Wang + +Message-Id: <20250218182737.76722-2-eric.auger@redhat.com> +Reviewed-by: Peter Xu +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d261b84d354a41a38336af813f92f636d3fb3f78) +Signed-off-by: Eric Auger +--- + hw/virtio/trace-events | 2 +- + hw/virtio/virtio-iommu.c | 14 ++++++++++---- + 2 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 04e36ae047..76f0d458b2 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -108,7 +108,7 @@ virtio_pci_notify_write(uint64_t addr, uint64_t val, unsigned int size) "0x%" PR + virtio_pci_notify_write_pio(uint64_t addr, uint64_t val, unsigned int size) "0x%" PRIx64" = 0x%" PRIx64 " (%d)" + + # hw/virtio/virtio-iommu.c +-virtio_iommu_device_reset(void) "reset!" ++virtio_iommu_device_reset_exit(void) "reset!" + virtio_iommu_system_reset(void) "system reset!" 
+ virtio_iommu_get_features(uint64_t features) "device supports features=0x%"PRIx64 + virtio_iommu_device_status(uint8_t status) "driver status = %d" +diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c +index 59ef4fb217..496200ebc5 100644 +--- a/hw/virtio/virtio-iommu.c ++++ b/hw/virtio/virtio-iommu.c +@@ -1504,11 +1504,11 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) + virtio_cleanup(vdev); + } + +-static void virtio_iommu_device_reset(VirtIODevice *vdev) ++static void virtio_iommu_device_reset_exit(Object *obj, ResetType type) + { +- VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); ++ VirtIOIOMMU *s = VIRTIO_IOMMU(obj); + +- trace_virtio_iommu_device_reset(); ++ trace_virtio_iommu_device_reset_exit(); + + if (s->domains) { + g_tree_destroy(s->domains); +@@ -1669,6 +1669,7 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data) + { + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); ++ ResettableClass *rc = RESETTABLE_CLASS(klass); + + device_class_set_props(dc, virtio_iommu_properties); + dc->vmsd = &vmstate_virtio_iommu; +@@ -1676,7 +1677,12 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data) + set_bit(DEVICE_CATEGORY_MISC, dc->categories); + vdc->realize = virtio_iommu_device_realize; + vdc->unrealize = virtio_iommu_device_unrealize; +- vdc->reset = virtio_iommu_device_reset; ++ ++ /* ++ * Use 'exit' reset phase to make sure all DMA requests ++ * have been quiesced during 'enter' or 'hold' phase ++ */ ++ rc->phases.exit = virtio_iommu_device_reset_exit; + vdc->get_config = virtio_iommu_get_config; + vdc->set_config = virtio_iommu_set_config; + vdc->get_features = virtio_iommu_get_features; +-- +2.48.1 + diff --git a/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch b/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch new file mode 100644 index 0000000..d9d12bf --- /dev/null +++ b/kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch 
@@ -0,0 +1,180 @@ +From 5d7d7a2ec6301f4d0b0dbea4fbdcab4e41a9cf07 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 20 Feb 2025 08:24:59 -0500 +Subject: [PATCH 7/9] migration: Fix UAF for incoming migration on + MigrationState + +RH-Author: Peter Xu +RH-MergeRequest: 344: migration: Fix UAF for incoming migration on MigrationState +RH-Jira: RHEL-69775 +RH-Acked-by: Juraj Marcin +RH-Acked-by: Jon Maloy +RH-Commit: [1/1] 106e2b4c1c461202c912b5e3ea7e586c4ab05d8c (peterx/qemu-kvm) + +On the incoming migration side, QEMU uses a coroutine to load all the VM +states. Inside, it may reference MigrationState on global states like +migration capabilities, parameters, error state, shared mutexes and more. + +However there's nothing yet to make sure MigrationState won't get +destroyed (e.g. after migration_shutdown()). Meanwhile there's also no API +available to remove the incoming coroutine in migration_shutdown(), +avoiding it to access the freed elements. + +There's a bug report showing this can happen and crash dest QEMU when +migration is cancelled on source. + +When it happens, the dest main thread is trying to cleanup everything: + + #0 qemu_aio_coroutine_enter + #1 aio_dispatch_handler + #2 aio_poll + #3 monitor_cleanup + #4 qemu_cleanup + #5 qemu_default_main + +Then it found the migration incoming coroutine, schedule it (even after +migration_shutdown()), causing crash: + + #0 __pthread_kill_implementation + #1 __pthread_kill_internal + #2 __GI_raise + #3 __GI_abort + #4 __assert_fail_base + #5 __assert_fail + #6 qemu_mutex_lock_impl + #7 qemu_lockable_mutex_lock + #8 qemu_lockable_lock + #9 qemu_lockable_auto_lock + #10 migrate_set_error + #11 process_incoming_migration_co + #12 coroutine_trampoline + +To fix it, take a refcount after an incoming setup is properly done when +qmp_migrate_incoming() succeeded the 1st time. As it's during a QMP +handler which needs BQL, it means the main loop is still alive (without +going into cleanups, which also needs BQL). 
+ +Releasing the refcount now only until the incoming migration coroutine +finished or failed. Hence the refcount is valid for both (1) setup phase +of incoming ports, mostly IO watches (e.g. qio_channel_add_watch_full()), +and (2) the incoming coroutine itself (process_incoming_migration_co()). + +Note that we can't unref in migration_incoming_state_destroy(), because +both qmp_xen_load_devices_state() and load_snapshot() will use it without +an incoming migration. Those hold BQL so they're not prone to this issue. + +PS: I suspect nobody uses Xen's command at all, as it didn't register yank, +hence AFAIU the command should crash on master when trying to unregister +yank in migration_incoming_state_destroy().. but that's another story. + +Also note that in some incoming failure cases we may not always unref the +MigrationState refcount, which is a trade-off to keep things simple. We +could make it accurate, but it can be an overkill. Some examples: + + - Unlike most of the rest protocols, socket_start_incoming_migration() + may create net listener after incoming port setup successfully. + It means we can't unref in migration_channel_process_incoming() as a + generic path because socket protocol might keep using MigrationState. + + - For either socket or file, multiple IO watches might be created, it + means logically each IO watch needs to take one refcount for + MigrationState so as to be 100% accurate on ownership of refcount taken. + +In general, we at least need per-protocol handling to make it accurate, +which can be an overkill if we know incoming failed after all. Add a short +comment to explain that when taking the refcount in qmp_migrate_incoming().
+ +Bugzilla: https://issues.redhat.com/browse/RHEL-69775 +Tested-by: Yan Fu +Signed-off-by: Peter Xu +Reviewed-by: Fabiano Rosas +Message-ID: <20250220132459.512610-1-peterx@redhat.com> +Signed-off-by: Fabiano Rosas +(cherry picked from commit d657a14de5d597bbfe7b54e4c4f0646f440e98ad) +Signed-off-by: Peter Xu +--- + migration/migration.c | 40 ++++++++++++++++++++++++++++++++++++++-- + 1 file changed, 38 insertions(+), 2 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 999d4cac54..aabdc45c16 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -115,6 +115,27 @@ static void migration_downtime_start(MigrationState *s) + s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); + } + ++/* ++ * This is unfortunate: incoming migration actually needs the outgoing ++ * migration state (MigrationState) to be there too, e.g. to query ++ * capabilities, parameters, using locks, setup errors, etc. ++ * ++ * NOTE: when calling this, making sure current_migration exists and not ++ * been freed yet! Otherwise trying to access the refcount is already ++ * an use-after-free itself.. ++ * ++ * TODO: Move shared part of incoming / outgoing out into separate object. ++ * Then this is not needed. ++ */ ++static void migrate_incoming_ref_outgoing_state(void) ++{ ++ object_ref(migrate_get_current()); ++} ++static void migrate_incoming_unref_outgoing_state(void) ++{ ++ object_unref(migrate_get_current()); ++} ++ + static void migration_downtime_end(MigrationState *s) + { + int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME); +@@ -821,7 +842,7 @@ process_incoming_migration_co(void *opaque) + * postcopy thread. 
+ */ + trace_process_incoming_migration_co_postcopy_end_main(); +- return; ++ goto out; + } + /* Else if something went wrong then just fall out of the normal exit */ + } +@@ -837,7 +858,8 @@ process_incoming_migration_co(void *opaque) + } + + migration_bh_schedule(process_incoming_migration_bh, mis); +- return; ++ goto out; ++ + fail: + migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE, + MIGRATION_STATUS_FAILED); +@@ -854,6 +876,9 @@ fail: + + exit(EXIT_FAILURE); + } ++out: ++ /* Pairs with the refcount taken in qmp_migrate_incoming() */ ++ migrate_incoming_unref_outgoing_state(); + } + + /** +@@ -1875,6 +1900,17 @@ void qmp_migrate_incoming(const char *uri, bool has_channels, + return; + } + ++ /* ++ * Making sure MigrationState is available until incoming migration ++ * completes. ++ * ++ * NOTE: QEMU _might_ leak this refcount in some failure paths, but ++ * that's OK. This is the minimum change we need to at least making ++ * sure success case is clean on the refcount. We can try harder to ++ * make it accurate for any kind of failures, but it might be an ++ * overkill and doesn't bring us much benefit. ++ */ ++ migrate_incoming_ref_outgoing_state(); + once = false; + } + +-- +2.48.1 + diff --git a/kvm-qga-implement-a-guest-get-load-command.patch b/kvm-qga-implement-a-guest-get-load-command.patch new file mode 100644 index 0000000..d4622ff --- /dev/null +++ b/kvm-qga-implement-a-guest-get-load-command.patch @@ -0,0 +1,139 @@ +From 22f26a93ab94bf87c0724891a5886797a38c23b4 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Mon, 2 Dec 2024 12:19:27 +0000 +Subject: [PATCH 6/9] qga: implement a 'guest-get-load' command +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Konstantin Kostiuk +RH-MergeRequest: 343: RHEL-69622: qga: implement a 'guest-get-load' command +RH-Jira: RHEL-69622 +RH-Acked-by: Daniel P. 
Berrangé +RH-Acked-by: Jon Maloy +RH-Commit: [1/1] 9284c70737ad9f700d37f8c3833f855f2354acb7 (kkostiuk/redhat-centos-stream-src-qemu-kvm) + +Provide a way to report the process load average, via a new +'guest-get-load' command. + +This is only implemented for POSIX platforms providing 'getloadavg'. + +Example illustrated with qmp-shell: + +(QEMU) guest-get-load +{ + "return": { + "load15m": 1.546875, + "load1m": 1.669921875, + "load5m": 1.9306640625 + } +} + +Windows has no native equivalent API, but it would be possible to +simulate it as illustrated here (BSD-3-Clause): + + https://github.com/giampaolo/psutil/pull/1485 + +This is left as an exercise for future contributors. + +Signed-off-by: Daniel P. Berrangé +Reviewed-by: Konstantin Kostiuk +Message-ID: <20241202121927.864335-1-berrange@redhat.com> +Signed-off-by: Konstantin Kostiuk +--- + meson.build | 1 + + qga/commands-posix.c | 20 ++++++++++++++++++++ + qga/qapi-schema.json | 37 +++++++++++++++++++++++++++++++++++++ + 3 files changed, 58 insertions(+) + +diff --git a/meson.build b/meson.build +index b3529aa0e1..1dd97c6f49 100644 +--- a/meson.build ++++ b/meson.build +@@ -2497,6 +2497,7 @@ config_host_data.set('CONFIG_SETNS', cc.has_function('setns') and cc.has_functio + config_host_data.set('CONFIG_SYNCFS', cc.has_function('syncfs')) + config_host_data.set('CONFIG_SYNC_FILE_RANGE', cc.has_function('sync_file_range')) + config_host_data.set('CONFIG_TIMERFD', cc.has_function('timerfd_create')) ++config_host_data.set('CONFIG_GETLOADAVG', cc.has_function('getloadavg')) + config_host_data.set('HAVE_COPY_FILE_RANGE', cc.has_function('copy_file_range')) + config_host_data.set('HAVE_GETIFADDRS', cc.has_function('getifaddrs')) + config_host_data.set('HAVE_GLIB_WITH_SLICE_ALLOCATOR', glib_has_gslice) +diff --git a/qga/commands-posix.c b/qga/commands-posix.c +index 49e40f9127..abfa53d6e9 100644 +--- a/qga/commands-posix.c ++++ b/qga/commands-posix.c +@@ -1371,3 +1371,23 @@ char *qga_get_host_name(Error **errp) + + 
return g_steal_pointer(&hostname); + } ++ ++#ifdef CONFIG_GETLOADAVG ++GuestLoadAverage *qmp_guest_get_load(Error **errp) ++{ ++ double loadavg[3]; ++ GuestLoadAverage *ret = NULL; ++ ++ if (getloadavg(loadavg, G_N_ELEMENTS(loadavg)) < 0) { ++ error_setg_errno(errp, errno, ++ "cannot query load average"); ++ return NULL; ++ } ++ ++ ret = g_new0(GuestLoadAverage, 1); ++ ret->load1m = loadavg[0]; ++ ret->load5m = loadavg[1]; ++ ret->load15m = loadavg[2]; ++ return ret; ++} ++#endif +diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json +index 495706cf73..739f008ff2 100644 +--- a/qga/qapi-schema.json ++++ b/qga/qapi-schema.json +@@ -1852,6 +1852,43 @@ + 'if': 'CONFIG_LINUX' + } + ++ ++## ++# @GuestLoadAverage: ++# ++# Statistics about process load information ++# ++# @load1m: 1-minute load average ++# ++# @load5m: 5-minute load average ++# ++# @load15m: 15-minute load average ++# ++# Since: 10.0 ++## ++{ 'struct': 'GuestLoadAverage', ++ 'data': { ++ 'load1m': 'number', ++ 'load5m': 'number', ++ 'load15m': 'number' ++ }, ++ 'if': 'CONFIG_GETLOADAVG' ++} ++ ++## ++# @guest-get-load: ++# ++# Retrieve CPU process load information ++# ++# Returns: load information ++# ++# Since: 10.0 ++## ++{ 'command': 'guest-get-load', ++ 'returns': 'GuestLoadAverage', ++ 'if': 'CONFIG_GETLOADAVG' ++} ++ + ## + # @GuestNetworkRoute: + # +-- +2.48.1 + diff --git a/kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch b/kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch new file mode 100644 index 0000000..a6c8257 --- /dev/null +++ b/kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch @@ -0,0 +1,90 @@ +From 314804fa4be6d653a7809b64076d4f3133a0ff59 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Fri, 6 Dec 2024 11:45:24 +0000 +Subject: [PATCH 8/9] scripts: improve error from qemu-trace-stap on missing + 'stap' +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P.
Berrangé +RH-MergeRequest: 345: scripts: improve error from qemu-trace-stap on missing 'stap' +RH-Jira: RHEL-47340 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] c90635123f40e683488d83b59c71a5236c6d4659 (berrange/centos-src-qemu) + +If the 'stap' binary is missing in $PATH, a huge trace is thrown + + $ qemu-trace-stap list /usr/bin/qemu-system-x86_64 + Traceback (most recent call last): + File "/usr/bin/qemu-trace-stap", line 169, in + main() + File "/usr/bin/qemu-trace-stap", line 165, in main + args.func(args) + File "/usr/bin/qemu-trace-stap", line 83, in cmd_run + subprocess.call(stapargs) + File "/usr/lib64/python3.12/subprocess.py", line 389, in call + with Popen(*popenargs, **kwargs) as p: + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/lib64/python3.12/subprocess.py", line 1026, in {}init{} + self._execute_child(args, executable, preexec_fn, close_fds, + File "/usr/lib64/python3.12/subprocess.py", line 1955, in _execute_child + raise child_exception_type(errno_num, err_msg, err_filename) + FileNotFoundError: [Errno 2] No such file or directory: 'stap' + +With this change the user now gets + + $ qemu-trace-stap list /usr/bin/qemu-system-x86_64 + Unable to find 'stap' in $PATH + +Signed-off-by: Daniel P. 
Berrangé +Reviewed-by: Philippe Mathieu-Daudé +Message-id: 20241206114524.1666664-1-berrange@redhat.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 9976be3911a2d0503f026ae37c17077273bf30ee) +--- + scripts/qemu-trace-stap | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/scripts/qemu-trace-stap b/scripts/qemu-trace-stap +index eb6e951ff2..e983460ee7 100755 +--- a/scripts/qemu-trace-stap ++++ b/scripts/qemu-trace-stap +@@ -56,6 +56,7 @@ def tapset_dir(binary): + + + def cmd_run(args): ++ stap = which("stap") + prefix = probe_prefix(args.binary) + tapsets = tapset_dir(args.binary) + +@@ -76,7 +77,7 @@ def cmd_run(args): + + # We request an 8MB buffer, since the stap default 1MB buffer + # can be easily overflowed by frequently firing QEMU traces +- stapargs = ["stap", "-s", "8", "-I", tapsets ] ++ stapargs = [stap, "-s", "8", "-I", tapsets ] + if args.pid is not None: + stapargs.extend(["-x", args.pid]) + stapargs.extend(["-e", script]) +@@ -84,6 +85,7 @@ def cmd_run(args): + + + def cmd_list(args): ++ stap = which("stap") + tapsets = tapset_dir(args.binary) + + if args.verbose: +@@ -96,7 +98,7 @@ def cmd_list(args): + + if verbose: + print("Listing probes with name '%s'" % script) +- proc = subprocess.Popen(["stap", "-I", tapsets, "-l", script], ++ proc = subprocess.Popen([stap, "-I", tapsets, "-l", script], + stdout=subprocess.PIPE, + universal_newlines=True) + out, err = proc.communicate() +-- +2.48.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 5422e1f..6247558 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 9.1.0 -Release: 15%{?rcrel}%{?dist}%{?cc_suffix} +Release: 16%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -465,6 +465,22 @@ Patch147: kvm-iotests-Add-qsd-migrate-case.patch # For RHEL-54296 - Provide QMP command for block device reactivation after migration [rhel-9.5] # For RHEL-78397 - backport fix for double migration of a paused VM (disk activation rewrite) Patch148: kvm-iotests-Add-NBD-based-tests-for-inactive-nodes.patch +# For RHEL-7188 - [intel iommu][PF] DMAR: DRHD: handling fault status reg +Patch149: kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch +# For RHEL-7188 - [intel iommu][PF] DMAR: DRHD: handling fault status reg +Patch150: kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch +# For RHEL-7188 - [intel iommu][PF] DMAR: DRHD: handling fault status reg +Patch151: kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch +# For RHEL-7188 - [intel iommu][PF] DMAR: DRHD: handling fault status reg +Patch152: kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch +# For RHEL-7188 - [intel iommu][PF] DMAR: DRHD: handling fault status reg +Patch153: kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch +# For RHEL-69622 - [qemu-guest-agent][RFE] Report CPU load average +Patch154: kvm-qga-implement-a-guest-get-load-command.patch +# For RHEL-69775 - Guest crashed on the target host when the migration was canceled +Patch155: kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch +# For RHEL-47340 - [Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully +Patch156: kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch %if %{have_clang} BuildRequires: clang @@ -621,6 +637,7 @@ This package provides documentation and auxiliary programs used with %{name}. %package tools Summary: %{name} support tools +Recommends: systemtap-client %description tools %{name}-tools provides various tools related to %{name} usage. 
@@ -1531,6 +1548,25 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Thu Mar 20 2025 Jon Maloy - 9.1.0-16 +- kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188] +- kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188] +- kvm-hw-arm-smmuv3-Move-reset-to-exit-phase.patch [RHEL-7188] +- kvm-hw-vfio-common-Add-a-trace-point-in-vfio_reset_handl.patch [RHEL-7188] +- kvm-docs-devel-reset-Document-reset-expectations-for-DMA.patch [RHEL-7188] +- kvm-qga-implement-a-guest-get-load-command.patch [RHEL-69622] +- kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch [RHEL-69775] +- kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch [RHEL-47340] +- kvm-Recommend-systemtap-client-from-qemu-tools.patch [RHEL-47340] +- Resolves: RHEL-7188 + ([intel iommu][PF] DMAR: DRHD: handling fault status reg) +- Resolves: RHEL-69622 + ([qemu-guest-agent][RFE] Report CPU load average) +- Resolves: RHEL-69775 + (Guest crashed on the target host when the migration was canceled) +- Resolves: RHEL-47340 + ([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully) + * Mon Feb 17 2025 Jon Maloy - 9.1.0-15 - kvm-net-Fix-announce_self.patch [RHEL-73891] - kvm-migration-Add-helper-to-get-target-runstate.patch [RHEL-54296 RHEL-78397]