From 5b6159d787a6193458d7092c3b4ab9e5936d9474 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 31 Mar 2025 17:55:14 -0400 Subject: [PATCH] * Mon Mar 31 2025 Jon Maloy - 9.1.0-17 - kvm-hw-pci-Rename-has_power-to-enabled.patch [RHEL-7301] - kvm-hw-pci-Basic-support-for-PCI-power-management.patch [RHEL-7301] - kvm-pci-Use-PCI-PM-capability-initializer.patch [RHEL-7301] - kvm-vfio-pci-Delete-local-pm_cap.patch [RHEL-7301] - kvm-pcie-virtio-Remove-redundant-pm_cap.patch [RHEL-7301] - kvm-hw-vfio-pci-Re-order-pre-reset.patch [RHEL-7301] - kvm-Also-recommend-systemtap-devel-from-qemu-tools.patch [RHEL-47340] - Resolves: RHEL-7301 ([intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown) - Resolves: RHEL-47340 ([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully) --- ...sic-support-for-PCI-power-management.patch | 242 ++++++++++++++++++ kvm-hw-pci-Rename-has_power-to-enabled.patch | 130 ++++++++++ kvm-hw-vfio-pci-Re-order-pre-reset.patch | 74 ++++++ ...ci-Use-PCI-PM-capability-initializer.patch | 153 +++++++++++ kvm-pcie-virtio-Remove-redundant-pm_cap.patch | 99 +++++++ kvm-vfio-pci-Delete-local-pm_cap.patch | 81 ++++++ qemu-kvm.spec | 28 +- 7 files changed, 806 insertions(+), 1 deletion(-) create mode 100644 kvm-hw-pci-Basic-support-for-PCI-power-management.patch create mode 100644 kvm-hw-pci-Rename-has_power-to-enabled.patch create mode 100644 kvm-hw-vfio-pci-Re-order-pre-reset.patch create mode 100644 kvm-pci-Use-PCI-PM-capability-initializer.patch create mode 100644 kvm-pcie-virtio-Remove-redundant-pm_cap.patch create mode 100644 kvm-vfio-pci-Delete-local-pm_cap.patch diff --git a/kvm-hw-pci-Basic-support-for-PCI-power-management.patch b/kvm-hw-pci-Basic-support-for-PCI-power-management.patch new file mode 100644 index 0000000..6287a46 --- /dev/null +++ b/kvm-hw-pci-Basic-support-for-PCI-power-management.patch @@ -0,0 +1,242 @@ +From 98b0cd83c09d35a3da0ae142c09038174355e87e Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 25 Feb 2025 14:52:25 -0700 +Subject: [PATCH 2/7] hw/pci: Basic support for PCI power management +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [2/6] 5faff6382c124711887704fff4f857e8f85e7be5 (eauger1/centos-qemu-kvm) + +Conflicts: contextual conflict in include/hw/pci/pci.h +we don't have 449dca6ac93a ("pcie: enable Extended tag field support") +downstream so we don't have x-pcie-ext-tag definition. + +The memory and IO BARs for devices are only accessible in the D0 power +state. In other power states the PCI spec defines that the device +responds to TLPs and messages with an Unsupported Request response. + +To approximate this behavior, consider the BARs as unmapped when the +device is not in the D0 power state. This makes the BARs inaccessible +and has the additional bonus for vfio-pci that we don't attempt to DMA +map BARs for devices in a non-D0 power state. + +To support this, an interface is added for devices to register the PM +capability, which allows central tracking to enforce valid transitions +and unmap BARs in non-D0 states. + +NB. We currently have device models (eepro100 and pcie_pci_bridge) +that register a PM capability but do not set wmask to enable writes to +the power state field. In order to maintain migration compatibility, +this new helper does not manage the wmask to enable guest writes to +initiate a power state change. The contents and write access of the +PM capability are still managed by the caller. + +Cc: Michael S. Tsirkin +Cc: Marcel Apfelbaum +Signed-off-by: Alex Williamson +Reviewed-by: Eric Auger +Reviewed-by: Michael S. Tsirkin +Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-2-alex.williamson@redhat.com +Signed-off-by: Cédric Le Goater +(cherry picked from commit 9461afd2008b0820fc45a6a7bc675df1b6791e4f) +Signed-off-by: Eric Auger +--- + hw/pci/pci.c | 93 ++++++++++++++++++++++++++++++++++++- + hw/pci/trace-events | 2 + + include/hw/pci/pci.h | 3 ++ + include/hw/pci/pci_device.h | 3 ++ + 4 files changed, 99 insertions(+), 2 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index 83c9d5b9ea..d774ae47d2 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -365,6 +365,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) + attrs, NULL); + } + ++/* ++ * Register and track a PM capability. If wmask is also enabled for the power ++ * state field of the pmcsr register, guest writes may change the device PM ++ * state. BAR access is only enabled while the device is in the D0 state. ++ * Return the capability offset or negative error code. ++ */ ++int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp) ++{ ++ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp); ++ ++ if (cap < 0) { ++ return cap; ++ } ++ ++ d->pm_cap = cap; ++ d->cap_present |= QEMU_PCI_CAP_PM; ++ ++ return cap; ++} ++ ++static uint8_t pci_pm_state(PCIDevice *d) ++{ ++ uint16_t pmcsr; ++ ++ if (!(d->cap_present & QEMU_PCI_CAP_PM)) { ++ return 0; ++ } ++ ++ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL); ++ ++ return pmcsr & PCI_PM_CTRL_STATE_MASK; ++} ++ ++/* ++ * Update the PM capability state based on the new value stored in config ++ * space respective to the old, pre-write state provided. If the new value ++ * is rejected (unsupported or invalid transition) restore the old value. ++ * Return the resulting PM state. ++ */ ++static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old) ++{ ++ uint16_t pmc; ++ uint8_t new; ++ ++ if (!(d->cap_present & QEMU_PCI_CAP_PM) || ++ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) { ++ return old; ++ } ++ ++ new = pci_pm_state(d); ++ if (new == old) { ++ return old; ++ } ++ ++ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC); ++ ++ /* ++ * Transitions to D1 & D2 are only allowed if supported. Devices may ++ * only transition to higher D-states or to D0. ++ */ ++ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) || ++ (!(pmc & PCI_PM_CAP_D2) && new == 2) || ++ (old && new && new < old)) { ++ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL, ++ PCI_PM_CTRL_STATE_MASK); ++ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL, ++ old); ++ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d), ++ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn), ++ old, new); ++ return old; ++ } ++ ++ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn), ++ PCI_FUNC(d->devfn), old, new); ++ return new; ++} ++ + static void pci_reset_regions(PCIDevice *dev) + { + int r; +@@ -404,6 +482,11 @@ static void pci_do_device_reset(PCIDevice *dev) + pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) | + pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE)); + dev->config[PCI_CACHE_LINE_SIZE] = 0x0; ++ /* Default PM state is D0 */ ++ if (dev->cap_present & QEMU_PCI_CAP_PM) { ++ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL, ++ PCI_PM_CTRL_STATE_MASK); ++ } + pci_reset_regions(dev); + pci_update_mappings(dev); + +@@ -1525,7 +1608,7 @@ static void pci_update_mappings(PCIDevice *d) + continue; + + new_addr = pci_bar_address(d, i, r->type, r->size); +- if (!d->enabled) { ++ if (!d->enabled || pci_pm_state(d)) { + new_addr = PCI_BAR_UNMAPPED; + } + +@@ -1591,6 +1674,7 @@ uint32_t pci_default_read_config(PCIDevice *d, + + void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l) + { ++ uint8_t new_pm_state, old_pm_state = pci_pm_state(d); + int i, was_irq_disabled = pci_irq_disabled(d); + uint32_t val = val_in; + +@@ -1603,11 +1687,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int + d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask); + d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */ + } ++ ++ new_pm_state = pci_pm_update(d, addr, l, old_pm_state); ++ + if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) || + ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) || + ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) || +- range_covers_byte(addr, l, PCI_COMMAND)) ++ range_covers_byte(addr, l, PCI_COMMAND) || ++ !!new_pm_state != !!old_pm_state) { + pci_update_mappings(d); ++ } + + if (ranges_overlap(addr, l, PCI_COMMAND, 2)) { + pci_update_irq_disabled(d, was_irq_disabled); +diff --git a/hw/pci/trace-events b/hw/pci/trace-events +index 19643aa8c6..c82a87ffdd 100644 +--- a/hw/pci/trace-events ++++ b/hw/pci/trace-events +@@ -1,6 +1,8 @@ + # See docs/devel/tracing.rst for syntax documentation. + + # pci.c ++pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d" ++pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d" + pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 + pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64 + pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s" +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index 45365ae085..afeb5a2263 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -213,6 +213,8 @@ enum { + QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), + #define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12 + QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR), ++#define QEMU_PCI_CAP_PM_BITNR 14 ++ QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR), + }; + + typedef struct PCIINTxRoute { +@@ -680,5 +682,6 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) + MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); + void pci_set_enabled(PCIDevice *pci_dev, bool state); + void pci_set_power(PCIDevice *pci_dev, bool state); ++int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp); + + #endif +diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h +index f38fb31119..325d7bcaf7 100644 +--- a/include/hw/pci/pci_device.h ++++ b/include/hw/pci/pci_device.h +@@ -105,6 +105,9 @@ struct PCIDevice { + /* Capability bits */ + uint32_t cap_present; + ++ /* Offset of PM capability in config space */ ++ uint8_t pm_cap; ++ + /* Offset of MSI-X capability in config space */ + uint8_t msix_cap; + +-- +2.48.1 + diff --git a/kvm-hw-pci-Rename-has_power-to-enabled.patch b/kvm-hw-pci-Rename-has_power-to-enabled.patch new file mode 100644 index 0000000..4041ddb --- /dev/null +++ b/kvm-hw-pci-Rename-has_power-to-enabled.patch @@ -0,0 +1,130 @@ +From 8711bb1a54d4f5734d44545cd8e7262bc358f51d Mon Sep 17 00:00:00 2001 +From: Akihiko Odaki +Date: Thu, 9 Jan 2025 15:29:46 +0900 +Subject: [PATCH 1/7] hw/pci: Rename has_power to enabled +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [1/6] ac8a7427a1203e33aa323933818a7114c0eb4520 (eauger1/centos-qemu-kvm) + +The renamed state will not only represent powering state of PFs, but +also represent SR-IOV VF enablement in the future. + +Signed-off-by: Akihiko Odaki +Reviewed-by: Philippe Mathieu-Daudé +Message-ID: <20250109-reuse-v19-1-f541e82ca5f7@daynix.com> +Signed-off-by: Philippe Mathieu-Daudé +(cherry picked from commit c407eef162f765dd83d45e048585731be41a66fc) +Signed-off-by: Eric Auger +--- + hw/pci/pci.c | 17 +++++++++++------ + hw/pci/pci_host.c | 4 ++-- + include/hw/pci/pci.h | 1 + + include/hw/pci/pci_device.h | 2 +- + 4 files changed, 15 insertions(+), 9 deletions(-) + +diff --git a/hw/pci/pci.c b/hw/pci/pci.c +index fab86d0567..83c9d5b9ea 100644 +--- a/hw/pci/pci.c ++++ b/hw/pci/pci.c +@@ -1525,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d) + continue; + + new_addr = pci_bar_address(d, i, r->type, r->size); +- if (!d->has_power) { ++ if (!d->enabled) { + new_addr = PCI_BAR_UNMAPPED; + } + +@@ -1613,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int + pci_update_irq_disabled(d, was_irq_disabled); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) +- & PCI_COMMAND_MASTER) && d->has_power); ++ & PCI_COMMAND_MASTER) && d->enabled); + } + + msi_write_config(d, addr, val_in, l); +@@ -2886,16 +2886,21 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector) + + void pci_set_power(PCIDevice *d, bool state) + { +- if (d->has_power == state) { ++ pci_set_enabled(d, state); ++} ++ ++void pci_set_enabled(PCIDevice *d, bool state) ++{ ++ if (d->enabled == state) { + return; + } + +- d->has_power = state; ++ d->enabled = state; + pci_update_mappings(d); + memory_region_set_enabled(&d->bus_master_enable_region, + (pci_get_word(d->config + PCI_COMMAND) +- & PCI_COMMAND_MASTER) && d->has_power); +- if (!d->has_power) { ++ & PCI_COMMAND_MASTER) && d->enabled); ++ if (!d->enabled) { + pci_device_reset(d); + } + } +diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c +index dfe6fe6184..0d82727cc9 100644 +--- a/hw/pci/pci_host.c ++++ b/hw/pci/pci_host.c +@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr, + * allowing direct removal of unexposed functions. + */ + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || +- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { ++ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { + return; + } + +@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr, + * allowing direct removal of unexposed functions. + */ + if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) || +- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) { ++ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) { + return ~0x0; + } + +diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h +index eb26cac810..45365ae085 100644 +--- a/include/hw/pci/pci.h ++++ b/include/hw/pci/pci.h +@@ -678,6 +678,7 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev) + } + + MSIMessage pci_get_msi_message(PCIDevice *dev, int vector); ++void pci_set_enabled(PCIDevice *pci_dev, bool state); + void pci_set_power(PCIDevice *pci_dev, bool state); + + #endif +diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h +index 15694f2489..f38fb31119 100644 +--- a/include/hw/pci/pci_device.h ++++ b/include/hw/pci/pci_device.h +@@ -57,7 +57,7 @@ typedef struct PCIReqIDCache PCIReqIDCache; + struct PCIDevice { + DeviceState qdev; + bool partially_hotplugged; +- bool has_power; ++ bool enabled; + + /* PCI config space */ + uint8_t *config; +-- +2.48.1 + diff --git a/kvm-hw-vfio-pci-Re-order-pre-reset.patch b/kvm-hw-vfio-pci-Re-order-pre-reset.patch new file mode 100644 index 0000000..7318f84 --- /dev/null +++ b/kvm-hw-vfio-pci-Re-order-pre-reset.patch @@ -0,0 +1,74 @@ +From d6a961077e753b9ad5a670a1529634fe20322ce2 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 25 Feb 2025 14:52:29 -0700 +Subject: [PATCH 6/7] hw/vfio/pci: Re-order pre-reset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [6/6] c6c386ecbabda93f8a79da926ece95c2195fbc36 (eauger1/centos-qemu-kvm) + +We want the device in the D0 power state going into reset, but the +config write can enable the BARs in the address space, which are +then removed from the address space once we clear the memory enable +bit in the command register. Re-order to clear the command bit +first, so the power state change doesn't enable the BARs. + +Cc: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Signed-off-by: Alex Williamson +Reviewed-by: Michael S. Tsirkin +Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-6-alex.williamson@redhat.com +Signed-off-by: Cédric Le Goater +(cherry picked from commit 518a69a598916749338de3852d41d961d4503115) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 595b5c9b25..ffe72fd1d0 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2414,6 +2414,15 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + + vfio_disable_interrupts(vdev); + ++ /* ++ * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. ++ * Also put INTx Disable in known state. ++ */ ++ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); ++ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | ++ PCI_COMMAND_INTX_DISABLE); ++ vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); ++ + /* Make sure the device is in D0 */ + if (pdev->pm_cap) { + uint16_t pmcsr; +@@ -2433,15 +2442,6 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + } + } + } +- +- /* +- * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master. +- * Also put INTx Disable in known state. +- */ +- cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2); +- cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | +- PCI_COMMAND_INTX_DISABLE); +- vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2); + } + + void vfio_pci_post_reset(VFIOPCIDevice *vdev) +-- +2.48.1 + diff --git a/kvm-pci-Use-PCI-PM-capability-initializer.patch b/kvm-pci-Use-PCI-PM-capability-initializer.patch new file mode 100644 index 0000000..e2470de --- /dev/null +++ b/kvm-pci-Use-PCI-PM-capability-initializer.patch @@ -0,0 +1,153 @@ +From 978951b390bb7073293c792c4714516ad40cba73 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 25 Feb 2025 14:52:26 -0700 +Subject: [PATCH 3/7] pci: Use PCI PM capability initializer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [3/6] fd862caa094490a9b8a04b00ad39ba58e0b46a7a (eauger1/centos-qemu-kvm) + +Switch callers directly initializing the PCI PM capability with +pci_add_capability() to use pci_pm_init(). + +Cc: Dmitry Fleytman +Cc: Akihiko Odaki +Cc: Jason Wang +Cc: Stefan Weil +Cc: Sriram Yagnaraman +Cc: Keith Busch +Cc: Klaus Jensen +Cc: Jesper Devantier +Cc: Michael S. Tsirkin +Cc: Marcel Apfelbaum +Cc: Cédric Le Goater +Signed-off-by: Alex Williamson +Reviewed-by: Eric Auger +Reviewed-by: Akihiko Odaki +Reviewed-by: Michael S. Tsirkin +Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-3-alex.williamson@redhat.com +Signed-off-by: Cédric Le Goater +(cherry picked from commit 0681ec253141d838210b3c5e6bc0d2d71f2e111e) +Signed-off-by: Eric Auger +--- + hw/net/e1000e.c | 3 +-- + hw/net/eepro100.c | 4 +--- + hw/net/igb.c | 3 +-- + hw/nvme/ctrl.c | 3 +-- + hw/pci-bridge/pcie_pci_bridge.c | 2 +- + hw/vfio/pci.c | 7 ++++++- + hw/virtio/virtio-pci.c | 3 +-- + 7 files changed, 12 insertions(+), 13 deletions(-) + +diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c +index 843892ce09..9eb93d049d 100644 +--- a/hw/net/e1000e.c ++++ b/hw/net/e1000e.c +@@ -372,8 +372,7 @@ static int + e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) + { + Error *local_err = NULL; +- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, +- PCI_PM_SIZEOF, &local_err); ++ int ret = pci_pm_init(pdev, offset, &local_err); + + if (local_err) { + error_report_err(local_err); +diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c +index d9a70c4544..668a410055 100644 +--- a/hw/net/eepro100.c ++++ b/hw/net/eepro100.c +@@ -549,9 +549,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp) + if (info->power_management) { + /* Power Management Capabilities */ + int cfg_offset = 0xdc; +- int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM, +- cfg_offset, PCI_PM_SIZEOF, +- errp); ++ int r = pci_pm_init(&s->dev, cfg_offset, errp); + if (r < 0) { + return; + } +diff --git a/hw/net/igb.c b/hw/net/igb.c +index b92bba402e..a3c22e2391 100644 +--- a/hw/net/igb.c ++++ b/hw/net/igb.c +@@ -356,8 +356,7 @@ static int + igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc) + { + Error *local_err = NULL; +- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset, +- PCI_PM_SIZEOF, &local_err); ++ int ret = pci_pm_init(pdev, offset, &local_err); + + if (local_err) { + error_report_err(local_err); +diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c +index 9f277b81d8..d451ee0d00 100644 +--- a/hw/nvme/ctrl.c ++++ b/hw/nvme/ctrl.c +@@ -8293,8 +8293,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset) + Error *err = NULL; + int ret; + +- ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset, +- PCI_PM_SIZEOF, &err); ++ ret = pci_pm_init(pci_dev, offset, &err); + if (err) { + error_report_err(err); + return ret; +diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c +index 7646ac2397..2f098e3a13 100644 +--- a/hw/pci-bridge/pcie_pci_bridge.c ++++ b/hw/pci-bridge/pcie_pci_bridge.c +@@ -52,7 +52,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp) + goto cap_error; + } + +- pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp); ++ pos = pci_pm_init(d, 0, errp); + if (pos < 0) { + goto pm_error; + } +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index 82a47edc89..e18b57d864 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2220,7 +2220,12 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) + case PCI_CAP_ID_PM: + vfio_check_pm_reset(vdev, pos); + vdev->pm_cap = pos; +- ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0; ++ ret = pci_pm_init(pdev, pos, errp) >= 0; ++ /* ++ * PCI-core config space emulation needs write access to the power ++ * state enabled for tracking BAR mapping relative to PM state. ++ */ ++ pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK); + break; + case PCI_CAP_ID_AF: + vfio_check_af_flr(vdev, pos); +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 524b63e5c7..4b2aeaad8d 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -2195,8 +2195,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) + pos = pcie_endpoint_cap_init(pci_dev, 0); + assert(pos > 0); + +- pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0, +- PCI_PM_SIZEOF, errp); ++ pos = pci_pm_init(pci_dev, 0, errp); + if (pos < 0) { + return; + } +-- +2.48.1 + diff --git a/kvm-pcie-virtio-Remove-redundant-pm_cap.patch b/kvm-pcie-virtio-Remove-redundant-pm_cap.patch new file mode 100644 index 0000000..15d82a2 --- /dev/null +++ b/kvm-pcie-virtio-Remove-redundant-pm_cap.patch @@ -0,0 +1,99 @@ +From 274e81bcf091c981d1e27e49fbe98e63d5308472 Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 25 Feb 2025 14:52:28 -0700 +Subject: [PATCH 5/7] pcie, virtio: Remove redundant pm_cap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [5/6] 81c6e3c9c52a0b3f0b9269b4ac7f56e8e4b5d68b (eauger1/centos-qemu-kvm) + +The pm_cap on the PCIExpressDevice object can be distilled down +to the new instance on the PCIDevice object. + +Cc: Michael S. Tsirkin +Cc: Marcel Apfelbaum +Reviewed-by: Michael S. Tsirkin +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Signed-off-by: Alex Williamson +Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-5-alex.williamson@redhat.com +Signed-off-by: Cédric Le Goater +(cherry picked from commit 8b8d08cf293b930d0f55b2d5385d8dd27e0c6b41) +Signed-off-by: Eric Auger +--- + hw/pci-bridge/pcie_pci_bridge.c | 1 - + hw/virtio/virtio-pci.c | 8 +++----- + include/hw/pci/pcie.h | 2 -- + 3 files changed, 3 insertions(+), 8 deletions(-) + +diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c +index 2f098e3a13..c0ba6d7928 100644 +--- a/hw/pci-bridge/pcie_pci_bridge.c ++++ b/hw/pci-bridge/pcie_pci_bridge.c +@@ -56,7 +56,6 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp) + if (pos < 0) { + goto pm_error; + } +- d->exp.pm_cap = pos; + pci_set_word(d->config + pos + PCI_PM_PMC, 0x3); + + pcie_cap_arifwd_init(d); +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 4b2aeaad8d..a85787b837 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -2200,8 +2200,6 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp) + return; + } + +- pci_dev->exp.pm_cap = pos; +- + /* + * Indicates that this function complies with revision 1.2 of the + * PCI Power Management Interface Specification. +@@ -2295,11 +2293,11 @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev) + { + uint16_t pmcsr; + +- if (!pci_is_express(dev) || !dev->exp.pm_cap) { ++ if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) { + return false; + } + +- pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL); ++ pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL); + + /* + * When No_Soft_Reset bit is set and the device +@@ -2328,7 +2326,7 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type) + + if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) { + pci_word_test_and_clear_mask( +- dev->config + dev->exp.pm_cap + PCI_PM_CTRL, ++ dev->config + dev->pm_cap + PCI_PM_CTRL, + PCI_PM_CTRL_STATE_MASK); + } + } +diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h +index 5eddb90976..8a30d07fd0 100644 +--- a/include/hw/pci/pcie.h ++++ b/include/hw/pci/pcie.h +@@ -58,8 +58,6 @@ typedef enum { + struct PCIExpressDevice { + /* Offset of express capability in config space */ + uint8_t exp_cap; +- /* Offset of Power Management capability in config space */ +- uint8_t pm_cap; + + /* SLOT */ + bool hpev_notified; /* Logical AND of conditions for hot plug event. +-- +2.48.1 + diff --git a/kvm-vfio-pci-Delete-local-pm_cap.patch b/kvm-vfio-pci-Delete-local-pm_cap.patch new file mode 100644 index 0000000..0d18886 --- /dev/null +++ b/kvm-vfio-pci-Delete-local-pm_cap.patch @@ -0,0 +1,81 @@ +From 80be4b7d44d4721bacaa6205a47f2d898a090c6b Mon Sep 17 00:00:00 2001 +From: Alex Williamson +Date: Tue, 25 Feb 2025 14:52:27 -0700 +Subject: [PATCH 4/7] vfio/pci: Delete local pm_cap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Eric Auger +RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing +RH-Jira: RHEL-7301 +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Alex Williamson +RH-Acked-by: Jon Maloy +RH-Commit: [4/6] 85bd6b15af7c483e36e265c12b7b1689a4872f4c (eauger1/centos-qemu-kvm) + +This is now redundant to PCIDevice.pm_cap. + +Cc: Cédric Le Goater +Reviewed-by: Zhenzhong Duan +Reviewed-by: Eric Auger +Signed-off-by: Alex Williamson +Reviewed-by: Michael S. Tsirkin +Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-4-alex.williamson@redhat.com +Signed-off-by: Cédric Le Goater +(cherry picked from commit 05c6a8eff6298675080aa2692ee05a310b3483b4) +Signed-off-by: Eric Auger +--- + hw/vfio/pci.c | 9 ++++----- + hw/vfio/pci.h | 1 - + 2 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c +index e18b57d864..595b5c9b25 100644 +--- a/hw/vfio/pci.c ++++ b/hw/vfio/pci.c +@@ -2219,7 +2219,6 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp) + break; + case PCI_CAP_ID_PM: + vfio_check_pm_reset(vdev, pos); +- vdev->pm_cap = pos; + ret = pci_pm_init(pdev, pos, errp) >= 0; + /* + * PCI-core config space emulation needs write access to the power +@@ -2416,17 +2415,17 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev) + vfio_disable_interrupts(vdev); + + /* Make sure the device is in D0 */ +- if (vdev->pm_cap) { ++ if (pdev->pm_cap) { + uint16_t pmcsr; + uint8_t state; + +- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); ++ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); + state = pmcsr & PCI_PM_CTRL_STATE_MASK; + if (state) { + pmcsr &= ~PCI_PM_CTRL_STATE_MASK; +- vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); ++ vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2); + /* vfio handles the necessary delay here */ +- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2); ++ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2); + state = pmcsr & PCI_PM_CTRL_STATE_MASK; + if (state) { + error_report("vfio: Unable to power on device, stuck in D%d", +diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h +index 0d3c93fb2e..ca8d55f8b2 100644 +--- a/hw/vfio/pci.h ++++ b/hw/vfio/pci.h +@@ -161,7 +161,6 @@ struct VFIOPCIDevice { + int32_t bootindex; + uint32_t igd_gms; + OffAutoPCIBAR msix_relo; +- uint8_t pm_cap; + uint8_t nv_gpudirect_clique; + bool pci_aer; + bool req_enabled; +-- +2.48.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 6247558..1eb6843 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 9.1.0 -Release: 16%{?rcrel}%{?dist}%{?cc_suffix} +Release: 17%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -481,6 +481,18 @@ Patch154: kvm-qga-implement-a-guest-get-load-command.patch Patch155: kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch # For RHEL-47340 - [Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully Patch156: kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch157: kvm-hw-pci-Rename-has_power-to-enabled.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch158: kvm-hw-pci-Basic-support-for-PCI-power-management.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch159: kvm-pci-Use-PCI-PM-capability-initializer.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch160: kvm-vfio-pci-Delete-local-pm_cap.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch161: kvm-pcie-virtio-Remove-redundant-pm_cap.patch +# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown +Patch162: kvm-hw-vfio-pci-Re-order-pre-reset.patch %if %{have_clang} BuildRequires: clang @@ -638,6 +650,7 @@ This package provides documentation and auxiliary programs used with %{name}. %package tools Summary: %{name} support tools Recommends: systemtap-client +Recommends: systemtap-devel %description tools %{name}-tools provides various tools related to %{name} usage. @@ -1548,6 +1561,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Mar 31 2025 Jon Maloy - 9.1.0-17 +- kvm-hw-pci-Rename-has_power-to-enabled.patch [RHEL-7301] +- kvm-hw-pci-Basic-support-for-PCI-power-management.patch [RHEL-7301] +- kvm-pci-Use-PCI-PM-capability-initializer.patch [RHEL-7301] +- kvm-vfio-pci-Delete-local-pm_cap.patch [RHEL-7301] +- kvm-pcie-virtio-Remove-redundant-pm_cap.patch [RHEL-7301] +- kvm-hw-vfio-pci-Re-order-pre-reset.patch [RHEL-7301] +- kvm-Also-recommend-systemtap-devel-from-qemu-tools.patch [RHEL-47340] +- Resolves: RHEL-7301 + ([intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown) +- Resolves: RHEL-47340 + ([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully) + * Thu Mar 20 2025 Jon Maloy - 9.1.0-16 - kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188] - kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188]