* Mon Mar 31 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-17

- kvm-hw-pci-Rename-has_power-to-enabled.patch [RHEL-7301]
- kvm-hw-pci-Basic-support-for-PCI-power-management.patch [RHEL-7301]
- kvm-pci-Use-PCI-PM-capability-initializer.patch [RHEL-7301]
- kvm-vfio-pci-Delete-local-pm_cap.patch [RHEL-7301]
- kvm-pcie-virtio-Remove-redundant-pm_cap.patch [RHEL-7301]
- kvm-hw-vfio-pci-Re-order-pre-reset.patch [RHEL-7301]
- kvm-Also-recommend-systemtap-devel-from-qemu-tools.patch [RHEL-47340]
- Resolves: RHEL-7301
  ([intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown)
- Resolves: RHEL-47340
  ([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully)
This commit is contained in:
Jon Maloy 2025-03-31 17:55:14 -04:00
parent 48e88e8e19
commit 5b6159d787
7 changed files with 806 additions and 1 deletions

View File

@ -0,0 +1,242 @@
From 98b0cd83c09d35a3da0ae142c09038174355e87e Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 25 Feb 2025 14:52:25 -0700
Subject: [PATCH 2/7] hw/pci: Basic support for PCI power management
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [2/6] 5faff6382c124711887704fff4f857e8f85e7be5 (eauger1/centos-qemu-kvm)
Conflicts: contextual conflict in include/hw/pci/pci.h
we don't have 449dca6ac93a ("pcie: enable Extended tag field support")
downstream so we don't have x-pcie-ext-tag definition.
The memory and IO BARs for devices are only accessible in the D0 power
state. In other power states the PCI spec defines that the device
responds to TLPs and messages with an Unsupported Request response.
To approximate this behavior, consider the BARs as unmapped when the
device is not in the D0 power state. This makes the BARs inaccessible
and has the additional bonus for vfio-pci that we don't attempt to DMA
map BARs for devices in a non-D0 power state.
To support this, an interface is added for devices to register the PM
capability, which allows central tracking to enforce valid transitions
and unmap BARs in non-D0 states.
NB. We currently have device models (eepro100 and pcie_pci_bridge)
that register a PM capability but do not set wmask to enable writes to
the power state field. In order to maintain migration compatibility,
this new helper does not manage the wmask to enable guest writes to
initiate a power state change. The contents and write access of the
PM capability are still managed by the caller.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-2-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 9461afd2008b0820fc45a6a7bc675df1b6791e4f)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/pci/pci.c | 93 ++++++++++++++++++++++++++++++++++++-
hw/pci/trace-events | 2 +
include/hw/pci/pci.h | 3 ++
include/hw/pci/pci_device.h | 3 ++
4 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 83c9d5b9ea..d774ae47d2 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -365,6 +365,84 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
attrs, NULL);
}
+/*
+ * Register and track a PM capability. If wmask is also enabled for the power
+ * state field of the pmcsr register, guest writes may change the device PM
+ * state. BAR access is only enabled while the device is in the D0 state.
+ * Return the capability offset or negative error code.
+ */
+int pci_pm_init(PCIDevice *d, uint8_t offset, Error **errp)
+{
+ int cap = pci_add_capability(d, PCI_CAP_ID_PM, offset, PCI_PM_SIZEOF, errp);
+
+ if (cap < 0) {
+ return cap;
+ }
+
+ d->pm_cap = cap;
+ d->cap_present |= QEMU_PCI_CAP_PM;
+
+ return cap;
+}
+
+static uint8_t pci_pm_state(PCIDevice *d)
+{
+ uint16_t pmcsr;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM)) {
+ return 0;
+ }
+
+ pmcsr = pci_get_word(d->config + d->pm_cap + PCI_PM_CTRL);
+
+ return pmcsr & PCI_PM_CTRL_STATE_MASK;
+}
+
+/*
+ * Update the PM capability state based on the new value stored in config
+ * space respective to the old, pre-write state provided. If the new value
+ * is rejected (unsupported or invalid transition) restore the old value.
+ * Return the resulting PM state.
+ */
+static uint8_t pci_pm_update(PCIDevice *d, uint32_t addr, int l, uint8_t old)
+{
+ uint16_t pmc;
+ uint8_t new;
+
+ if (!(d->cap_present & QEMU_PCI_CAP_PM) ||
+ !range_covers_byte(addr, l, d->pm_cap + PCI_PM_CTRL)) {
+ return old;
+ }
+
+ new = pci_pm_state(d);
+ if (new == old) {
+ return old;
+ }
+
+ pmc = pci_get_word(d->config + d->pm_cap + PCI_PM_PMC);
+
+ /*
+ * Transitions to D1 & D2 are only allowed if supported. Devices may
+ * only transition to higher D-states or to D0.
+ */
+ if ((!(pmc & PCI_PM_CAP_D1) && new == 1) ||
+ (!(pmc & PCI_PM_CAP_D2) && new == 2) ||
+ (old && new && new < old)) {
+ pci_word_test_and_clear_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ pci_word_test_and_set_mask(d->config + d->pm_cap + PCI_PM_CTRL,
+ old);
+ trace_pci_pm_bad_transition(d->name, pci_dev_bus_num(d),
+ PCI_SLOT(d->devfn), PCI_FUNC(d->devfn),
+ old, new);
+ return old;
+ }
+
+ trace_pci_pm_transition(d->name, pci_dev_bus_num(d), PCI_SLOT(d->devfn),
+ PCI_FUNC(d->devfn), old, new);
+ return new;
+}
+
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@@ -404,6 +482,11 @@ static void pci_do_device_reset(PCIDevice *dev)
pci_get_word(dev->wmask + PCI_INTERRUPT_LINE) |
pci_get_word(dev->w1cmask + PCI_INTERRUPT_LINE));
dev->config[PCI_CACHE_LINE_SIZE] = 0x0;
+ /* Default PM state is D0 */
+ if (dev->cap_present & QEMU_PCI_CAP_PM) {
+ pci_word_test_and_clear_mask(dev->config + dev->pm_cap + PCI_PM_CTRL,
+ PCI_PM_CTRL_STATE_MASK);
+ }
pci_reset_regions(dev);
pci_update_mappings(dev);
@@ -1525,7 +1608,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->enabled) {
+ if (!d->enabled || pci_pm_state(d)) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1591,6 +1674,7 @@ uint32_t pci_default_read_config(PCIDevice *d,
void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int l)
{
+ uint8_t new_pm_state, old_pm_state = pci_pm_state(d);
int i, was_irq_disabled = pci_irq_disabled(d);
uint32_t val = val_in;
@@ -1603,11 +1687,16 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
d->config[addr + i] = (d->config[addr + i] & ~wmask) | (val & wmask);
d->config[addr + i] &= ~(val & w1cmask); /* W1C: Write 1 to Clear */
}
+
+ new_pm_state = pci_pm_update(d, addr, l, old_pm_state);
+
if (ranges_overlap(addr, l, PCI_BASE_ADDRESS_0, 24) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS, 4) ||
ranges_overlap(addr, l, PCI_ROM_ADDRESS1, 4) ||
- range_covers_byte(addr, l, PCI_COMMAND))
+ range_covers_byte(addr, l, PCI_COMMAND) ||
+ !!new_pm_state != !!old_pm_state) {
pci_update_mappings(d);
+ }
if (ranges_overlap(addr, l, PCI_COMMAND, 2)) {
pci_update_irq_disabled(d, was_irq_disabled);
diff --git a/hw/pci/trace-events b/hw/pci/trace-events
index 19643aa8c6..c82a87ffdd 100644
--- a/hw/pci/trace-events
+++ b/hw/pci/trace-events
@@ -1,6 +1,8 @@
# See docs/devel/tracing.rst for syntax documentation.
# pci.c
+pci_pm_bad_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x REJECTED PM transition D%d->D%d"
+pci_pm_transition(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, uint8_t old, uint8_t new) "%s %02x:%02x.%x PM transition D%d->D%d"
pci_update_mappings_del(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_update_mappings_add(const char *dev, uint32_t bus, uint32_t slot, uint32_t func, int bar, uint64_t addr, uint64_t size) "%s %02x:%02x.%x %d,0x%"PRIx64"+0x%"PRIx64
pci_route_irq(int dev_irq, const char *dev_path, int parent_irq, const char *parent_path) "IRQ %d @%s -> IRQ %d @%s"
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index 45365ae085..afeb5a2263 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -213,6 +213,8 @@ enum {
QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR),
#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12
QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR),
+#define QEMU_PCI_CAP_PM_BITNR 14
+ QEMU_PCI_CAP_PM = (1 << QEMU_PCI_CAP_PM_BITNR),
};
typedef struct PCIINTxRoute {
@@ -680,5 +682,6 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
void pci_set_enabled(PCIDevice *pci_dev, bool state);
void pci_set_power(PCIDevice *pci_dev, bool state);
+int pci_pm_init(PCIDevice *pci_dev, uint8_t offset, Error **errp);
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index f38fb31119..325d7bcaf7 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -105,6 +105,9 @@ struct PCIDevice {
/* Capability bits */
uint32_t cap_present;
+ /* Offset of PM capability in config space */
+ uint8_t pm_cap;
+
/* Offset of MSI-X capability in config space */
uint8_t msix_cap;
--
2.48.1

View File

@ -0,0 +1,130 @@
From 8711bb1a54d4f5734d44545cd8e7262bc358f51d Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Date: Thu, 9 Jan 2025 15:29:46 +0900
Subject: [PATCH 1/7] hw/pci: Rename has_power to enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [1/6] ac8a7427a1203e33aa323933818a7114c0eb4520 (eauger1/centos-qemu-kvm)
The renamed state will not only represent powering state of PFs, but
also represent SR-IOV VF enablement in the future.
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Message-ID: <20250109-reuse-v19-1-f541e82ca5f7@daynix.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
(cherry picked from commit c407eef162f765dd83d45e048585731be41a66fc)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/pci/pci.c | 17 +++++++++++------
hw/pci/pci_host.c | 4 ++--
include/hw/pci/pci.h | 1 +
include/hw/pci/pci_device.h | 2 +-
4 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index fab86d0567..83c9d5b9ea 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1525,7 +1525,7 @@ static void pci_update_mappings(PCIDevice *d)
continue;
new_addr = pci_bar_address(d, i, r->type, r->size);
- if (!d->has_power) {
+ if (!d->enabled) {
new_addr = PCI_BAR_UNMAPPED;
}
@@ -1613,7 +1613,7 @@ void pci_default_write_config(PCIDevice *d, uint32_t addr, uint32_t val_in, int
pci_update_irq_disabled(d, was_irq_disabled);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
+ & PCI_COMMAND_MASTER) && d->enabled);
}
msi_write_config(d, addr, val_in, l);
@@ -2886,16 +2886,21 @@ MSIMessage pci_get_msi_message(PCIDevice *dev, int vector)
void pci_set_power(PCIDevice *d, bool state)
{
- if (d->has_power == state) {
+ pci_set_enabled(d, state);
+}
+
+void pci_set_enabled(PCIDevice *d, bool state)
+{
+ if (d->enabled == state) {
return;
}
- d->has_power = state;
+ d->enabled = state;
pci_update_mappings(d);
memory_region_set_enabled(&d->bus_master_enable_region,
(pci_get_word(d->config + PCI_COMMAND)
- & PCI_COMMAND_MASTER) && d->has_power);
- if (!d->has_power) {
+ & PCI_COMMAND_MASTER) && d->enabled);
+ if (!d->enabled) {
pci_device_reset(d);
}
}
diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c
index dfe6fe6184..0d82727cc9 100644
--- a/hw/pci/pci_host.c
+++ b/hw/pci/pci_host.c
@@ -86,7 +86,7 @@ void pci_host_config_write_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return;
}
@@ -111,7 +111,7 @@ uint32_t pci_host_config_read_common(PCIDevice *pci_dev, uint32_t addr,
* allowing direct removal of unexposed functions.
*/
if ((pci_dev->qdev.hotplugged && !pci_get_function_0(pci_dev)) ||
- !pci_dev->has_power || is_pci_dev_ejected(pci_dev)) {
+ !pci_dev->enabled || is_pci_dev_ejected(pci_dev)) {
return ~0x0;
}
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index eb26cac810..45365ae085 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -678,6 +678,7 @@ static inline void pci_irq_pulse(PCIDevice *pci_dev)
}
MSIMessage pci_get_msi_message(PCIDevice *dev, int vector);
+void pci_set_enabled(PCIDevice *pci_dev, bool state);
void pci_set_power(PCIDevice *pci_dev, bool state);
#endif
diff --git a/include/hw/pci/pci_device.h b/include/hw/pci/pci_device.h
index 15694f2489..f38fb31119 100644
--- a/include/hw/pci/pci_device.h
+++ b/include/hw/pci/pci_device.h
@@ -57,7 +57,7 @@ typedef struct PCIReqIDCache PCIReqIDCache;
struct PCIDevice {
DeviceState qdev;
bool partially_hotplugged;
- bool has_power;
+ bool enabled;
/* PCI config space */
uint8_t *config;
--
2.48.1

View File

@ -0,0 +1,74 @@
From d6a961077e753b9ad5a670a1529634fe20322ce2 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 25 Feb 2025 14:52:29 -0700
Subject: [PATCH 6/7] hw/vfio/pci: Re-order pre-reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [6/6] c6c386ecbabda93f8a79da926ece95c2195fbc36 (eauger1/centos-qemu-kvm)
We want the device in the D0 power state going into reset, but the
config write can enable the BARs in the address space, which are
then removed from the address space once we clear the memory enable
bit in the command register. Re-order to clear the command bit
first, so the power state change doesn't enable the BARs.
Cc: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-6-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 518a69a598916749338de3852d41d961d4503115)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/vfio/pci.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 595b5c9b25..ffe72fd1d0 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2414,6 +2414,15 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
vfio_disable_interrupts(vdev);
+ /*
+ * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
+ * Also put INTx Disable in known state.
+ */
+ cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
+ cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
+ PCI_COMMAND_INTX_DISABLE);
+ vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
+
/* Make sure the device is in D0 */
if (pdev->pm_cap) {
uint16_t pmcsr;
@@ -2433,15 +2442,6 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
}
}
}
-
- /*
- * Stop any ongoing DMA by disconnecting I/O, MMIO, and bus master.
- * Also put INTx Disable in known state.
- */
- cmd = vfio_pci_read_config(pdev, PCI_COMMAND, 2);
- cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER |
- PCI_COMMAND_INTX_DISABLE);
- vfio_pci_write_config(pdev, PCI_COMMAND, cmd, 2);
}
void vfio_pci_post_reset(VFIOPCIDevice *vdev)
--
2.48.1

View File

@ -0,0 +1,153 @@
From 978951b390bb7073293c792c4714516ad40cba73 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 25 Feb 2025 14:52:26 -0700
Subject: [PATCH 3/7] pci: Use PCI PM capability initializer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [3/6] fd862caa094490a9b8a04b00ad39ba58e0b46a7a (eauger1/centos-qemu-kvm)
Switch callers directly initializing the PCI PM capability with
pci_add_capability() to use pci_pm_init().
Cc: Dmitry Fleytman <dmitry.fleytman@gmail.com>
Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Stefan Weil <sw@weilnetz.de>
Cc: Sriram Yagnaraman <sriram.yagnaraman@ericsson.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Klaus Jensen <its@irrelevant.dk>
Cc: Jesper Devantier <foss@defmacro.it>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-3-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 0681ec253141d838210b3c5e6bc0d2d71f2e111e)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/net/e1000e.c | 3 +--
hw/net/eepro100.c | 4 +---
hw/net/igb.c | 3 +--
hw/nvme/ctrl.c | 3 +--
hw/pci-bridge/pcie_pci_bridge.c | 2 +-
hw/vfio/pci.c | 7 ++++++-
hw/virtio/virtio-pci.c | 3 +--
7 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
index 843892ce09..9eb93d049d 100644
--- a/hw/net/e1000e.c
+++ b/hw/net/e1000e.c
@@ -372,8 +372,7 @@ static int
e1000e_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
Error *local_err = NULL;
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
+ int ret = pci_pm_init(pdev, offset, &local_err);
if (local_err) {
error_report_err(local_err);
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
index d9a70c4544..668a410055 100644
--- a/hw/net/eepro100.c
+++ b/hw/net/eepro100.c
@@ -549,9 +549,7 @@ static void e100_pci_reset(EEPRO100State *s, Error **errp)
if (info->power_management) {
/* Power Management Capabilities */
int cfg_offset = 0xdc;
- int r = pci_add_capability(&s->dev, PCI_CAP_ID_PM,
- cfg_offset, PCI_PM_SIZEOF,
- errp);
+ int r = pci_pm_init(&s->dev, cfg_offset, errp);
if (r < 0) {
return;
}
diff --git a/hw/net/igb.c b/hw/net/igb.c
index b92bba402e..a3c22e2391 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -356,8 +356,7 @@ static int
igb_add_pm_capability(PCIDevice *pdev, uint8_t offset, uint16_t pmc)
{
Error *local_err = NULL;
- int ret = pci_add_capability(pdev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &local_err);
+ int ret = pci_pm_init(pdev, offset, &local_err);
if (local_err) {
error_report_err(local_err);
diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 9f277b81d8..d451ee0d00 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -8293,8 +8293,7 @@ static int nvme_add_pm_capability(PCIDevice *pci_dev, uint8_t offset)
Error *err = NULL;
int ret;
- ret = pci_add_capability(pci_dev, PCI_CAP_ID_PM, offset,
- PCI_PM_SIZEOF, &err);
+ ret = pci_pm_init(pci_dev, offset, &err);
if (err) {
error_report_err(err);
return ret;
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 7646ac2397..2f098e3a13 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -52,7 +52,7 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
goto cap_error;
}
- pos = pci_add_capability(d, PCI_CAP_ID_PM, 0, PCI_PM_SIZEOF, errp);
+ pos = pci_pm_init(d, 0, errp);
if (pos < 0) {
goto pm_error;
}
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 82a47edc89..e18b57d864 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2220,7 +2220,12 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
case PCI_CAP_ID_PM:
vfio_check_pm_reset(vdev, pos);
vdev->pm_cap = pos;
- ret = pci_add_capability(pdev, cap_id, pos, size, errp) >= 0;
+ ret = pci_pm_init(pdev, pos, errp) >= 0;
+ /*
+ * PCI-core config space emulation needs write access to the power
+ * state enabled for tracking BAR mapping relative to PM state.
+ */
+ pci_set_word(pdev->wmask + pos + PCI_PM_CTRL, PCI_PM_CTRL_STATE_MASK);
break;
case PCI_CAP_ID_AF:
vfio_check_af_flr(vdev, pos);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 524b63e5c7..4b2aeaad8d 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2195,8 +2195,7 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
pos = pcie_endpoint_cap_init(pci_dev, 0);
assert(pos > 0);
- pos = pci_add_capability(pci_dev, PCI_CAP_ID_PM, 0,
- PCI_PM_SIZEOF, errp);
+ pos = pci_pm_init(pci_dev, 0, errp);
if (pos < 0) {
return;
}
--
2.48.1

View File

@ -0,0 +1,99 @@
From 274e81bcf091c981d1e27e49fbe98e63d5308472 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 25 Feb 2025 14:52:28 -0700
Subject: [PATCH 5/7] pcie, virtio: Remove redundant pm_cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [5/6] 81c6e3c9c52a0b3f0b9269b4ac7f56e8e4b5d68b (eauger1/centos-qemu-kvm)
The pm_cap on the PCIExpressDevice object can be distilled down
to the new instance on the PCIDevice object.
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Marcel Apfelbaum <marcel.apfelbaum@gmail.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-5-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 8b8d08cf293b930d0f55b2d5385d8dd27e0c6b41)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/pci-bridge/pcie_pci_bridge.c | 1 -
hw/virtio/virtio-pci.c | 8 +++-----
include/hw/pci/pcie.h | 2 --
3 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c
index 2f098e3a13..c0ba6d7928 100644
--- a/hw/pci-bridge/pcie_pci_bridge.c
+++ b/hw/pci-bridge/pcie_pci_bridge.c
@@ -56,7 +56,6 @@ static void pcie_pci_bridge_realize(PCIDevice *d, Error **errp)
if (pos < 0) {
goto pm_error;
}
- d->exp.pm_cap = pos;
pci_set_word(d->config + pos + PCI_PM_PMC, 0x3);
pcie_cap_arifwd_init(d);
diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 4b2aeaad8d..a85787b837 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -2200,8 +2200,6 @@ static void virtio_pci_realize(PCIDevice *pci_dev, Error **errp)
return;
}
- pci_dev->exp.pm_cap = pos;
-
/*
* Indicates that this function complies with revision 1.2 of the
* PCI Power Management Interface Specification.
@@ -2295,11 +2293,11 @@ static bool virtio_pci_no_soft_reset(PCIDevice *dev)
{
uint16_t pmcsr;
- if (!pci_is_express(dev) || !dev->exp.pm_cap) {
+ if (!pci_is_express(dev) || !(dev->cap_present & QEMU_PCI_CAP_PM)) {
return false;
}
- pmcsr = pci_get_word(dev->config + dev->exp.pm_cap + PCI_PM_CTRL);
+ pmcsr = pci_get_word(dev->config + dev->pm_cap + PCI_PM_CTRL);
/*
* When No_Soft_Reset bit is set and the device
@@ -2328,7 +2326,7 @@ static void virtio_pci_bus_reset_hold(Object *obj, ResetType type)
if (proxy->flags & VIRTIO_PCI_FLAG_INIT_PM) {
pci_word_test_and_clear_mask(
- dev->config + dev->exp.pm_cap + PCI_PM_CTRL,
+ dev->config + dev->pm_cap + PCI_PM_CTRL,
PCI_PM_CTRL_STATE_MASK);
}
}
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 5eddb90976..8a30d07fd0 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -58,8 +58,6 @@ typedef enum {
struct PCIExpressDevice {
/* Offset of express capability in config space */
uint8_t exp_cap;
- /* Offset of Power Management capability in config space */
- uint8_t pm_cap;
/* SLOT */
bool hpev_notified; /* Logical AND of conditions for hot plug event.
--
2.48.1

View File

@ -0,0 +1,81 @@
From 80be4b7d44d4721bacaa6205a47f2d898a090c6b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 25 Feb 2025 14:52:27 -0700
Subject: [PATCH 4/7] vfio/pci: Delete local pm_cap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Eric Auger <eric.auger@redhat.com>
RH-MergeRequest: 348: PCI: Implement basic PCI PM capability backing
RH-Jira: RHEL-7301
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Alex Williamson <None>
RH-Acked-by: Jon Maloy <jmaloy@redhat.com>
RH-Commit: [4/6] 85bd6b15af7c483e36e265c12b7b1689a4872f4c (eauger1/centos-qemu-kvm)
This is now redundant to PCIDevice.pm_cap.
Cc: Cédric Le Goater <clg@redhat.com>
Reviewed-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Link: https://lore.kernel.org/qemu-devel/20250225215237.3314011-4-alex.williamson@redhat.com
Signed-off-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 05c6a8eff6298675080aa2692ee05a310b3483b4)
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
hw/vfio/pci.c | 9 ++++-----
hw/vfio/pci.h | 1 -
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e18b57d864..595b5c9b25 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2219,7 +2219,6 @@ static bool vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos, Error **errp)
break;
case PCI_CAP_ID_PM:
vfio_check_pm_reset(vdev, pos);
- vdev->pm_cap = pos;
ret = pci_pm_init(pdev, pos, errp) >= 0;
/*
* PCI-core config space emulation needs write access to the power
@@ -2416,17 +2415,17 @@ void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
vfio_disable_interrupts(vdev);
/* Make sure the device is in D0 */
- if (vdev->pm_cap) {
+ if (pdev->pm_cap) {
uint16_t pmcsr;
uint8_t state;
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
if (state) {
pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
- vfio_pci_write_config(pdev, vdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
+ vfio_pci_write_config(pdev, pdev->pm_cap + PCI_PM_CTRL, pmcsr, 2);
/* vfio handles the necessary delay here */
- pmcsr = vfio_pci_read_config(pdev, vdev->pm_cap + PCI_PM_CTRL, 2);
+ pmcsr = vfio_pci_read_config(pdev, pdev->pm_cap + PCI_PM_CTRL, 2);
state = pmcsr & PCI_PM_CTRL_STATE_MASK;
if (state) {
error_report("vfio: Unable to power on device, stuck in D%d",
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 0d3c93fb2e..ca8d55f8b2 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -161,7 +161,6 @@ struct VFIOPCIDevice {
int32_t bootindex;
uint32_t igd_gms;
OffAutoPCIBAR msix_relo;
- uint8_t pm_cap;
uint8_t nv_gpudirect_clique;
bool pci_aer;
bool req_enabled;
--
2.48.1

View File

@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 9.1.0
Release: 16%{?rcrel}%{?dist}%{?cc_suffix}
Release: 17%{?rcrel}%{?dist}%{?cc_suffix}
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@ -481,6 +481,18 @@ Patch154: kvm-qga-implement-a-guest-get-load-command.patch
Patch155: kvm-migration-Fix-UAF-for-incoming-migration-on-Migratio.patch
# For RHEL-47340 - [Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully
Patch156: kvm-scripts-improve-error-from-qemu-trace-stap-on-missin.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch157: kvm-hw-pci-Rename-has_power-to-enabled.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch158: kvm-hw-pci-Basic-support-for-PCI-power-management.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch159: kvm-pci-Use-PCI-PM-capability-initializer.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch160: kvm-vfio-pci-Delete-local-pm_cap.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch161: kvm-pcie-virtio-Remove-redundant-pm_cap.patch
# For RHEL-7301 - [intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown
Patch162: kvm-hw-vfio-pci-Re-order-pre-reset.patch
%if %{have_clang}
BuildRequires: clang
@ -638,6 +650,7 @@ This package provides documentation and auxiliary programs used with %{name}.
%package tools
Summary: %{name} support tools
Recommends: systemtap-client
Recommends: systemtap-devel
%description tools
%{name}-tools provides various tools related to %{name} usage.
@ -1548,6 +1561,19 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
%endif
%changelog
* Mon Mar 31 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-17
- kvm-hw-pci-Rename-has_power-to-enabled.patch [RHEL-7301]
- kvm-hw-pci-Basic-support-for-PCI-power-management.patch [RHEL-7301]
- kvm-pci-Use-PCI-PM-capability-initializer.patch [RHEL-7301]
- kvm-vfio-pci-Delete-local-pm_cap.patch [RHEL-7301]
- kvm-pcie-virtio-Remove-redundant-pm_cap.patch [RHEL-7301]
- kvm-hw-vfio-pci-Re-order-pre-reset.patch [RHEL-7301]
- kvm-Also-recommend-systemtap-devel-from-qemu-tools.patch [RHEL-47340]
- Resolves: RHEL-7301
([intel iommu] VFIO_MAP_DMA failed: Bad address on system_powerdown)
- Resolves: RHEL-47340
([Qemu RHEL-9] qemu-trace-stap should handle lack of stap more gracefully)
* Thu Mar 20 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-16
- kvm-hw-virtio-virtio-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188]
- kvm-hw-i386-intel-iommu-Migrate-to-3-phase-reset.patch [RHEL-7188]