- kvm-s390x-pci-add-support-for-guests-that-request-direct.patch [RHEL-11430] - kvm-s390x-pci-indicate-QEMU-supports-relaxed-translation.patch [RHEL-11430] - kvm-block-Expand-block-status-mode-from-bool-to-flags.patch [RHEL-82906 RHEL-83015] - kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch [RHEL-82906 RHEL-83015] - kvm-block-Let-bdrv_co_is_zero_fast-consolidate-adjacent-.patch [RHEL-82906 RHEL-83015] - kvm-block-Add-new-bdrv_co_is_all_zeroes-function.patch [RHEL-82906 RHEL-83015] - kvm-iotests-Improve-iotest-194-to-mirror-data.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Minor-refactoring.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Pass-full-sync-mode-rather-than-bool-to-inter.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Allow-QMP-override-to-declare-target-already-.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Drop-redundant-zero_target-parameter.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Skip-pre-zeroing-destination-if-it-is-already.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Skip-writing-zeroes-when-target-is-already-ze.patch [RHEL-82906 RHEL-83015] - kvm-iotests-common.rc-add-disk_usage-function.patch [RHEL-82906 RHEL-83015] - kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch [RHEL-82906 RHEL-83015] - kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch [RHEL-82906 RHEL-83015] - Resolves: RHEL-11430 ([IBM 9.7 FEAT] KVM: Performance Enhanced Refresh PCI Translation - qemu part) - Resolves: RHEL-82906 (--migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-9.7]) - Resolves: RHEL-83015 (Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-9.7])
257 lines
9.8 KiB
Diff
257 lines
9.8 KiB
Diff
From c60d0770ff3f9124e6e9d7beb03e1ef8067e8e26 Mon Sep 17 00:00:00 2001
|
|
From: Christoph Schlameuss <cschlame@redhat.com>
|
|
Date: Thu, 12 Jun 2025 13:25:32 +0200
|
|
Subject: [PATCH 01/16] s390x/pci: add support for guests that request direct
|
|
mapping
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Christoph Schlameuss <None>
|
|
RH-MergeRequest: 376: Draft: KVM: Performance Enhanced Refresh PCI Translation
|
|
RH-Jira: RHEL-11430
|
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
|
RH-Commit: [1/2] 11d1dd9a5add55ae43d5d922588a33945ecbfe27 (cschlame/qemu-kvm)
|
|
|
|
JIRA: https://issues.redhat.com/browse/RHEL-11430
|
|
Conflicts: hw/s390x/s390-pci-bus.c: old s390_pci_device_properties[] still has DEFINE_PROP_END_OF_LIST()
|
|
hw/s390x/s390-pci-inst.c: hw_accel.h is still in sysemu/
|
|
hw/s390x/s390-virtio-ccw.c: changes from ccw_machine_9_2_class_options() moved to ccw_rhel_machine_9_6_0_class_options()
|
|
|
|
commit dfcee1ea4c52ac60e0a06221eafb7b6253eb10c3
|
|
Author: Matthew Rosato <mjrosato@linux.ibm.com>
|
|
Date: Wed Feb 26 16:00:12 2025 -0500
|
|
|
|
s390x/pci: add support for guests that request direct mapping
|
|
|
|
When receiving a guest mpcifc(4) or mpcifc(6) instruction without the T
|
|
bit set, treat this as a request to perform direct mapping instead of
|
|
address translation. In order to facilitate this, pin the entirety of
|
|
guest memory into the host iommu.
|
|
|
|
Pinning for the direct mapping case is handled via vfio and its memory
|
|
listener. Additionally, ram discard settings are inherited from vfio:
|
|
coordinated discards (e.g. virtio-mem) are allowed while uncoordinated
|
|
discards (e.g. virtio-balloon) are disabled.
|
|
|
|
Subsequent guest DMA operations are all expected to be of the format
|
|
guest_phys+sdma, allowing them to be used as a lookup into the host
|
|
iommu table.
|
|
|
|
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
|
|
Reviewed-by: David Hildenbrand <david@redhat.com>
|
|
Message-ID: <20250226210013.238349-2-mjrosato@linux.ibm.com>
|
|
Signed-off-by: Thomas Huth <thuth@redhat.com>
|
|
|
|
Signed-off-by: Christoph Schlameuss <cschlame@redhat.com>
|
|
---
|
|
hw/s390x/s390-pci-bus.c | 39 +++++++++++++++++++++++++++++++--
|
|
hw/s390x/s390-pci-inst.c | 13 +++++++++--
|
|
hw/s390x/s390-pci-vfio.c | 23 +++++++++++++++----
|
|
hw/s390x/s390-virtio-ccw.c | 5 +++++
|
|
include/hw/s390x/s390-pci-bus.h | 3 +++
|
|
5 files changed, 75 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
|
|
index 3e57d5faca..13bc02d837 100644
|
|
--- a/hw/s390x/s390-pci-bus.c
|
|
+++ b/hw/s390x/s390-pci-bus.c
|
|
@@ -18,6 +18,8 @@
|
|
#include "hw/s390x/s390-pci-inst.h"
|
|
#include "hw/s390x/s390-pci-kvm.h"
|
|
#include "hw/s390x/s390-pci-vfio.h"
|
|
+#include "hw/s390x/s390-virtio-ccw.h"
|
|
+#include "hw/boards.h"
|
|
#include "hw/pci/pci_bus.h"
|
|
#include "hw/qdev-properties.h"
|
|
#include "hw/pci/pci_bridge.h"
|
|
@@ -724,12 +726,42 @@ void s390_pci_iommu_enable(S390PCIIOMMU *iommu)
|
|
g_free(name);
|
|
}
|
|
|
|
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu)
|
|
+{
|
|
+ MachineState *ms = MACHINE(qdev_get_machine());
|
|
+ S390CcwMachineState *s390ms = S390_CCW_MACHINE(ms);
|
|
+
|
|
+ /*
|
|
+ * For direct-mapping we must map the entire guest address space. Rather
|
|
+ * than using an iommu, create a memory region alias that maps GPA X to
|
|
+ * IOVA X + SDMA. VFIO will handle pinning via its memory listener.
|
|
+ */
|
|
+ g_autofree char *name = g_strdup_printf("iommu-dm-s390-%04x",
|
|
+ iommu->pbdev->uid);
|
|
+
|
|
+ iommu->dm_mr = g_malloc0(sizeof(*iommu->dm_mr));
|
|
+ memory_region_init_alias(iommu->dm_mr, OBJECT(&iommu->mr), name,
|
|
+ get_system_memory(), 0,
|
|
+ s390_get_memory_limit(s390ms));
|
|
+ iommu->enabled = true;
|
|
+ memory_region_add_subregion(&iommu->mr, iommu->pbdev->zpci_fn.sdma,
|
|
+ iommu->dm_mr);
|
|
+}
|
|
+
|
|
void s390_pci_iommu_disable(S390PCIIOMMU *iommu)
|
|
{
|
|
iommu->enabled = false;
|
|
g_hash_table_remove_all(iommu->iotlb);
|
|
- memory_region_del_subregion(&iommu->mr, MEMORY_REGION(&iommu->iommu_mr));
|
|
- object_unparent(OBJECT(&iommu->iommu_mr));
|
|
+ if (iommu->dm_mr) {
|
|
+ memory_region_del_subregion(&iommu->mr, iommu->dm_mr);
|
|
+ object_unparent(OBJECT(iommu->dm_mr));
|
|
+ g_free(iommu->dm_mr);
|
|
+ iommu->dm_mr = NULL;
|
|
+ } else {
|
|
+ memory_region_del_subregion(&iommu->mr,
|
|
+ MEMORY_REGION(&iommu->iommu_mr));
|
|
+ object_unparent(OBJECT(&iommu->iommu_mr));
|
|
+ }
|
|
}
|
|
|
|
static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
|
|
@@ -1130,6 +1162,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev,
|
|
/* Always intercept emulated devices */
|
|
pbdev->interp = false;
|
|
pbdev->forwarding_assist = false;
|
|
+ pbdev->rtr_avail = false;
|
|
}
|
|
|
|
if (s390_pci_msix_init(pbdev) && !pbdev->interp) {
|
|
@@ -1488,6 +1521,8 @@ static Property s390_pci_device_properties[] = {
|
|
DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true),
|
|
DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist,
|
|
true),
|
|
+ DEFINE_PROP_BOOL("relaxed-translation", S390PCIBusDevice, rtr_avail,
|
|
+ true),
|
|
DEFINE_PROP_END_OF_LIST(),
|
|
};
|
|
|
|
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
|
|
index 30149546c0..803ebcd9b3 100644
|
|
--- a/hw/s390x/s390-pci-inst.c
|
|
+++ b/hw/s390x/s390-pci-inst.c
|
|
@@ -16,6 +16,7 @@
|
|
#include "exec/memory.h"
|
|
#include "qemu/error-report.h"
|
|
#include "sysemu/hw_accel.h"
|
|
+#include "hw/boards.h"
|
|
#include "hw/pci/pci_device.h"
|
|
#include "hw/s390x/s390-pci-inst.h"
|
|
#include "hw/s390x/s390-pci-bus.h"
|
|
@@ -1008,17 +1009,25 @@ static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib,
|
|
}
|
|
|
|
/* currently we only support designation type 1 with translation */
|
|
- if (!(dt == ZPCI_IOTA_RTTO && t)) {
|
|
+ if (t && dt != ZPCI_IOTA_RTTO) {
|
|
error_report("unsupported ioat dt %d t %d", dt, t);
|
|
s390_program_interrupt(env, PGM_OPERAND, ra);
|
|
return -EINVAL;
|
|
+ } else if (!t && !pbdev->rtr_avail) {
|
|
+ error_report("relaxed translation not allowed");
|
|
+ s390_program_interrupt(env, PGM_OPERAND, ra);
|
|
+ return -EINVAL;
|
|
}
|
|
|
|
iommu->pba = pba;
|
|
iommu->pal = pal;
|
|
iommu->g_iota = g_iota;
|
|
|
|
- s390_pci_iommu_enable(iommu);
|
|
+ if (t) {
|
|
+ s390_pci_iommu_enable(iommu);
|
|
+ } else {
|
|
+ s390_pci_iommu_direct_map_enable(iommu);
|
|
+ }
|
|
|
|
return 0;
|
|
}
|
|
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
|
|
index 7dbbc76823..443e222912 100644
|
|
--- a/hw/s390x/s390-pci-vfio.c
|
|
+++ b/hw/s390x/s390-pci-vfio.c
|
|
@@ -131,13 +131,28 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev,
|
|
/* Store function type separately for type-specific behavior */
|
|
pbdev->pft = cap->pft;
|
|
|
|
+ /*
|
|
+ * If the device is a passthrough ISM device, disallow relaxed
|
|
+ * translation.
|
|
+ */
|
|
+ if (pbdev->pft == ZPCI_PFT_ISM) {
|
|
+ pbdev->rtr_avail = false;
|
|
+ }
|
|
+
|
|
/*
|
|
* If appropriate, reduce the size of the supported DMA aperture reported
|
|
- * to the guest based upon the vfio DMA limit.
|
|
+ * to the guest based upon the vfio DMA limit. This is applicable for
|
|
+ * devices that are guaranteed to not use relaxed translation. If the
|
|
+ * device is capable of relaxed translation then we must advertise the
|
|
+ * full aperture. In this case, if translation is used then we will
|
|
+ * rely on the vfio DMA limit counting and use RPCIT CC1 / status 16
|
|
+ * to request that the guest free DMA mappings as necessary.
|
|
*/
|
|
- vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
|
|
- if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
|
|
- pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
|
|
+ if (!pbdev->rtr_avail) {
|
|
+ vfio_size = pbdev->iommu->max_dma_limit << TARGET_PAGE_BITS;
|
|
+ if (vfio_size > 0 && vfio_size < cap->end_dma - cap->start_dma + 1) {
|
|
+ pbdev->zpci_fn.edma = cap->start_dma + vfio_size - 1;
|
|
+ }
|
|
}
|
|
}
|
|
|
|
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
|
|
index 312e8f18aa..77a1bde71e 100644
|
|
--- a/hw/s390x/s390-virtio-ccw.c
|
|
+++ b/hw/s390x/s390-virtio-ccw.c
|
|
@@ -1348,8 +1348,13 @@ static void ccw_rhel_machine_9_6_0_instance_options(MachineState *machine)
|
|
|
|
static void ccw_rhel_machine_9_6_0_class_options(MachineClass *mc)
|
|
{
|
|
+ static GlobalProperty compat[] = {
|
|
+ { TYPE_S390_PCI_DEVICE, "relaxed-translation", "off", },
|
|
+ };
|
|
+
|
|
/* NB: remember to move this line to the *latest* RHEL 9 machine */
|
|
compat_props_add(mc->compat_props, hw_compat_rhel_9, hw_compat_rhel_9_len);
|
|
+ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
|
|
}
|
|
DEFINE_CCW_MACHINE_AS_LATEST(9, 6, 0);
|
|
|
|
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
|
|
index 2c43ea123f..04944d4fed 100644
|
|
--- a/include/hw/s390x/s390-pci-bus.h
|
|
+++ b/include/hw/s390x/s390-pci-bus.h
|
|
@@ -277,6 +277,7 @@ struct S390PCIIOMMU {
|
|
AddressSpace as;
|
|
MemoryRegion mr;
|
|
IOMMUMemoryRegion iommu_mr;
|
|
+ MemoryRegion *dm_mr;
|
|
bool enabled;
|
|
uint64_t g_iota;
|
|
uint64_t pba;
|
|
@@ -362,6 +363,7 @@ struct S390PCIBusDevice {
|
|
bool interp;
|
|
bool forwarding_assist;
|
|
bool aif;
|
|
+ bool rtr_avail;
|
|
QTAILQ_ENTRY(S390PCIBusDevice) link;
|
|
};
|
|
|
|
@@ -389,6 +391,7 @@ int pci_chsc_sei_nt2_have_event(void);
|
|
void s390_pci_sclp_configure(SCCB *sccb);
|
|
void s390_pci_sclp_deconfigure(SCCB *sccb);
|
|
void s390_pci_iommu_enable(S390PCIIOMMU *iommu);
|
|
+void s390_pci_iommu_direct_map_enable(S390PCIIOMMU *iommu);
|
|
void s390_pci_iommu_disable(S390PCIIOMMU *iommu);
|
|
void s390_pci_generate_error_event(uint16_t pec, uint32_t fh, uint32_t fid,
|
|
uint64_t faddr, uint32_t e);
|
|
--
|
|
2.48.1
|
|
|