205 lines
8.3 KiB
Diff
205 lines
8.3 KiB
Diff
From f37a1e337dd62c873f18aabd31863c8df144c7ea Mon Sep 17 00:00:00 2001
|
|
From: Alex Williamson <alex.williamson@redhat.com>
|
|
Date: Mon, 3 Dec 2018 22:01:54 +0000
|
|
Subject: [PATCH 13/16] vfio/ccw/pci: Allow devices to opt-in for ballooning
|
|
|
|
RH-Author: Alex Williamson <alex.williamson@redhat.com>
|
|
Message-id: <154387451469.27651.8657130146789267501.stgit@gimli.home>
|
|
Patchwork-id: 83236
|
|
O-Subject: [RHEL-8.0 qemu-kvm PATCH 4/7] vfio/ccw/pci: Allow devices to opt-in for ballooning
|
|
Bugzilla: 1650272
|
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
RH-Acked-by: Auger Eric <eric.auger@redhat.com>
|
|
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
|
|
RH-Acked-by: David Hildenbrand <david@redhat.com>
|
|
|
|
Bugzilla: 1650272
|
|
|
|
If a vfio assigned device makes use of a physical IOMMU, then memory
|
|
ballooning is necessarily inhibited due to the page pinning, lack of
|
|
page level granularity at the IOMMU, and lack of sufficient notifiers to both
|
|
remove the page on balloon inflation and add it back on deflation.
|
|
However, not all devices are backed by a physical IOMMU. In the case
|
|
of mediated devices, if a vendor driver is well synchronized with the
|
|
guest driver, such that only pages actively used by the guest driver
|
|
are pinned by the host mdev vendor driver, then there should be no
|
|
overlap between pages available for the balloon driver and pages
|
|
actively in use by the device. Under these conditions, ballooning
|
|
should be safe.
|
|
|
|
vfio-ccw devices are always mediated devices and always operate under
|
|
the constraints above. Therefore we can consider all vfio-ccw devices
|
|
as balloon compatible.
|
|
|
|
The situation is far from straightforward with vfio-pci. These
|
|
devices can be physical devices with physical IOMMU backing or
|
|
mediated devices where it is unknown whether a physical IOMMU is in
|
|
use or whether the vendor driver is well synchronized to the working
|
|
set of the guest driver. The safest approach is therefore to assume
|
|
all vfio-pci devices are incompatible with ballooning, but allow user
|
|
opt-in should they have further insight into mediated devices.
|
|
|
|
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
(cherry picked from commit 238e91728503d400e1c4e644e3a9b80f9e621682)
|
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
---
|
|
hw/vfio/ccw.c | 9 +++++++++
|
|
hw/vfio/common.c | 23 ++++++++++++++++++++++-
|
|
hw/vfio/pci.c | 26 +++++++++++++++++++++++++-
|
|
hw/vfio/trace-events | 1 +
|
|
include/hw/vfio/vfio-common.h | 2 ++
|
|
5 files changed, 59 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
|
|
index fe34b50..0c74dda 100644
|
|
--- a/hw/vfio/ccw.c
|
|
+++ b/hw/vfio/ccw.c
|
|
@@ -362,6 +362,15 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
|
|
}
|
|
}
|
|
|
|
+ /*
|
|
+ * All vfio-ccw devices are believed to operate in a way compatible with
|
|
+ * memory ballooning, i.e. pages pinned in the host are in the current
|
|
+ * working set of the guest driver and therefore never overlap with pages
|
|
+ * available to the guest balloon driver. This needs to be set before
|
|
+ * vfio_get_device() for vfio common to handle the balloon inhibitor.
|
|
+ */
|
|
+ vcdev->vdev.balloon_allowed = true;
|
|
+
|
|
if (vfio_get_device(group, cdev->mdevid, &vcdev->vdev, &err)) {
|
|
g_free(vcdev->vdev.name);
|
|
goto out_device_err;
|
|
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
|
|
index 7e8f289..cda2d1f 100644
|
|
--- a/hw/vfio/common.c
|
|
+++ b/hw/vfio/common.c
|
|
@@ -1376,7 +1376,9 @@ void vfio_put_group(VFIOGroup *group)
|
|
return;
|
|
}
|
|
|
|
- qemu_balloon_inhibit(false);
|
|
+ if (!group->balloon_allowed) {
|
|
+ qemu_balloon_inhibit(false);
|
|
+ }
|
|
vfio_kvm_device_del_group(group);
|
|
vfio_disconnect_container(group);
|
|
QLIST_REMOVE(group, next);
|
|
@@ -1412,6 +1414,25 @@ int vfio_get_device(VFIOGroup *group, const char *name,
|
|
return ret;
|
|
}
|
|
|
|
+ /*
|
|
+ * Clear the balloon inhibitor for this group if the driver knows the
|
|
+ * device operates compatibly with ballooning. Setting must be consistent
|
|
+ * per group, but since compatibility is really only possible with mdev
|
|
+ * currently, we expect singleton groups.
|
|
+ */
|
|
+ if (vbasedev->balloon_allowed != group->balloon_allowed) {
|
|
+ if (!QLIST_EMPTY(&group->device_list)) {
|
|
+ error_setg(errp,
|
|
+ "Inconsistent device balloon setting within group");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (!group->balloon_allowed) {
|
|
+ group->balloon_allowed = true;
|
|
+ qemu_balloon_inhibit(false);
|
|
+ }
|
|
+ }
|
|
+
|
|
vbasedev->fd = fd;
|
|
vbasedev->group = group;
|
|
QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
|
|
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
|
|
index 4683eb4..d43727f 100644
|
|
--- a/hw/vfio/pci.c
|
|
+++ b/hw/vfio/pci.c
|
|
@@ -2803,12 +2803,13 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|
VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
|
|
VFIODevice *vbasedev_iter;
|
|
VFIOGroup *group;
|
|
- char *tmp, group_path[PATH_MAX], *group_name;
|
|
+ char *tmp, *subsys, group_path[PATH_MAX], *group_name;
|
|
Error *err = NULL;
|
|
ssize_t len;
|
|
struct stat st;
|
|
int groupid;
|
|
int ret, i = 0;
|
|
+ bool is_mdev;
|
|
|
|
QLIST_FOREACH(group, &vfio_group_list, next) {
|
|
QLIST_FOREACH(vbasedev_iter, &group->device_list, next) {
|
|
@@ -2880,6 +2881,27 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
|
|
}
|
|
}
|
|
|
|
+ /*
|
|
+ * Mediated devices *might* operate compatibly with memory ballooning, but
|
|
+ * we cannot know for certain, it depends on whether the mdev vendor driver
|
|
+ * stays in sync with the active working set of the guest driver. Prevent
|
|
+ * the x-balloon-allowed option unless this is minimally an mdev device.
|
|
+ */
|
|
+ tmp = g_strdup_printf("%s/subsystem", vdev->vbasedev.sysfsdev);
|
|
+ subsys = realpath(tmp, NULL);
|
|
+ g_free(tmp);
|
|
+ is_mdev = (strcmp(subsys, "/sys/bus/mdev") == 0);
|
|
+ free(subsys);
|
|
+
|
|
+ trace_vfio_mdev(vdev->vbasedev.name, is_mdev);
|
|
+
|
|
+ if (vdev->vbasedev.balloon_allowed && !is_mdev) {
|
|
+ error_setg(errp, "x-balloon-allowed only potentially compatible "
|
|
+ "with mdev devices");
|
|
+ vfio_put_group(group);
|
|
+ goto error;
|
|
+ }
|
|
+
|
|
ret = vfio_get_device(group, vdev->vbasedev.name, &vdev->vbasedev, errp);
|
|
if (ret) {
|
|
vfio_put_group(group);
|
|
@@ -3177,6 +3199,8 @@ static Property vfio_pci_dev_properties[] = {
|
|
DEFINE_PROP_BIT("x-igd-opregion", VFIOPCIDevice, features,
|
|
VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT, false),
|
|
DEFINE_PROP_BOOL("x-no-mmap", VFIOPCIDevice, vbasedev.no_mmap, false),
|
|
+ DEFINE_PROP_BOOL("x-balloon-allowed", VFIOPCIDevice,
|
|
+ vbasedev.balloon_allowed, false),
|
|
DEFINE_PROP_BOOL("x-no-kvm-intx", VFIOPCIDevice, no_kvm_intx, false),
|
|
DEFINE_PROP_BOOL("x-no-kvm-msi", VFIOPCIDevice, no_kvm_msi, false),
|
|
DEFINE_PROP_BOOL("x-no-kvm-msix", VFIOPCIDevice, no_kvm_msix, false),
|
|
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
|
|
index 20109cb..9487887 100644
|
|
--- a/hw/vfio/trace-events
|
|
+++ b/hw/vfio/trace-events
|
|
@@ -39,6 +39,7 @@ vfio_pci_hot_reset_result(const char *name, const char *result) "%s hot reset: %
|
|
vfio_populate_device_config(const char *name, unsigned long size, unsigned long offset, unsigned long flags) "Device %s config:\n size: 0x%lx, offset: 0x%lx, flags: 0x%lx"
|
|
vfio_populate_device_get_irq_info_failure(void) "VFIO_DEVICE_GET_IRQ_INFO failure: %m"
|
|
vfio_realize(const char *name, int group_id) " (%s) group %d"
|
|
+vfio_mdev(const char *name, bool is_mdev) " (%s) is_mdev %d"
|
|
vfio_add_ext_cap_dropped(const char *name, uint16_t cap, uint16_t offset) "%s 0x%x@0x%x"
|
|
vfio_pci_reset(const char *name) " (%s)"
|
|
vfio_pci_reset_flr(const char *name) "%s FLR/VFIO_DEVICE_RESET"
|
|
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
|
|
index f29df6e..36ee657 100644
|
|
--- a/include/hw/vfio/vfio-common.h
|
|
+++ b/include/hw/vfio/vfio-common.h
|
|
@@ -123,6 +123,7 @@ typedef struct VFIODevice {
|
|
bool reset_works;
|
|
bool needs_reset;
|
|
bool no_mmap;
|
|
+ bool balloon_allowed;
|
|
VFIODeviceOps *ops;
|
|
unsigned int num_irqs;
|
|
unsigned int num_regions;
|
|
@@ -142,6 +143,7 @@ typedef struct VFIOGroup {
|
|
QLIST_HEAD(, VFIODevice) device_list;
|
|
QLIST_ENTRY(VFIOGroup) next;
|
|
QLIST_ENTRY(VFIOGroup) container_next;
|
|
+ bool balloon_allowed;
|
|
} VFIOGroup;
|
|
|
|
typedef struct VFIODMABuf {
|
|
--
|
|
1.8.3.1
|
|
|