- qemu: Implement support for associating iommufd to hostdev (RHEL-126346, RHEL-74202) - qemu: Introduce privateData for hostdevs (RHEL-126346, RHEL-74202) - qemu: Support per-process memory accounting for iommufd (RHEL-126346, RHEL-74202) - qemu: open VFIO FDs from libvirt backend (RHEL-126346, RHEL-74202) - qemu: open iommufd FD from libvirt backend (RHEL-126346, RHEL-74202) - qemu: Update Cgroup, namespace, and seclabel for iommufd (RHEL-126346, RHEL-74202) - tests: qemuxmlconfdata: provide iommufd sample XML and CLI args (RHEL-126346, RHEL-74202) Resolves: RHEL-126346, RHEL-74202
248 lines
8.3 KiB
Diff
248 lines
8.3 KiB
Diff
From 68a23646ba165aa45d3811d626885054ae9d9299 Mon Sep 17 00:00:00 2001
|
|
Message-ID: <68a23646ba165aa45d3811d626885054ae9d9299.1770383182.git.jdenemar@redhat.com>
|
|
From: Nathan Chen <nathanc@nvidia.com>
|
|
Date: Fri, 30 Jan 2026 10:59:15 -0800
|
|
Subject: [PATCH] qemu: open VFIO FDs from libvirt backend
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
Open VFIO FDs from libvirt backend without exposing
|
|
these FDs to XML users, i.e. one per iommufd hostdev
|
|
for /dev/vfio/devices/vfioX, and pass the FD to qemu
|
|
command line.
|
|
|
|
Suggested-by: Ján Tomko <jtomko@redhat.com>
|
|
Signed-off-by: Nathan Chen <nathanc@nvidia.com>
|
|
Reviewed-by: Pavel Hrdina <phrdina@redhat.com>
|
|
(cherry picked from commit f6230804727df834da27370e835204672218ab23)
|
|
|
|
Resolves: https://issues.redhat.com/browse/RHEL-74202
|
|
Resolves: https://issues.redhat.com/browse/RHEL-126346
|
|
|
|
Signed-off-by: Pavel Hrdina <phrdina@redhat.com>
|
|
---
|
|
src/libvirt_private.syms | 1 +
|
|
src/qemu/qemu_command.c | 21 +++++++++++
|
|
src/qemu/qemu_process.c | 78 ++++++++++++++++++++++++++++++++++++++++
|
|
src/util/virpci.c | 39 ++++++++++++++++++++
|
|
src/util/virpci.h | 2 ++
|
|
5 files changed, 141 insertions(+)
|
|
|
|
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
|
|
index 66261ed6cf..e2a7a16347 100644
|
|
--- a/src/libvirt_private.syms
|
|
+++ b/src/libvirt_private.syms
|
|
@@ -3162,6 +3162,7 @@ virPCIDeviceGetStubDriverName;
|
|
virPCIDeviceGetStubDriverType;
|
|
virPCIDeviceGetUnbindFromStub;
|
|
virPCIDeviceGetUsedBy;
|
|
+virPCIDeviceGetVfioPath;
|
|
virPCIDeviceGetVPD;
|
|
virPCIDeviceHasPCIExpressLink;
|
|
virPCIDeviceIsAssignable;
|
|
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
|
|
index 1fb31d1721..83935e82c3 100644
|
|
--- a/src/qemu/qemu_command.c
|
|
+++ b/src/qemu/qemu_command.c
|
|
@@ -4803,6 +4803,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
|
|
NULL) < 0)
|
|
return NULL;
|
|
|
|
+ if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
|
|
+ pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) {
|
|
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev);
|
|
+
|
|
+ if (hostdevPriv->vfioDeviceFd != -1) {
|
|
+ g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd);
|
|
+ if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0)
|
|
+ return NULL;
|
|
+ hostdevPriv->vfioDeviceFd = -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
|
|
return NULL;
|
|
|
|
@@ -5247,6 +5259,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
|
|
if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
|
|
return -1;
|
|
|
|
+ if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
|
|
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
|
|
+
|
|
+ if (hostdevPriv->vfioDeviceFd != -1) {
|
|
+ virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd,
|
|
+ VIR_COMMAND_PASS_FD_CLOSE_PARENT);
|
|
+ }
|
|
+ }
|
|
+
|
|
if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
|
|
return -1;
|
|
|
|
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
|
|
index 0e50cd1ccc..1ac57a6321 100644
|
|
--- a/src/qemu/qemu_process.c
|
|
+++ b/src/qemu/qemu_process.c
|
|
@@ -103,6 +103,7 @@
|
|
#include "storage_source.h"
|
|
#include "backup_conf.h"
|
|
#include "storage_file_probe.h"
|
|
+#include "virpci.h"
|
|
|
|
#include "logging/log_manager.h"
|
|
#include "logging/log_protocol.h"
|
|
@@ -7671,6 +7672,81 @@ qemuProcessPrepareHostBackendChardevHotplug(virDomainObj *vm,
|
|
return 0;
|
|
}
|
|
|
|
+/**
|
|
+ * qemuProcessOpenVfioDeviceFd:
|
|
+ * @hostdev: host device definition; must be a PCI subsys hostdev
|
|
+ *
|
|
+ * Looks up the VFIO device path of @hostdev with virPCIDeviceGetVfioPath()
|
|
+ * and opens it with O_RDWR | O_CLOEXEC.
|
|
+ *
|
|
+ * Returns: the opened FD on success, -1 with an error reported on failure
|
|
+ */
|
|
+static int
|
|
+qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev)
|
|
+{
|
|
+ g_autofree char *vfioPath = NULL;
|
|
+ int fd = -1;
|
|
+
|
|
+ if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
|
|
+ hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
|
|
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
|
|
+ _("VFIO FD only supported for PCI hostdevs"));
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0)
|
|
+ return -1;
|
|
+
|
|
+ VIR_DEBUG("Opening VFIO device %s", vfioPath);
|
|
+
|
|
+ if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) {
|
|
+ if (errno == ENOENT) {
|
|
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
|
|
+ _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"),
|
|
+ vfioPath);
|
|
+ } else {
|
|
+ virReportSystemError(errno,
|
|
+ _("cannot open VFIO device %1$s"), vfioPath);
|
|
+ }
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath);
|
|
+ return fd;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * qemuProcessOpenVfioFds:
|
|
+ * @vm: domain object
|
|
+ *
|
|
+ * Opens a VFIO device FD for each iommufd-enabled VFIO PCI hostdev of @vm.
|
|
+ *
|
|
+ * Returns: 0 on success, -1 on the first FD that cannot be opened
|
|
+ */
|
|
+static int
|
|
+qemuProcessOpenVfioFds(virDomainObj *vm)
|
|
+{
|
|
+ size_t i;
|
|
+
|
|
+ /* Only VFIO PCI hostdevs with iommufd='yes' get an FD; it is kept in hostdevPriv */
|
|
+ for (i = 0; i < vm->def->nhostdevs; i++) {
|
|
+ virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
|
|
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
|
|
+
|
|
+ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
|
|
+ hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
|
|
+ hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
|
|
+ hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
|
|
+ /* A failure returns at once; FDs opened so far stay in their hostdevPriv */
|
|
+ hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev);
|
|
+ if (hostdevPriv->vfioDeviceFd == -1)
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
/**
|
|
* qemuProcessPrepareHost:
|
|
* @driver: qemu driver
|
|
@@ -7726,6 +7802,8 @@ qemuProcessPrepareHost(virQEMUDriver *driver,
|
|
hostdev_flags |= VIR_HOSTDEV_COLD_BOOT;
|
|
if (qemuHostdevPrepareDomainDevices(driver, vm->def, hostdev_flags) < 0)
|
|
return -1;
|
|
+ if (qemuProcessOpenVfioFds(vm) < 0)
|
|
+ return -1;
|
|
|
|
VIR_DEBUG("Preparing chr device backends");
|
|
if (qemuProcessPrepareHostBackendChardev(vm) < 0)
|
|
diff --git a/src/util/virpci.c b/src/util/virpci.c
|
|
index 90617e69c6..2348a98003 100644
|
|
--- a/src/util/virpci.c
|
|
+++ b/src/util/virpci.c
|
|
@@ -3320,3 +3320,42 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address)
|
|
{
|
|
g_free(address);
|
|
}
|
|
+
|
|
+/**
|
|
+ * virPCIDeviceGetVfioPath:
|
|
+ * @addr: host device PCI address
|
|
+ * @vfioPath: returned /dev/vfio/devices/ path (allocated); NULL on failure
|
|
+ *
|
|
+ * Uses the first "vfio"-prefixed entry read from the device's sysfs vfio-dev/.
|
|
+ *
|
|
+ * Returns: 0 on success, -1 with an error reported on failure
|
|
+ */
|
|
+int
|
|
+virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr,
|
|
+ char **vfioPath)
|
|
+{
|
|
+ g_autofree char *addrStr = NULL;
|
|
+ g_autofree char *sysfsPath = NULL;
|
|
+ g_autoptr(DIR) dir = NULL;
|
|
+ struct dirent *entry = NULL;
|
|
+
|
|
+ *vfioPath = NULL;
|
|
+ addrStr = virPCIDeviceAddressAsString(addr);
|
|
+
|
|
+ /* The entry name found in the device's vfio-dev subdirectory names the /dev node */
|
|
+ sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr);
|
|
+
|
|
+ if (virDirOpen(&dir, sysfsPath) == 1) {
|
|
+ while (virDirRead(dir, &entry, sysfsPath) > 0) {
|
|
+ if (STRPREFIX(entry->d_name, "vfio")) {
|
|
+ *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ virReportError(VIR_ERR_INTERNAL_ERROR,
|
|
+ _("cannot find VFIO device for PCI device %1$s"),
|
|
+ addrStr);
|
|
+ return -1;
|
|
+}
|
|
diff --git a/src/util/virpci.h b/src/util/virpci.h
|
|
index fc538566e1..24ede10755 100644
|
|
--- a/src/util/virpci.h
|
|
+++ b/src/util/virpci.h
|
|
@@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev);
|
|
|
|
void virPCIDeviceAddressFree(virPCIDeviceAddress *address);
|
|
|
|
+int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath);
|
|
+
|
|
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree);
|
|
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree);
|
|
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree);
|
|
--
|
|
2.52.0
|