diff --git a/libvirt-qemu-Implement-support-for-associating-iommufd-to-hostdev.patch b/libvirt-qemu-Implement-support-for-associating-iommufd-to-hostdev.patch new file mode 100644 index 0000000..8378479 --- /dev/null +++ b/libvirt-qemu-Implement-support-for-associating-iommufd-to-hostdev.patch @@ -0,0 +1,191 @@ +From bb8ef43213cb1f8c123cdcc693d99a30b09dfa16 Mon Sep 17 00:00:00 2001 +Message-ID: +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:12 -0800 +Subject: [PATCH] qemu: Implement support for associating iommufd to hostdev +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Implement a new iommufd attribute under hostdevs' PCI +subsystem driver that can be used to specify associated +iommufd object when launching a qemu VM. + +Signed-off-by: Ján Tomko +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit fd113055bb174c7284081731d16959f73796e3d7) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + docs/formatdomain.rst | 6 +++++ + src/conf/device_conf.c | 11 ++++++++ + src/conf/device_conf.h | 1 + + src/conf/schemas/basictypes.rng | 5 ++++ + src/qemu/qemu_command.c | 46 +++++++++++++++++++++++++++++++++ + 5 files changed, 69 insertions(+) + +diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst +index 1467fc7e10..167912348e 100644 +--- a/docs/formatdomain.rst ++++ b/docs/formatdomain.rst +@@ -4907,6 +4907,12 @@ or: + found is "problematic" in some way, the generic vfio-pci driver + similarly be forced. + ++ :since:`Since 12.1.0 (QEMU and KVM only)`, the ``iommufd`` attribute ++ can be used to enable the IOMMUFD backend for a VFIO device. This ++ provides an interface to propagate DMA mappings to the kernel for ++ assigned devices. Libvirt will open /dev/iommu and the VFIO device ++ cdev and pass the associated file descriptors to QEMU. 
++ + (Note: :since:`Since 1.0.5`, the ``name`` attribute has been + described to be used to select the type of PCI device assignment + ("vfio", "kvm", or "xen"), but those values have been mostly +diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c +index c278b81652..d68232a4f4 100644 +--- a/src/conf/device_conf.c ++++ b/src/conf/device_conf.c +@@ -67,6 +67,11 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node, + return -1; + } + ++ if (virXMLPropTristateBool(node, "iommufd", ++ VIR_XML_PROP_NONE, ++ &driver->iommufd) < 0) ++ return -1; ++ + driver->model = virXMLPropString(node, "model"); + return 0; + } +@@ -93,6 +98,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf, + + virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model); + ++ if (driver->iommufd == VIR_TRISTATE_BOOL_YES) { ++ virBufferAddLit(&driverAttrBuf, " iommufd='yes'"); ++ } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) { ++ virBufferAddLit(&driverAttrBuf, " iommufd='no'"); ++ } ++ + virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL); + return 0; + } +diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h +index e570f51824..116b959143 100644 +--- a/src/conf/device_conf.h ++++ b/src/conf/device_conf.h +@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName); + struct _virDeviceHostdevPCIDriverInfo { + virDeviceHostdevPCIDriverName name; + char *model; ++ virTristateBool iommufd; + }; + + typedef enum { +diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng +index 5689170fad..381e0ac24f 100644 +--- a/src/conf/schemas/basictypes.rng ++++ b/src/conf/schemas/basictypes.rng +@@ -673,6 +673,11 @@ + + + ++ ++ ++ ++ ++ + + + +diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c +index fb89dbec27..1fb31d1721 100644 +--- a/src/qemu/qemu_command.c ++++ b/src/qemu/qemu_command.c +@@ -4754,6 +4754,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, + g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr); + 
const char *failover_pair_id = NULL; + const char *driver = NULL; ++ const char *iommufdId = NULL; + /* 'ramfb' property must be omitted unless it's to be enabled */ + bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON; + +@@ -4787,6 +4788,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, + teaming->persistent) + failover_pair_id = teaming->persistent; + ++ if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) ++ iommufdId = "iommufd0"; ++ + if (virJSONValueObjectAdd(&props, + "s:driver", driver, + "s:host", host, +@@ -4795,6 +4799,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, + "S:failover_pair_id", failover_pair_id, + "S:display", qemuOnOffAuto(pcisrc->display), + "B:ramfb", ramfb, ++ "S:iommufd", iommufdId, + NULL) < 0) + return NULL; + +@@ -5314,6 +5319,44 @@ qemuBuildHostdevCommandLine(virCommand *cmd, + } + + ++static int ++qemuBuildIOMMUFDCommandLine(virCommand *cmd, ++ const virDomainDef *def) ++{ ++ size_t i; ++ ++ for (i = 0; i < def->nhostdevs; i++) { ++ virDomainHostdevDef *hostdev = def->hostdevs[i]; ++ virDomainHostdevSubsys *subsys = &hostdev->source.subsys; ++ g_autoptr(virJSONValue) props = NULL; ++ ++ if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS) ++ continue; ++ ++ if (subsys->type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) ++ continue; ++ ++ if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED) ++ continue; ++ ++ if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) ++ continue; ++ ++ if (qemuMonitorCreateObjectProps(&props, "iommufd", ++ "iommufd0", ++ NULL) < 0) ++ return -1; ++ ++ if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0) ++ return -1; ++ ++ break; ++ } ++ ++ return 0; ++} ++ ++ + static int + qemuBuildMonitorCommandLine(virCommand *cmd, + qemuDomainObjPrivate *priv) +@@ -10926,6 +10969,9 @@ qemuBuildCommandLine(virDomainObj *vm, + if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) + return NULL; + ++ if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) ++ return NULL; ++ + if 
(qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0) + return NULL; + +-- +2.52.0 diff --git a/libvirt-qemu-Introduce-privateData-for-hostdevs.patch b/libvirt-qemu-Introduce-privateData-for-hostdevs.patch new file mode 100644 index 0000000..2d97f4c --- /dev/null +++ b/libvirt-qemu-Introduce-privateData-for-hostdevs.patch @@ -0,0 +1,277 @@ +From 94e2bf223d9fb7b9b65deaf8f2fbafb01dff5578 Mon Sep 17 00:00:00 2001 +Message-ID: <94e2bf223d9fb7b9b65deaf8f2fbafb01dff5578.1770383182.git.jdenemar@redhat.com> +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:13 -0800 +Subject: [PATCH] qemu: Introduce privateData for hostdevs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Introduce private data for hostdevs and allocate hostdev +private data by default. + +Signed-off-by: Ján Tomko +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit 1043e04e17ed4be59b46e925089204333c08f05e) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + src/bhyve/bhyve_parse_command.c | 2 +- + src/conf/domain_conf.c | 13 +++++++++-- + src/conf/domain_conf.h | 5 ++++- + src/libxl/xen_common.c | 2 +- + src/libxl/xen_xl.c | 2 +- + src/lxc/lxc_native.c | 2 +- + src/qemu/qemu_domain.c | 40 +++++++++++++++++++++++++++++++++ + src/qemu/qemu_domain.h | 15 +++++++++++++ + src/vbox/vbox_common.c | 2 +- + tests/virhostdevtest.c | 2 +- + 10 files changed, 76 insertions(+), 9 deletions(-) + +diff --git a/src/bhyve/bhyve_parse_command.c b/src/bhyve/bhyve_parse_command.c +index d62ea64beb..8b405206bd 100644 +--- a/src/bhyve/bhyve_parse_command.c ++++ b/src/bhyve/bhyve_parse_command.c +@@ -687,7 +687,7 @@ bhyveParsePassthru(virDomainDef *def G_GNUC_UNUSED, + return -1; + } + +- hostdev = virDomainHostdevDefNew(); ++ hostdev = virDomainHostdevDefNew(NULL); + hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; + hostdev->source.subsys.type = 
VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; + +diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c +index 541dad5bdc..f950f7c75d 100644 +--- a/src/conf/domain_conf.c ++++ b/src/conf/domain_conf.c +@@ -2733,6 +2733,8 @@ virDomainHostdevDefClear(virDomainHostdevDef *def) + case VIR_DOMAIN_HOSTDEV_MODE_LAST: + break; + } ++ ++ g_clear_pointer(&def->privateData, virObjectUnref); + } + + +@@ -3483,7 +3485,7 @@ void virDomainVideoDefFree(virDomainVideoDef *def) + + + virDomainHostdevDef * +-virDomainHostdevDefNew(void) ++virDomainHostdevDefNew(virDomainXMLOption *xmlopt) + { + virDomainHostdevDef *def; + +@@ -3491,6 +3493,13 @@ virDomainHostdevDefNew(void) + + def->info = g_new0(virDomainDeviceInfo, 1); + ++ if (xmlopt && xmlopt->privateData.hostdevNew && ++ !(def->privateData = xmlopt->privateData.hostdevNew())) { ++ VIR_FREE(def->info); ++ VIR_FREE(def); ++ return NULL; ++ } ++ + return def; + } + +@@ -13678,7 +13687,7 @@ virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt, + + ctxt->node = node; + +- def = virDomainHostdevDefNew(); ++ def = virDomainHostdevDefNew(xmlopt); + + if (virXMLPropEnumDefault(node, "mode", virDomainHostdevModeTypeFromString, + VIR_XML_PROP_NONE, +diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h +index cb35ff06bd..8f53ed96c0 100644 +--- a/src/conf/domain_conf.h ++++ b/src/conf/domain_conf.h +@@ -364,6 +364,8 @@ struct _virDomainHostdevDef { + */ + virDomainNetDef *parentnet; + ++ virObject *privateData; ++ + virDomainHostdevMode mode; + virDomainStartupPolicy startupPolicy; + bool managed; +@@ -3588,6 +3590,7 @@ struct _virDomainXMLPrivateDataCallbacks { + virDomainXMLPrivateDataNewFunc vsockNew; + virDomainXMLPrivateDataNewFunc cryptoNew; + virDomainXMLPrivateDataNewFunc graphicsNew; ++ virDomainXMLPrivateDataNewFunc hostdevNew; + virDomainXMLPrivateDataNewFunc networkNew; + virDomainXMLPrivateDataNetParseFunc networkParse; + virDomainXMLPrivateDataNetFormatFunc networkFormat; +@@ -3797,7 +3800,7 @@ virDomainVideoDef 
*virDomainVideoDefNew(virDomainXMLOption *xmlopt); + void virDomainVideoDefFree(virDomainVideoDef *def); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virDomainVideoDef, virDomainVideoDefFree); + void virDomainVideoDefClear(virDomainVideoDef *def); +-virDomainHostdevDef *virDomainHostdevDefNew(void); ++virDomainHostdevDef *virDomainHostdevDefNew(virDomainXMLOption *xmlopt); + void virDomainHostdevDefFree(virDomainHostdevDef *def); + void virDomainHubDefFree(virDomainHubDef *def); + void virDomainRedirdevDefFree(virDomainRedirdevDef *def); +diff --git a/src/libxl/xen_common.c b/src/libxl/xen_common.c +index 666c6cae20..f19e4f6abb 100644 +--- a/src/libxl/xen_common.c ++++ b/src/libxl/xen_common.c +@@ -445,7 +445,7 @@ xenParsePCI(char *entry) + } + } + +- hostdev = virDomainHostdevDefNew(); ++ hostdev = virDomainHostdevDefNew(NULL); + hostdev->managed = false; + hostdev->writeFiltering = filtered; + hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI; +diff --git a/src/libxl/xen_xl.c b/src/libxl/xen_xl.c +index b2ff0edcf2..e62302736b 100644 +--- a/src/libxl/xen_xl.c ++++ b/src/libxl/xen_xl.c +@@ -930,7 +930,7 @@ xenParseXLUSB(virConf *conf, virDomainDef *def) + key = nextkey; + } + +- hostdev = virDomainHostdevDefNew(); ++ hostdev = virDomainHostdevDefNew(NULL); + hostdev->managed = false; + hostdev->source.subsys.type = VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB; + hostdev->source.subsys.u.usb.bus = busNum; +diff --git a/src/lxc/lxc_native.c b/src/lxc/lxc_native.c +index 7700804429..a94427b027 100644 +--- a/src/lxc/lxc_native.c ++++ b/src/lxc/lxc_native.c +@@ -376,7 +376,7 @@ lxcCreateNetDef(const char *type, + static virDomainHostdevDef * + lxcCreateHostdevDef(const char *data) + { +- virDomainHostdevDef *hostdev = virDomainHostdevDefNew(); ++ virDomainHostdevDef *hostdev = virDomainHostdevDefNew(NULL); + hostdev->mode = VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES; + hostdev->source.caps.type = VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET; + hostdev->source.caps.u.net.ifname = g_strdup(data); 
+diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c +index 486a0e7913..3366214677 100644 +--- a/src/qemu/qemu_domain.c ++++ b/src/qemu/qemu_domain.c +@@ -1238,6 +1238,45 @@ qemuDomainNetworkPrivateFormat(const virDomainNetDef *net, + } + + ++static virClass *qemuDomainHostdevPrivateClass; ++ ++static void ++qemuDomainHostdevPrivateDispose(void *obj) ++{ ++ qemuDomainHostdevPrivate *priv = obj; ++ ++ VIR_FORCE_CLOSE(priv->vfioDeviceFd); ++} ++ ++ ++static int ++qemuDomainHostdevPrivateOnceInit(void) ++{ ++ if (!VIR_CLASS_NEW(qemuDomainHostdevPrivate, virClassForObject())) ++ return -1; ++ ++ return 0; ++} ++ ++VIR_ONCE_GLOBAL_INIT(qemuDomainHostdevPrivate); ++ ++virObject * ++qemuDomainHostdevPrivateNew(void) ++{ ++ qemuDomainHostdevPrivate *priv; ++ ++ if (qemuDomainHostdevPrivateInitialize() < 0) ++ return NULL; ++ ++ if (!(priv = virObjectNew(qemuDomainHostdevPrivateClass))) ++ return NULL; ++ ++ priv->vfioDeviceFd = -1; ++ ++ return (virObject *) priv; ++} ++ ++ + /* qemuDomainSecretInfoSetup: + * @priv: pointer to domain private object + * @alias: alias of the secret +@@ -3563,6 +3602,7 @@ virDomainXMLPrivateDataCallbacks virQEMUDriverPrivateDataCallbacks = { + .chrSourceNew = qemuDomainChrSourcePrivateNew, + .vsockNew = qemuDomainVsockPrivateNew, + .graphicsNew = qemuDomainGraphicsPrivateNew, ++ .hostdevNew = qemuDomainHostdevPrivateNew, + .networkNew = qemuDomainNetworkPrivateNew, + .networkParse = qemuDomainNetworkPrivateParse, + .networkFormat = qemuDomainNetworkPrivateFormat, +diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h +index b9bb338682..88c8416aa4 100644 +--- a/src/qemu/qemu_domain.h ++++ b/src/qemu/qemu_domain.h +@@ -461,6 +461,18 @@ struct _qemuDomainTPMPrivate { + }; + + ++#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \ ++ ((qemuDomainHostdevPrivate *) (hostdev)->privateData) ++ ++typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate; ++struct _qemuDomainHostdevPrivate { ++ virObject parent; ++ ++ /* VFIO device 
file descriptor for iommufd passthrough */ ++ int vfioDeviceFd; ++}; ++ ++ + void + qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv); + +@@ -1175,3 +1187,6 @@ qemuDomainCheckCPU(virArch arch, + bool + qemuDomainMachineSupportsFloppy(const char *machine, + virQEMUCaps *qemuCaps); ++ ++virObject * ++qemuDomainHostdevPrivateNew(void); +diff --git a/src/vbox/vbox_common.c b/src/vbox/vbox_common.c +index 26c5fdfef6..d2a8cf8da4 100644 +--- a/src/vbox/vbox_common.c ++++ b/src/vbox/vbox_common.c +@@ -3090,7 +3090,7 @@ vboxHostDeviceGetXMLDesc(struct _vboxDriver *data, virDomainDef *def, IMachine * + def->hostdevs = g_new0(virDomainHostdevDef *, def->nhostdevs); + + for (i = 0; i < def->nhostdevs; i++) +- def->hostdevs[i] = virDomainHostdevDefNew(); ++ def->hostdevs[i] = virDomainHostdevDefNew(NULL); + + for (i = 0; i < deviceFilters.count; i++) { + PRBool active = PR_FALSE; +diff --git a/tests/virhostdevtest.c b/tests/virhostdevtest.c +index aec474a148..a35c1d9402 100644 +--- a/tests/virhostdevtest.c ++++ b/tests/virhostdevtest.c +@@ -124,7 +124,7 @@ myInit(void) + + for (i = 0; i < nhostdevs; i++) { + virDomainHostdevSubsys *subsys; +- hostdevs[i] = virDomainHostdevDefNew(); ++ hostdevs[i] = virDomainHostdevDefNew(NULL); + if (!hostdevs[i]) + goto cleanup; + hostdevs[i]->mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS; +-- +2.52.0 diff --git a/libvirt-qemu-Support-per-process-memory-accounting-for-iommufd.patch b/libvirt-qemu-Support-per-process-memory-accounting-for-iommufd.patch new file mode 100644 index 0000000..33d73ef --- /dev/null +++ b/libvirt-qemu-Support-per-process-memory-accounting-for-iommufd.patch @@ -0,0 +1,212 @@ +From 9a525305075612f540a1d3b2727ddf8b5320ff01 Mon Sep 17 00:00:00 2001 +Message-ID: <9a525305075612f540a1d3b2727ddf8b5320ff01.1770383182.git.jdenemar@redhat.com> +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:14 -0800 +Subject: [PATCH] qemu: Support per-process memory accounting for iommufd + +Implement the IOMMU_OPTION_RLIMIT_MODE +ioctl 
to set per-process memory accounting for +iommufd. This prevents ENOMEM errors from the +default per-user memory accounting when multiple +VMs under the libvirt-qemu user have their pinned +memory summed and checked against a per-process +RLIMIT_MEMLOCK limit. + +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit f91a07d0c8dd583928974e80bb13b54feb5aa908) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + meson.build | 1 + + po/POTFILES | 1 + + src/libvirt_private.syms | 3 ++ + src/util/meson.build | 1 + + src/util/viriommufd.c | 90 ++++++++++++++++++++++++++++++++++++++++ + src/util/viriommufd.h | 25 +++++++++++ + 6 files changed, 121 insertions(+) + create mode 100644 src/util/viriommufd.c + create mode 100644 src/util/viriommufd.h + +diff --git a/meson.build b/meson.build +index 6ac9d01952..28745e4e32 100644 +--- a/meson.build ++++ b/meson.build +@@ -673,6 +673,7 @@ headers = [ + 'ifaddrs.h', + 'libtasn1.h', + 'linux/kvm.h', ++ 'linux/iommufd.h', + 'mntent.h', + 'net/ethernet.h', + 'net/if.h', +diff --git a/po/POTFILES b/po/POTFILES +index f0aad35c8c..c78d2b8000 100644 +--- a/po/POTFILES ++++ b/po/POTFILES +@@ -303,6 +303,7 @@ src/util/virhostuptime.c + src/util/viridentity.c + src/util/virinhibitor.c + src/util/virinitctl.c ++src/util/viriommufd.c + src/util/viriscsi.c + src/util/virjson.c + src/util/virlease.c +diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms +index 4e57e4a8f6..66261ed6cf 100644 +--- a/src/libvirt_private.syms ++++ b/src/libvirt_private.syms +@@ -2652,6 +2652,9 @@ virInhibitorRelease; + virInitctlFifos; + virInitctlSetRunLevel; + ++# util/viriommufd.h ++virIOMMUFDSetRLimitMode; ++ + # util/viriscsi.h + virISCSIConnectionLogin; + virISCSIConnectionLogout; +diff --git a/src/util/meson.build b/src/util/meson.build +index 4950a795cc..9fb0aa0fe7 100644 +--- a/src/util/meson.build ++++ 
b/src/util/meson.build +@@ -46,6 +46,7 @@ util_sources = [ + 'viridentity.c', + 'virinhibitor.c', + 'virinitctl.c', ++ 'viriommufd.c', + 'viriscsi.c', + 'virjson.c', + 'virkeycode.c', +diff --git a/src/util/viriommufd.c b/src/util/viriommufd.c +new file mode 100644 +index 0000000000..5af097683d +--- /dev/null ++++ b/src/util/viriommufd.c +@@ -0,0 +1,90 @@ ++#include <config.h> ++ ++#include "viriommufd.h" ++#include "virlog.h" ++#include "virerror.h" ++#include "virfile.h" ++ ++#define VIR_FROM_THIS VIR_FROM_NONE ++ ++VIR_LOG_INIT("util.iommufd"); ++ ++#ifdef __linux__ ++ ++# include <sys/ioctl.h> ++# include <linux/types.h> ++ ++# ifdef HAVE_LINUX_IOMMUFD_H ++# include <linux/iommufd.h> ++# endif ++ ++# ifndef IOMMU_OPTION ++ ++enum iommufd_option { ++ IOMMU_OPTION_RLIMIT_MODE = 0, ++ IOMMU_OPTION_HUGE_PAGES = 1, ++}; ++ ++enum iommufd_option_ops { ++ IOMMU_OPTION_OP_SET = 0, ++ IOMMU_OPTION_OP_GET = 1, ++}; ++ ++struct iommu_option { ++ __u32 size; ++ __u32 option_id; ++ __u16 op; ++ __u16 __reserved; ++ __u32 object_id; ++ __aligned_u64 val64; ++}; ++ ++# define IOMMUFD_TYPE (';') ++# define IOMMUFD_CMD_OPTION 0x87 ++# define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION) ++ ++# endif ++ ++/** ++ * virIOMMUFDSetRLimitMode: ++ * @fd: iommufd file descriptor ++ * @processAccounting: true for per-process, false for per-user ++ * ++ * Set RLIMIT_MEMLOCK accounting mode for the iommufd. ++ * ++ * Returns: 0 on success, -1 on error ++ */ ++int ++virIOMMUFDSetRLimitMode(int fd, bool processAccounting) ++{ ++ struct iommu_option option = { ++ .size = sizeof(struct iommu_option), ++ .option_id = IOMMU_OPTION_RLIMIT_MODE, ++ .op = IOMMU_OPTION_OP_SET, ++ .__reserved = 0, ++ .object_id = 0, ++ .val64 = processAccounting ? 1 : 0, ++ }; ++ ++ if (ioctl(fd, IOMMU_OPTION, &option) < 0) { ++ virReportSystemError(errno, "%s", ++ _("failed to set memory accounting for iommufd")); ++ return -1; ++ } ++ ++ VIR_DEBUG("Set iommufd rlimit mode to %s-based accounting", ++ processAccounting ? 
"process" : "user"); ++ return 0; ++} ++ ++#else ++ ++int virIOMMUFDSetRLimitMode(int fd G_GNUC_UNUSED, ++ bool processAccounting G_GNUC_UNUSED) ++{ ++ virReportError(VIR_ERR_NO_SUPPORT, "%s", ++ _("IOMMUFD is not supported on this platform")); ++ return -1; ++} ++ ++#endif +diff --git a/src/util/viriommufd.h b/src/util/viriommufd.h +new file mode 100644 +index 0000000000..ebecfe3633 +--- /dev/null ++++ b/src/util/viriommufd.h +@@ -0,0 +1,25 @@ ++/* ++ * viriommufd.h: iommufd helpers ++ * ++ * This library is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU Lesser General Public ++ * License as published by the Free Software Foundation; either ++ * version 2.1 of the License, or (at your option) any later version. ++ * ++ * This library is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * Lesser General Public License for more details. ++ * ++ * You should have received a copy of the GNU Lesser General Public ++ * License along with this library. If not, see ++ * <http://www.gnu.org/licenses/>. 
++ */ ++ ++#pragma once ++ ++#include "internal.h" ++ ++#define VIR_IOMMU_DEV_PATH "/dev/iommu" ++ ++int virIOMMUFDSetRLimitMode(int fd, bool processAccounting); +-- +2.52.0 diff --git a/libvirt-qemu-Update-Cgroup-namespace-and-seclabel-for-iommufd.patch b/libvirt-qemu-Update-Cgroup-namespace-and-seclabel-for-iommufd.patch new file mode 100644 index 0000000..32b82fb --- /dev/null +++ b/libvirt-qemu-Update-Cgroup-namespace-and-seclabel-for-iommufd.patch @@ -0,0 +1,336 @@ +From 01ce19df7986fe190ce212fa05f2caed799cc50f Mon Sep 17 00:00:00 2001 +Message-ID: <01ce19df7986fe190ce212fa05f2caed799cc50f.1770383182.git.jdenemar@redhat.com> +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:17 -0800 +Subject: [PATCH] qemu: Update Cgroup, namespace, and seclabel for iommufd + +When launching a qemu VM with the iommufd feature enabled for VFIO +hostdevs: +- Do not allow cgroup, namespace, and seclabel access to VFIO +paths (/dev/vfio/vfio and /dev/vfio/) +- Allow access to iommufd paths (/dev/iommu and +/dev/vfio/devices/vfio*) for AppArmor, SELinux, and DAC + +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit 7d2f91f9cb572ab95d0916bdd1a46dd198874529) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + src/qemu/qemu_cgroup.c | 3 ++ + src/qemu/qemu_namespace.c | 3 ++ + src/security/security_apparmor.c | 28 ++++++++++++------ + src/security/security_dac.c | 49 +++++++++++++++++++++++++------- + src/security/security_selinux.c | 47 +++++++++++++++++++++++------- + src/security/virt-aa-helper.c | 32 ++++++++++++++++----- + 6 files changed, 127 insertions(+), 35 deletions(-) + +diff --git a/src/qemu/qemu_cgroup.c b/src/qemu/qemu_cgroup.c +index 7dadef0739..6148990f19 100644 +--- a/src/qemu/qemu_cgroup.c ++++ b/src/qemu/qemu_cgroup.c +@@ -479,6 +479,9 @@ qemuSetupHostdevCgroup(virDomainObj *vm, + g_autofree char *path = NULL; + int perms; + ++ if 
(dev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) ++ return 0; ++ + if (!virCgroupHasController(priv->cgroup, VIR_CGROUP_CONTROLLER_DEVICES)) + return 0; + +diff --git a/src/qemu/qemu_namespace.c b/src/qemu/qemu_namespace.c +index c689cc3e40..fb0734193d 100644 +--- a/src/qemu/qemu_namespace.c ++++ b/src/qemu/qemu_namespace.c +@@ -345,6 +345,9 @@ qemuDomainSetupHostdev(virDomainObj *vm, + { + g_autofree char *path = NULL; + ++ if (hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) ++ return 0; ++ + if (qemuDomainGetHostdevPath(hostdev, &path, NULL) < 0) + return -1; + +diff --git a/src/security/security_apparmor.c b/src/security/security_apparmor.c +index 68ac39611f..934acfb461 100644 +--- a/src/security/security_apparmor.c ++++ b/src/security/security_apparmor.c +@@ -45,6 +45,7 @@ + #include "virstring.h" + #include "virscsi.h" + #include "virmdev.h" ++#include "viriommufd.h" + + #define VIR_FROM_THIS VIR_FROM_SECURITY + +@@ -841,25 +842,36 @@ AppArmorSetSecurityHostdevLabel(virSecurityManager *mgr, + } + + case VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI: { +- virPCIDevice *pci = ++ g_autoptr(virPCIDevice) pci = + virPCIDeviceNew(&pcisrc->addr); + + if (!pci) + goto done; + + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { +- char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); ++ if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { ++ char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + +- if (!vfioGroupDev) { +- virPCIDeviceFree(pci); +- goto done; ++ if (!vfioGroupDev) { ++ goto done; ++ } ++ ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); ++ VIR_FREE(vfioGroupDev); ++ } else { ++ g_autofree char *vfiofdDev = NULL; ++ ++ if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) ++ goto done; ++ ++ ret = AppArmorSetSecurityPCILabel(pci, vfiofdDev, ptr); ++ if (ret < 0) ++ goto done; ++ ++ ret = AppArmorSetSecurityPCILabel(pci, VIR_IOMMU_DEV_PATH, ptr); + } 
+- ret = AppArmorSetSecurityPCILabel(pci, vfioGroupDev, ptr); +- VIR_FREE(vfioGroupDev); + } else { + ret = virPCIDeviceFileIterate(pci, AppArmorSetSecurityPCILabel, ptr); + } +- virPCIDeviceFree(pci); + break; + } + +diff --git a/src/security/security_dac.c b/src/security/security_dac.c +index 2f788b872a..d0ed22db2d 100644 +--- a/src/security/security_dac.c ++++ b/src/security/security_dac.c +@@ -41,6 +41,7 @@ + #include "virscsivhost.h" + #include "virstring.h" + #include "virutil.h" ++#include "viriommufd.h" + + #define VIR_FROM_THIS VIR_FROM_SECURITY + +@@ -1282,14 +1283,27 @@ virSecurityDACSetHostdevLabel(virSecurityManager *mgr, + return -1; + + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { +- g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); ++ if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { ++ g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + +- if (!vfioGroupDev) +- return -1; ++ if (!vfioGroupDev) ++ return -1; + +- ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, +- false, +- &cbdata); ++ ret = virSecurityDACSetHostdevLabelHelper(vfioGroupDev, ++ false, ++ &cbdata); ++ } else { ++ g_autofree char *vfiofdDev = NULL; ++ ++ if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) ++ return -1; ++ ++ ret = virSecurityDACSetHostdevLabelHelper(vfiofdDev, false, &cbdata); ++ if (ret < 0) ++ break; ++ ++ ret = virSecurityDACSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &cbdata); ++ } + } else { + ret = virPCIDeviceFileIterate(pci, + virSecurityDACSetPCILabel, +@@ -1443,13 +1457,28 @@ virSecurityDACRestoreHostdevLabel(virSecurityManager *mgr, + return -1; + + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { +- g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); ++ if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { ++ g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + +- if 
(!vfioGroupDev) +- return -1; ++ if (!vfioGroupDev) ++ return -1; + +- ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, ++ ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, + vfioGroupDev, false); ++ } else { ++ g_autofree char *vfiofdDev = NULL; ++ ++ if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) ++ return -1; ++ ++ ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, ++ vfiofdDev, false); ++ if (ret < 0) ++ break; ++ ++ ret = virSecurityDACRestoreFileLabelInternal(mgr, NULL, ++ VIR_IOMMU_DEV_PATH, false); ++ } + } else { + ret = virPCIDeviceFileIterate(pci, virSecurityDACRestorePCILabel, mgr); + } +diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c +index 2f3cc274a5..834383a7de 100644 +--- a/src/security/security_selinux.c ++++ b/src/security/security_selinux.c +@@ -41,6 +41,7 @@ + #include "virconf.h" + #include "virtpm.h" + #include "virstring.h" ++#include "viriommufd.h" + + #define VIR_FROM_THIS VIR_FROM_SECURITY + +@@ -2256,14 +2257,27 @@ virSecuritySELinuxSetHostdevSubsysLabel(virSecurityManager *mgr, + return -1; + + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { +- g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); ++ if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { ++ g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + +- if (!vfioGroupDev) +- return -1; ++ if (!vfioGroupDev) ++ return -1; + +- ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, +- false, +- &data); ++ ret = virSecuritySELinuxSetHostdevLabelHelper(vfioGroupDev, ++ false, ++ &data); ++ } else { ++ g_autofree char *vfiofdDev = NULL; ++ ++ if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) ++ return -1; ++ ++ ret = virSecuritySELinuxSetHostdevLabelHelper(vfiofdDev, false, &data); ++ if (ret) ++ break; ++ ++ ret = virSecuritySELinuxSetHostdevLabelHelper(VIR_IOMMU_DEV_PATH, false, &data); ++ } + } else { + 
ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxSetPCILabel, &data); + } +@@ -2491,12 +2505,25 @@ virSecuritySELinuxRestoreHostdevSubsysLabel(virSecurityManager *mgr, + return -1; + + if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO) { +- g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); ++ if (dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { ++ g_autofree char *vfioGroupDev = virPCIDeviceGetIOMMUGroupDev(pci); + +- if (!vfioGroupDev) +- return -1; ++ if (!vfioGroupDev) ++ return -1; + +- ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); ++ ret = virSecuritySELinuxRestoreFileLabel(mgr, vfioGroupDev, false, false); ++ } else { ++ g_autofree char *vfiofdDev = NULL; ++ ++ if (virPCIDeviceGetVfioPath(&dev->source.subsys.u.pci.addr, &vfiofdDev) < 0) ++ return -1; ++ ++ ret = virSecuritySELinuxRestoreFileLabel(mgr, vfiofdDev, false, false); ++ if (ret < 0) ++ break; ++ ++ ret = virSecuritySELinuxRestoreFileLabel(mgr, VIR_IOMMU_DEV_PATH, false, false); ++ } + } else { + ret = virPCIDeviceFileIterate(pci, virSecuritySELinuxRestorePCILabel, mgr); + } +diff --git a/src/security/virt-aa-helper.c b/src/security/virt-aa-helper.c +index de0a826063..29e844c7ff 100644 +--- a/src/security/virt-aa-helper.c ++++ b/src/security/virt-aa-helper.c +@@ -50,6 +50,7 @@ + #include "virstring.h" + #include "virgettext.h" + #include "virhostdev.h" ++#include "viriommufd.h" + + #define VIR_FROM_THIS VIR_FROM_SECURITY + +@@ -1114,8 +1115,9 @@ get_files(vahControl * ctl) + + virDeviceHostdevPCIDriverName driverName = dev->source.subsys.u.pci.driver.name; + +- if (driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || +- driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) { ++ if ((driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO || ++ driverName == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_DEFAULT) && ++ dev->source.subsys.u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) { + needsVfio = true; + } + +@@ -1385,9 
+1387,18 @@ get_files(vahControl * ctl) + } + } + +- if (ctl->newfile && +- vah_add_file(&buf, ctl->newfile, "rwk") != 0) { +- return -1; ++ if (ctl->newfile) { ++ const char *perms = "rwk"; ++ ++ /* VFIO and iommufd devices need mmap permission */ ++ if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || ++ STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { ++ perms = "rwm"; ++ } ++ ++ if (vah_add_file(&buf, ctl->newfile, perms) != 0) { ++ return -1; ++ } + } + + ctl->files = virBufferContentAndReset(&buf); +@@ -1561,8 +1572,15 @@ main(int argc, char **argv) + } + } + if (ctl->append && ctl->newfile) { +- if (vah_add_file(&buf, ctl->newfile, "rwk") != 0) +- goto cleanup; ++ const char *perms = "rwk"; ++ ++ if (STRPREFIX(ctl->newfile, "/dev/vfio/devices/vfio") || ++ STREQ(ctl->newfile, VIR_IOMMU_DEV_PATH)) { ++ perms = "rwm"; ++ } ++ ++ if (vah_add_file(&buf, ctl->newfile, perms) != 0) ++ goto cleanup; + } else { + if (ctl->def->virtType == VIR_DOMAIN_VIRT_QEMU || + ctl->def->virtType == VIR_DOMAIN_VIRT_KQEMU || +-- +2.52.0 diff --git a/libvirt-qemu-open-VFIO-FDs-from-libvirt-backend.patch b/libvirt-qemu-open-VFIO-FDs-from-libvirt-backend.patch new file mode 100644 index 0000000..8e5c90a --- /dev/null +++ b/libvirt-qemu-open-VFIO-FDs-from-libvirt-backend.patch @@ -0,0 +1,247 @@ +From 68a23646ba165aa45d3811d626885054ae9d9299 Mon Sep 17 00:00:00 2001 +Message-ID: <68a23646ba165aa45d3811d626885054ae9d9299.1770383182.git.jdenemar@redhat.com> +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:15 -0800 +Subject: [PATCH] qemu: open VFIO FDs from libvirt backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Open VFIO FDs from libvirt backend without exposing +these FDs to XML users, i.e. one per iommufd hostdev +for /dev/vfio/devices/vfioX, and pass the FD to qemu +command line. 
+ +Suggested-by: Ján Tomko +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit f6230804727df834da27370e835204672218ab23) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + src/libvirt_private.syms | 1 + + src/qemu/qemu_command.c | 21 +++++++++++ + src/qemu/qemu_process.c | 78 ++++++++++++++++++++++++++++++++++++++++ + src/util/virpci.c | 39 ++++++++++++++++++++ + src/util/virpci.h | 2 ++ + 5 files changed, 141 insertions(+) + +diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms +index 66261ed6cf..e2a7a16347 100644 +--- a/src/libvirt_private.syms ++++ b/src/libvirt_private.syms +@@ -3162,6 +3162,7 @@ virPCIDeviceGetStubDriverName; + virPCIDeviceGetStubDriverType; + virPCIDeviceGetUnbindFromStub; + virPCIDeviceGetUsedBy; ++virPCIDeviceGetVfioPath; + virPCIDeviceGetVPD; + virPCIDeviceHasPCIExpressLink; + virPCIDeviceIsAssignable; +diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c +index 1fb31d1721..83935e82c3 100644 +--- a/src/qemu/qemu_command.c ++++ b/src/qemu/qemu_command.c +@@ -4803,6 +4803,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def, + NULL) < 0) + return NULL; + ++ if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && ++ pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) { ++ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev); ++ ++ if (hostdevPriv->vfioDeviceFd != -1) { ++ g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd); ++ if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0) ++ return NULL; ++ hostdevPriv->vfioDeviceFd = -1; ++ } ++ } ++ + if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0) + return NULL; + +@@ -5247,6 +5259,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd, + if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0) + return -1; + ++ if (subsys->u.pci.driver.iommufd == 
VIR_TRISTATE_BOOL_YES) { ++ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); ++ ++ if (hostdevPriv->vfioDeviceFd != -1) { ++ virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd, ++ VIR_COMMAND_PASS_FD_CLOSE_PARENT); ++ } ++ } ++ + if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev))) + return -1; + +diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c +index 0e50cd1ccc..1ac57a6321 100644 +--- a/src/qemu/qemu_process.c ++++ b/src/qemu/qemu_process.c +@@ -103,6 +103,7 @@ + #include "storage_source.h" + #include "backup_conf.h" + #include "storage_file_probe.h" ++#include "virpci.h" + + #include "logging/log_manager.h" + #include "logging/log_protocol.h" +@@ -7671,6 +7672,81 @@ qemuProcessPrepareHostBackendChardevHotplug(virDomainObj *vm, + return 0; + } + ++/** ++ * qemuProcessOpenVfioDeviceFd: ++ * @hostdev: host device definition ++ * @vfioFd: returned file descriptor ++ * ++ * Opens the VFIO device file descriptor for a hostdev. ++ * ++ * Returns: FD on success, -1 on failure ++ */ ++static int ++qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) ++{ ++ g_autofree char *vfioPath = NULL; ++ int fd = -1; ++ ++ if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || ++ hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { ++ virReportError(VIR_ERR_INTERNAL_ERROR, "%s", ++ _("VFIO FD only supported for PCI hostdevs")); ++ return -1; ++ } ++ ++ if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0) ++ return -1; ++ ++ VIR_DEBUG("Opening VFIO device %s", vfioPath); ++ ++ if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) { ++ if (errno == ENOENT) { ++ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, ++ _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"), ++ vfioPath); ++ } else { ++ virReportSystemError(errno, ++ _("cannot open VFIO device %1$s"), vfioPath); ++ } ++ return -1; ++ } ++ ++ VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath); ++ return fd; ++} 
++ ++/** ++ * qemuProcessOpenVfioFds: ++ * @vm: domain object ++ * ++ * Opens all necessary VFIO file descriptors for the domain. ++ * ++ * Returns: 0 on success, -1 on failure ++ */ ++static int ++qemuProcessOpenVfioFds(virDomainObj *vm) ++{ ++ size_t i; ++ ++ /* Check if we have any hostdevs that need VFIO FDs */ ++ for (i = 0; i < vm->def->nhostdevs; i++) { ++ virDomainHostdevDef *hostdev = vm->def->hostdevs[i]; ++ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev); ++ ++ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS && ++ hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI && ++ hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO && ++ hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) { ++ /* Open VFIO device FD */ ++ hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); ++ if (hostdevPriv->vfioDeviceFd == -1) ++ return -1; ++ } ++ } ++ ++ return 0; ++} ++ + /** + * qemuProcessPrepareHost: + * @driver: qemu driver +@@ -7726,6 +7802,8 @@ qemuProcessPrepareHost(virQEMUDriver *driver, + hostdev_flags |= VIR_HOSTDEV_COLD_BOOT; + if (qemuHostdevPrepareDomainDevices(driver, vm->def, hostdev_flags) < 0) + return -1; ++ if (qemuProcessOpenVfioFds(vm) < 0) ++ return -1; + + VIR_DEBUG("Preparing chr device backends"); + if (qemuProcessPrepareHostBackendChardev(vm) < 0) +diff --git a/src/util/virpci.c b/src/util/virpci.c +index 90617e69c6..2348a98003 100644 +--- a/src/util/virpci.c ++++ b/src/util/virpci.c +@@ -3320,3 +3320,42 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address) + { + g_free(address); + } ++ ++/** ++ * virPCIDeviceGetVfioPath: ++ * @addr: host device PCI address ++ * @vfioPath: returned VFIO device path ++ * ++ * Constructs the VFIO device path for a PCI hostdev. 
++ * ++ * Returns: 0 on success, -1 on failure ++ */ ++int ++virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, ++ char **vfioPath) ++{ ++ g_autofree char *addrStr = NULL; ++ g_autofree char *sysfsPath = NULL; ++ g_autoptr(DIR) dir = NULL; ++ struct dirent *entry = NULL; ++ ++ *vfioPath = NULL; ++ addrStr = virPCIDeviceAddressAsString(addr); ++ ++ /* Look in device's vfio-dev subdirectory */ ++ sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr); ++ ++ if (virDirOpen(&dir, sysfsPath) == 1) { ++ while (virDirRead(dir, &entry, sysfsPath) > 0) { ++ if (STRPREFIX(entry->d_name, "vfio")) { ++ *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name); ++ return 0; ++ } ++ } ++ } ++ ++ virReportError(VIR_ERR_INTERNAL_ERROR, ++ _("cannot find VFIO device for PCI device %1$s"), ++ addrStr); ++ return -1; ++} +diff --git a/src/util/virpci.h b/src/util/virpci.h +index fc538566e1..24ede10755 100644 +--- a/src/util/virpci.h ++++ b/src/util/virpci.h +@@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev); + + void virPCIDeviceAddressFree(virPCIDeviceAddress *address); + ++int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath); ++ + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree); + G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree); +-- +2.52.0 diff --git a/libvirt-qemu-open-iommufd-FD-from-libvirt-backend.patch b/libvirt-qemu-open-iommufd-FD-from-libvirt-backend.patch new file mode 100644 index 0000000..d7275f0 --- /dev/null +++ b/libvirt-qemu-open-iommufd-FD-from-libvirt-backend.patch @@ -0,0 +1,175 @@ +From a444918da5bd01fc11793c82ad33308892777c3a Mon Sep 17 00:00:00 2001 +Message-ID: +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:16 -0800 +Subject: [PATCH] qemu: open iommufd FD from libvirt backend +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Open 
iommufd FD from libvirt backend without exposing +these FDs to XML users, i.e. one per domain for +/dev/iommu, and pass the FD to qemu command line. Set +per-process memory accounting for iommufd instead of +the default per-user memory accounting. + +Suggested-by: Ján Tomko +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit 2f0999a161910e3992458902ce90d37f8b8f2642) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Signed-off-by: Pavel Hrdina +--- + src/qemu/qemu_command.c | 13 +++++++++++-- + src/qemu/qemu_domain.c | 1 + + src/qemu/qemu_domain.h | 2 ++ + src/qemu/qemu_process.c | 43 +++++++++++++++++++++++++++++++++++++++++ + 4 files changed, 57 insertions(+), 2 deletions(-) + +diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c +index 83935e82c3..f355352018 100644 +--- a/src/qemu/qemu_command.c ++++ b/src/qemu/qemu_command.c +@@ -5342,9 +5342,13 @@ qemuBuildHostdevCommandLine(virCommand *cmd, + + static int + qemuBuildIOMMUFDCommandLine(virCommand *cmd, +- const virDomainDef *def) ++ const virDomainDef *def, ++ virDomainObj *vm) + { + size_t i; ++ qemuDomainObjPrivate *priv = vm->privateData; ++ g_autofree char *fdstr = g_strdup_printf("%d", priv->iommufd); ++ + + for (i = 0; i < def->nhostdevs; i++) { + virDomainHostdevDef *hostdev = def->hostdevs[i]; +@@ -5363,8 +5367,13 @@ qemuBuildIOMMUFDCommandLine(virCommand *cmd, + if (subsys->u.pci.driver.iommufd != VIR_TRISTATE_BOOL_YES) + continue; + ++ virCommandPassFD(cmd, priv->iommufd, VIR_COMMAND_PASS_FD_CLOSE_PARENT); ++ ++ priv->iommufd = -1; ++ + if (qemuMonitorCreateObjectProps(&props, "iommufd", + "iommufd0", ++ "S:fd", fdstr, + NULL) < 0) + return -1; + +@@ -10990,7 +10999,7 @@ qemuBuildCommandLine(virDomainObj *vm, + if (qemuBuildRedirdevCommandLine(cmd, def, qemuCaps) < 0) + return NULL; + +- if (qemuBuildIOMMUFDCommandLine(cmd, def) < 0) ++ if (qemuBuildIOMMUFDCommandLine(cmd, def, vm) < 0) + return 
NULL; + + if (qemuBuildHostdevCommandLine(cmd, def, qemuCaps) < 0) +diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c +index 3366214677..8e1ebe7799 100644 +--- a/src/qemu/qemu_domain.c ++++ b/src/qemu/qemu_domain.c +@@ -2042,6 +2042,7 @@ qemuDomainObjPrivateAlloc(void *opaque) + priv->blockjobs = virHashNew(virObjectUnref); + priv->fds = virHashNew(g_object_unref); + ++ priv->iommufd = -1; + priv->pidMonitored = -1; + + /* agent commands block by default, user can choose different behavior */ +diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h +index 88c8416aa4..3361e97315 100644 +--- a/src/qemu/qemu_domain.h ++++ b/src/qemu/qemu_domain.h +@@ -264,6 +264,8 @@ struct _qemuDomainObjPrivate { + /* named file descriptor groups associated with the VM */ + GHashTable *fds; + ++ int iommufd; ++ + char *memoryBackingDir; + }; + +diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c +index 1ac57a6321..d8f0c78fd1 100644 +--- a/src/qemu/qemu_process.c ++++ b/src/qemu/qemu_process.c +@@ -104,6 +104,7 @@ + #include "backup_conf.h" + #include "storage_file_probe.h" + #include "virpci.h" ++#include "viriommufd.h" + + #include "logging/log_manager.h" + #include "logging/log_protocol.h" +@@ -7672,6 +7673,42 @@ qemuProcessPrepareHostBackendChardevHotplug(virDomainObj *vm, + return 0; + } + ++/** ++ * qemuProcessOpenIommuFd: ++ * @vm: domain object ++ * @iommuFd: returned file descriptor ++ * ++ * Opens /dev/iommu file descriptor for the VM. 
++ * ++ * Returns: FD on success, -1 on failure ++ */ ++static int ++qemuProcessOpenIommuFd(virDomainObj *vm) ++{ ++ int fd = -1; ++ ++ VIR_DEBUG("Opening IOMMU FD for domain %s", vm->def->name); ++ ++ if ((fd = open(VIR_IOMMU_DEV_PATH, O_RDWR | O_CLOEXEC)) < 0) { ++ if (errno == ENOENT) { ++ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", ++ _("IOMMU FD support requires /dev/iommu device")); ++ } else { ++ virReportSystemError(errno, "%s", ++ _("cannot open /dev/iommu")); ++ } ++ return -1; ++ } ++ ++ if (virIOMMUFDSetRLimitMode(fd, true) < 0) { ++ VIR_FORCE_CLOSE(fd); ++ return -1; ++ } ++ ++ VIR_DEBUG("Opened IOMMU FD %d for domain %s", fd, vm->def->name); ++ return fd; ++} ++ + /** + * qemuProcessOpenVfioDeviceFd: + * @hostdev: host device definition +@@ -7726,6 +7763,7 @@ qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev) + static int + qemuProcessOpenVfioFds(virDomainObj *vm) + { ++ qemuDomainObjPrivate *priv = vm->privateData; + size_t i; + + /* Check if we have any hostdevs that need VFIO FDs */ +@@ -7741,6 +7779,11 @@ qemuProcessOpenVfioFds(virDomainObj *vm) + hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev); + if (hostdevPriv->vfioDeviceFd == -1) + return -1; ++ ++ /* Open IOMMU FD */ ++ priv->iommufd = qemuProcessOpenIommuFd(vm); ++ if (priv->iommufd == -1) ++ return -1; + } + } + +-- +2.52.0 diff --git a/libvirt-tests-qemuxmlconfdata-provide-iommufd-sample-XML-and-CLI-args.patch b/libvirt-tests-qemuxmlconfdata-provide-iommufd-sample-XML-and-CLI-args.patch new file mode 100644 index 0000000..ed0ef53 --- /dev/null +++ b/libvirt-tests-qemuxmlconfdata-provide-iommufd-sample-XML-and-CLI-args.patch @@ -0,0 +1,635 @@ +From 4f2048ad6600d3357d83f4868a29b08f9a181104 Mon Sep 17 00:00:00 2001 +Message-ID: <4f2048ad6600d3357d83f4868a29b08f9a181104.1770383182.git.jdenemar@redhat.com> +From: Nathan Chen +Date: Fri, 30 Jan 2026 10:59:18 -0800 +Subject: [PATCH] tests: qemuxmlconfdata: provide iommufd sample XML and CLI + args + +Provide sample 
XML and CLI args for the iommufd XML schema +for pc, q35, and virt machine types. + +Signed-off-by: Nathan Chen +Reviewed-by: Pavel Hrdina +(cherry picked from commit 74fc02d792f7ee55d2e0a7b9ad4e6d751c36ceb8) + +Resolves: https://issues.redhat.com/browse/RHEL-74202 +Resolves: https://issues.redhat.com/browse/RHEL-126346 + +Conflicts: + - missing upstream commit 5b080bc5801ea2b15296d3f89be75a0882b317c2 + +Signed-off-by: Pavel Hrdina +--- + .../iommufd-q35.x86_64-latest.args | 41 +++++++++++++ + .../iommufd-q35.x86_64-latest.xml | 60 +++++++++++++++++++ + tests/qemuxmlconfdata/iommufd-q35.xml | 38 ++++++++++++ + ...fd-virt-pci-bus-single.aarch64-latest.args | 33 ++++++++++ + ...ufd-virt-pci-bus-single.aarch64-latest.xml | 34 +++++++++++ + .../iommufd-virt-pci-bus-single.xml | 22 +++++++ + .../iommufd-virt.aarch64-latest.args | 37 ++++++++++++ + .../iommufd-virt.aarch64-latest.xml | 56 +++++++++++++++++ + tests/qemuxmlconfdata/iommufd-virt.xml | 29 +++++++++ + .../iommufd.x86_64-latest.args | 35 +++++++++++ + .../qemuxmlconfdata/iommufd.x86_64-latest.xml | 38 ++++++++++++ + tests/qemuxmlconfdata/iommufd.xml | 30 ++++++++++ + tests/qemuxmlconftest.c | 34 +++++++++++ + 13 files changed, 487 insertions(+) + create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args + create mode 100644 tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml + create mode 100644 tests/qemuxmlconfdata/iommufd-q35.xml + create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.args + create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.aarch64-latest.xml + create mode 100644 tests/qemuxmlconfdata/iommufd-virt-pci-bus-single.xml + create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.args + create mode 100644 tests/qemuxmlconfdata/iommufd-virt.aarch64-latest.xml + create mode 100644 tests/qemuxmlconfdata/iommufd-virt.xml + create mode 100644 tests/qemuxmlconfdata/iommufd.x86_64-latest.args + create mode 100644 
tests/qemuxmlconfdata/iommufd.x86_64-latest.xml + create mode 100644 tests/qemuxmlconfdata/iommufd.xml + +diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args +new file mode 100644 +index 0000000000..7d819e141b +--- /dev/null ++++ b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.args +@@ -0,0 +1,41 @@ ++LC_ALL=C \ ++PATH=/bin \ ++HOME=/var/lib/libvirt/qemu/domain--1-q35-test \ ++USER=test \ ++LOGNAME=test \ ++XDG_DATA_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.local/share \ ++XDG_CACHE_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.cache \ ++XDG_CONFIG_HOME=/var/lib/libvirt/qemu/domain--1-q35-test/.config \ ++/usr/bin/qemu-system-x86_64 \ ++-name guest=q35-test,debug-threads=on \ ++-S \ ++-object '{"qom-type":"secret","id":"masterKey0","format":"raw","file":"/var/lib/libvirt/qemu/domain--1-q35-test/master-key.aes"}' \ ++-machine q35,usb=off,dump-guest-core=off,memory-backend=pc.ram,acpi=off \ ++-accel tcg \ ++-cpu qemu64 \ ++-m size=2097152k \ ++-object '{"qom-type":"memory-backend-ram","id":"pc.ram","size":2147483648}' \ ++-overcommit mem-lock=off \ ++-smp 2,sockets=2,cores=1,threads=1 \ ++-uuid 11dbdcdd-4c3b-482b-8903-9bdb8c0a2774 \ ++-display none \ ++-no-user-config \ ++-nodefaults \ ++-chardev socket,id=charmonitor,fd=1729,server=on,wait=off \ ++-mon chardev=charmonitor,id=monitor,mode=control \ ++-rtc base=utc \ ++-no-shutdown \ ++-boot strict=on \ ++-device '{"driver":"pcie-root-port","port":16,"chassis":1,"id":"pci.1","bus":"pcie.0","multifunction":true,"addr":"0x2"}' \ ++-device '{"driver":"pcie-root-port","port":17,"chassis":2,"id":"pci.2","bus":"pcie.0","addr":"0x2.0x1"}' \ ++-device '{"driver":"qemu-xhci","id":"usb","bus":"pci.1","addr":"0x0"}' \ ++-blockdev '{"driver":"host_device","filename":"/dev/HostVG/QEMUGuest1","node-name":"libvirt-1-storage","read-only":false}' \ ++-device '{"driver":"ide-hd","bus":"ide.0","drive":"libvirt-1-storage","id":"sata0-0-0","bootindex":1}' \ 
++-audiodev '{"id":"audio1","driver":"none"}' \ ++-device '{"driver":"qxl-vga","id":"video0","max_outputs":1,"ram_size":67108864,"vram_size":33554432,"vram64_size_mb":0,"vgamem_mb":8,"bus":"pcie.0","addr":"0x1"}' \ ++-global ICH9-LPC.noreboot=off \ ++-watchdog-action reset \ ++-object '{"qom-type":"iommufd","id":"iommufd0","fd":"-1"}' \ ++-device '{"driver":"vfio-pci","host":"0000:06:12.5","id":"hostdev0","iommufd":"iommufd0","fd":"0","bus":"pcie.0","addr":"0x3"}' \ ++-sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny \ ++-msg timestamp=on +diff --git a/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml +new file mode 100644 +index 0000000000..bb76252b61 +--- /dev/null ++++ b/tests/qemuxmlconfdata/iommufd-q35.x86_64-latest.xml +@@ -0,0 +1,60 @@ ++ ++ q35-test ++ 11dbdcdd-4c3b-482b-8903-9bdb8c0a2774 ++ 2097152 ++ 2097152 ++ 2 ++ ++ hvm ++ ++ ++ ++ qemu64 ++ ++ ++ destroy ++ restart ++ destroy ++ ++ /usr/bin/qemu-system-x86_64 ++ ++ ++ ++ ++
++ ++ ++ ++ ++ ++
++ ++ ++ ++ ++
++ ++ ++
++ ++ ++
++ ++ ++ ++