libvirt/SOURCES/libvirt-qemu_domain-add-a-P...

220 lines
8.9 KiB
Diff

From 051451a1b9cefa42ecfd6d27dcb6a12ef49de072 Mon Sep 17 00:00:00 2001
Message-Id: <051451a1b9cefa42ecfd6d27dcb6a12ef49de072@dist-git>
From: Daniel Henrique Barboza <danielhb413@gmail.com>
Date: Fri, 3 May 2019 13:54:51 +0200
Subject: [PATCH] qemu_domain: add a PPC64 memLockLimit helper
There is a lot of documentation in the comments about how PPC64 handles
passthrough VFIO devices to calculate the @memLockLimit. And more will
be added with the PPC64 NVLink2 support code.
Let's remove the PPC64 code from qemuDomainGetMemLockLimitBytes()
body and put it into a helper function. This will simplify the
flow of qemuDomainGetMemLockLimitBytes() that handles all the other
platforms and improves readability of the PPC64 specifics.
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
Reviewed-by: Erik Skultety <eskultet@redhat.com>
(cherry picked from commit 7a686fd2eae8d5674bb1213d8517dc5814fa6bf3)
https://bugzilla.redhat.com/show_bug.cgi?id=1505998
Signed-off-by: Erik Skultety <eskultet@redhat.com>
Message-Id: <ccdf9fefeb624585559606d00b6ac19b574733b3.1556884443.git.eskultet@redhat.com>
Reviewed-by: Andrea Bolognani <abologna@redhat.com>
---
src/qemu/qemu_domain.c | 171 ++++++++++++++++++++++-------------------
1 file changed, 93 insertions(+), 78 deletions(-)
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index d936090d87..f91de0b743 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -9805,6 +9805,97 @@ qemuDomainUpdateCurrentMemorySize(virQEMUDriverPtr driver,
}
+/**
+ * getPPC64MemLockLimitBytes:
+ * @def: domain definition
+ *
+ * A PPC64 helper that calculates the memory locking limit, in bytes,
+ * that the guest needs in order to operate properly.
+ */
+static unsigned long long
+getPPC64MemLockLimitBytes(virDomainDefPtr def)
+{
+ unsigned long long memKB = 0;
+ unsigned long long baseLimit = 0;
+ unsigned long long memory = 0;
+ unsigned long long maxMemory = 0;
+ unsigned long long passthroughLimit = 0;
+ size_t i, nPCIHostBridges = 0;
+ bool usesVFIO = false;
+
+ for (i = 0; i < def->ncontrollers; i++) { /* count the pSeries PHB controllers */
+ virDomainControllerDefPtr cont = def->controllers[i];
+
+ if (!virDomainControllerIsPSeriesPHB(cont))
+ continue;
+
+ nPCIHostBridges++;
+ }
+
+ for (i = 0; i < def->nhostdevs; i++) { /* look for any VFIO-backed PCI hostdev */
+ virDomainHostdevDefPtr dev = def->hostdevs[i];
+
+ if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+ dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
+ dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
+ usesVFIO = true;
+ break; /* a single match is enough, stop scanning */
+ }
+ }
+
+ memory = virDomainDefGetMemoryTotal(def);
+
+ if (def->mem.max_memory)
+ maxMemory = def->mem.max_memory;
+ else
+ maxMemory = memory; /* no max_memory set: fall back to the total memory */
+
+ /* baseLimit := maxMemory / 128 (a)
+ * + 4 MiB * #PHBs + 8 MiB (b)
+ *
+ * (a) is the hash table
+ *
+ * (b) is accounting for the 32-bit DMA window - it could be either the
+ * KVM accelerated TCE tables for emulated devices, or the VFIO
+ * userspace view. The 4 MiB per-PHB (including the default one) covers
+ * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
+ * increased to help performance. The 8 MiB extra should be plenty for
+ * the TCE table index for any reasonable number of PHBs and several
+ * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
+ baseLimit = maxMemory / 128 +
+ 4096 * nPCIHostBridges + /* 4 MiB per PHB, expressed in KiB */
+ 8192; /* 8 MiB, expressed in KiB */
+
+ /* passthroughLimit := max( 2 GiB * #PHBs, (c)
+ * memory (d)
+ * + memory * 1/512 * #PHBs + 8 MiB ) (e)
+ *
+ * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
+ * rather than 1 GiB
+ *
+ * (d) is the with-DDW (and memory pre-registration and related
+ * features) DMA window accounting - assuming that we only account RAM
+ * once, even if mapped to multiple PHBs
+ *
+ * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
+ * window. This is based a bit on expected guest behaviour, but there
+ * really isn't a way to completely avoid that. We assume the guest
+ * requests a 64-bit DMA window (per PHB) just big enough to map all
+ * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
+ * kiB pages, less still if the guest is mapped with hugepages (unlike
+ * the default 32-bit DMA window, DDW windows can use large IOMMU
+ * pages). 8 MiB is for second and further level overheads, like (b) */
+ if (usesVFIO) /* otherwise passthroughLimit stays 0 */
+ passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
+ memory +
+ memory / 512 * nPCIHostBridges + 8192);
+
+ memKB = baseLimit + passthroughLimit;
+
+ return memKB << 10; /* convert the KiB total to bytes */
+}
+
+
+
+
/**
* qemuDomainGetMemLockLimitBytes:
* @def: domain definition
@@ -9836,84 +9927,8 @@ qemuDomainGetMemLockLimitBytes(virDomainDefPtr def)
if (def->mem.locked)
return VIR_DOMAIN_MEMORY_PARAM_UNLIMITED;
- if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM) {
- unsigned long long maxMemory;
- unsigned long long memory;
- unsigned long long baseLimit;
- unsigned long long passthroughLimit = 0;
- size_t nPCIHostBridges = 0;
- bool usesVFIO = false;
-
- for (i = 0; i < def->ncontrollers; i++) {
- virDomainControllerDefPtr cont = def->controllers[i];
-
- if (!virDomainControllerIsPSeriesPHB(cont))
- continue;
-
- nPCIHostBridges++;
- }
-
- for (i = 0; i < def->nhostdevs; i++) {
- virDomainHostdevDefPtr dev = def->hostdevs[i];
-
- if (dev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
- dev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
- dev->source.subsys.u.pci.backend == VIR_DOMAIN_HOSTDEV_PCI_BACKEND_VFIO) {
- usesVFIO = true;
- break;
- }
- }
-
- memory = virDomainDefGetMemoryTotal(def);
-
- if (def->mem.max_memory)
- maxMemory = def->mem.max_memory;
- else
- maxMemory = memory;
-
- /* baseLimit := maxMemory / 128 (a)
- * + 4 MiB * #PHBs + 8 MiB (b)
- *
- * (a) is the hash table
- *
- * (b) is accounting for the 32-bit DMA window - it could be either the
- * KVM accelerated TCE tables for emulated devices, or the VFIO
- * userspace view. The 4 MiB per-PHB (including the default one) covers
- * a 2GiB DMA window: default is 1GiB, but it's possible it'll be
- * increased to help performance. The 8 MiB extra should be plenty for
- * the TCE table index for any reasonable number of PHBs and several
- * spapr-vlan or spapr-vscsi devices (512kB + a tiny bit each) */
- baseLimit = maxMemory / 128 +
- 4096 * nPCIHostBridges +
- 8192;
-
- /* passthroughLimit := max( 2 GiB * #PHBs, (c)
- * memory (d)
- * + memory * 1/512 * #PHBs + 8 MiB ) (e)
- *
- * (c) is the pre-DDW VFIO DMA window accounting. We're allowing 2 GiB
- * rather than 1 GiB
- *
- * (d) is the with-DDW (and memory pre-registration and related
- * features) DMA window accounting - assuming that we only account RAM
- * once, even if mapped to multiple PHBs
- *
- * (e) is the with-DDW userspace view and overhead for the 64-bit DMA
- * window. This is based a bit on expected guest behaviour, but there
- * really isn't a way to completely avoid that. We assume the guest
- * requests a 64-bit DMA window (per PHB) just big enough to map all
- * its RAM. 4 kiB page size gives the 1/512; it will be less with 64
- * kiB pages, less still if the guest is mapped with hugepages (unlike
- * the default 32-bit DMA window, DDW windows can use large IOMMU
- * pages). 8 MiB is for second and further level overheads, like (b) */
- if (usesVFIO)
- passthroughLimit = MAX(2 * 1024 * 1024 * nPCIHostBridges,
- memory +
- memory / 512 * nPCIHostBridges + 8192);
-
- memKB = baseLimit + passthroughLimit;
- goto done;
- }
+ if (ARCH_IS_PPC64(def->os.arch) && def->virtType == VIR_DOMAIN_VIRT_KVM)
+ return getPPC64MemLockLimitBytes(def);
/* For device passthrough using VFIO the guest memory and MMIO memory
* regions need to be locked persistent in order to allow DMA.
--
2.21.0