98 lines
4.2 KiB
Diff
98 lines
4.2 KiB
Diff
From da69744f6441f4a63215841ae9add2c1ef631047 Mon Sep 17 00:00:00 2001
|
|
From: Alex Williamson <alex.williamson@redhat.com>
|
|
Date: Tue, 22 Oct 2024 14:08:29 -0600
|
|
Subject: [PATCH 2/2] vfio/helpers: Align mmaps
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
RH-Author: Alex Williamson <None>
|
|
RH-MergeRequest: 468: vfio/helpers: Align mmaps [9.6.z]
|
|
RH-Jira: RHEL-107314
|
|
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
|
|
RH-Acked-by: Peter Xu <peterx@redhat.com>
|
|
RH-Commit: [2/2] 4941ab64a5c0508eefa6bc5f923d6fcc38fdda4b
|
|
|
|
Thanks to work by Peter Xu, support is introduced in Linux v6.12 to
|
|
allow pfnmap insertions at PMD and PUD levels of the page table. This
|
|
means that provided a properly aligned mmap, the vfio driver is able
|
|
to map MMIO at significantly larger intervals than PAGE_SIZE. For
|
|
example, on x86_64 (the only architecture currently supporting huge
|
|
pfnmaps for PUD), rather than 4KiB mappings, we can map device MMIO
|
|
using 2MiB and even 1GiB page table entries.
|
|
|
|
Typically mmap will already provide PMD aligned mappings, so devices
|
|
with moderately sized MMIO ranges, even GPUs with standard 256MiB BARs,
|
|
will already take advantage of this support. However, in order to better
|
|
support devices exposing multi-GiB MMIO, such as 3D accelerators or GPUs
|
|
with resizable BARs enabled, we need to manually align the mmap.
|
|
|
|
There doesn't seem to be a way for userspace to easily learn about PMD
|
|
and PUD mapping level sizes, therefore this takes the simple approach
|
|
to align each mapping to the largest power of two dividing its size, up to 1GiB,
|
|
which is currently the maximum alignment we care about.
|
|
|
|
Cc: Peter Xu <peterx@redhat.com>
|
|
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
Reviewed-by: Peter Xu <peterx@redhat.com>
|
|
Reviewed-by: Cédric Le Goater <clg@redhat.com>
|
|
(cherry picked from commit 00b519c0bca0e933ed22e2e6f8bca6b23f41f950)
|
|
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
|
|
---
|
|
hw/vfio/helpers.c | 32 ++++++++++++++++++++++++++++++--
|
|
1 file changed, 30 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
|
|
index b9e606e364..913796f437 100644
|
|
--- a/hw/vfio/helpers.c
|
|
+++ b/hw/vfio/helpers.c
|
|
@@ -27,6 +27,7 @@
|
|
#include "trace.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu/error-report.h"
|
|
+#include "qemu/units.h"
|
|
#include "monitor/monitor.h"
|
|
|
|
/*
|
|
@@ -406,8 +407,35 @@ int vfio_region_mmap(VFIORegion *region)
|
|
prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
|
|
|
|
for (i = 0; i < region->nr_mmaps; i++) {
|
|
- region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
|
|
- MAP_SHARED, region->vbasedev->fd,
|
|
+ size_t align = MIN(1ULL << ctz64(region->mmaps[i].size), 1 * GiB);
|
|
+ void *map_base, *map_align;
|
|
+
|
|
+ /*
|
|
+ * Align the mmap for more efficient mapping in the kernel. Ideally
|
|
+ * we'd know the PMD and PUD mapping sizes to use as discrete alignment
|
|
+ * intervals, but we don't. As of Linux v6.12, the largest PUD size
|
|
+ * supporting huge pfnmap is 1GiB (ARCH_SUPPORTS_PUD_PFNMAP is only set
|
|
+ * on x86_64). Align by power-of-two size, capped at 1GiB.
|
|
+ *
|
|
+ * NB. qemu_memalign() and friends actually allocate memory, whereas
|
|
+ * the region size here can exceed host memory, therefore we manually
|
|
+ * create an oversized anonymous mapping and clean it up for alignment.
|
|
+ */
|
|
+ map_base = mmap(0, region->mmaps[i].size + align, PROT_NONE,
|
|
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
|
+ if (map_base == MAP_FAILED) {
|
|
+ ret = -errno;
|
|
+ goto no_mmap;
|
|
+ }
|
|
+
|
|
+ map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
|
|
+ munmap(map_base, map_align - map_base);
|
|
+ munmap(map_align + region->mmaps[i].size,
|
|
+ align - (map_align - map_base));
|
|
+
|
|
+ region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
|
|
+ MAP_SHARED | MAP_FIXED,
|
|
+ region->vbasedev->fd,
|
|
region->fd_offset +
|
|
region->mmaps[i].offset);
|
|
if (region->mmaps[i].mmap == MAP_FAILED) {
|
|
--
|
|
2.48.1
|
|
|