175 lines
5.7 KiB
Diff
|
From e69f257e657473ba59f48692d387e292a24892bb Mon Sep 17 00:00:00 2001
|
||
|
From: "plai@redhat.com" <plai@redhat.com>
|
||
|
Date: Tue, 20 Aug 2019 16:12:50 +0100
|
||
|
Subject: [PATCH 03/11] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
||
|
|
||
|
RH-Author: plai@redhat.com
|
||
|
Message-id: <1566317571-5697-4-git-send-email-plai@redhat.com>
|
||
|
Patchwork-id: 90082
|
||
|
O-Subject: [RHEL8.2 qemu-kvm PATCH 3/4] mmap-alloc: fix hugetlbfs misaligned length in ppc64
|
||
|
Bugzilla: 1539282
|
||
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
||
|
RH-Acked-by: Pankaj Gupta <pagupta@redhat.com>
|
||
|
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
|
||
|
|
||
|
From: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
||
|
|
||
|
The commit 7197fb4058bcb68986bae2bb2c04d6370f3e7218 ("util/mmap-alloc:
|
||
|
fix hugetlb support on ppc64") fixed Huge TLB mappings on ppc64.
|
||
|
|
||
|
However, we still need to consider the underlying huge page size
|
||
|
during munmap() because it requires that both address and length be a
|
||
|
multiple of the underlying huge page size for Huge TLB mappings.
|
||
|
Quote from "Huge page (Huge TLB) mappings" paragraph under NOTES
|
||
|
section of the munmap(2) manual:
|
||
|
|
||
|
"For munmap(), addr and length must both be a multiple of the
|
||
|
underlying huge page size."
|
||
|
|
||
|
On ppc64, the munmap() in qemu_ram_munmap() does not work for Huge TLB
|
||
|
mappings because the mapped segment can be aligned with the underlying
|
||
|
huge page size rather than with the native system page size, as
|
||
|
returned by getpagesize().
|
||
|
|
||
|
This has the side effect of not releasing huge pages back to the pool
|
||
|
after a hugetlbfs file-backed memory device is hot-unplugged.
|
||
|
|
||
|
This patch fixes the situation in qemu_ram_mmap() and
|
||
|
qemu_ram_munmap() by considering the underlying page size on ppc64.
|
||
|
|
||
|
After this patch, memory hot-unplug releases huge pages back to the
|
||
|
pool.
|
||
|
|
||
|
Fixes: 7197fb4058bcb68986bae2bb2c04d6370f3e7218
|
||
|
Signed-off-by: Murilo Opsfelder Araujo <muriloo@linux.ibm.com>
|
||
|
Reviewed-by: Greg Kurz <groug@kaod.org>
|
||
|
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||
|
(cherry picked from commit 53adb9d43e1abba187387a51f238e878e934c647)
|
||
|
Signed-off-by: Paul Lai <plai@redhat.com>
|
||
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
||
|
---
|
||
|
exec.c | 4 ++--
|
||
|
include/qemu/mmap-alloc.h | 2 +-
|
||
|
util/mmap-alloc.c | 22 ++++++++++++++++------
|
||
|
util/oslib-posix.c | 2 +-
|
||
|
4 files changed, 20 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/exec.c b/exec.c
|
||
|
index a79eaa3..9112d8b 100644
|
||
|
--- a/exec.c
|
||
|
+++ b/exec.c
|
||
|
@@ -1679,7 +1679,7 @@ static void *file_ram_alloc(RAMBlock *block,
|
||
|
if (mem_prealloc) {
|
||
|
os_mem_prealloc(fd, area, memory, smp_cpus, errp);
|
||
|
if (errp && *errp) {
|
||
|
- qemu_ram_munmap(area, memory);
|
||
|
+ qemu_ram_munmap(fd, area, memory);
|
||
|
return NULL;
|
||
|
}
|
||
|
}
|
||
|
@@ -2200,7 +2200,7 @@ static void reclaim_ramblock(RAMBlock *block)
|
||
|
xen_invalidate_map_cache_entry(block->host);
|
||
|
#ifndef _WIN32
|
||
|
} else if (block->fd >= 0) {
|
||
|
- qemu_ram_munmap(block->host, block->max_length);
|
||
|
+ qemu_ram_munmap(block->fd, block->host, block->max_length);
|
||
|
close(block->fd);
|
||
|
#endif
|
||
|
} else {
|
||
|
diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h
|
||
|
index 190688a..eec98d8 100644
|
||
|
--- a/include/qemu/mmap-alloc.h
|
||
|
+++ b/include/qemu/mmap-alloc.h
|
||
|
@@ -28,6 +28,6 @@ void *qemu_ram_mmap(int fd,
|
||
|
bool shared,
|
||
|
bool is_pmem);
|
||
|
|
||
|
-void qemu_ram_munmap(void *ptr, size_t size);
|
||
|
+void qemu_ram_munmap(int fd, void *ptr, size_t size);
|
||
|
|
||
|
#endif
|
||
|
diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c
|
||
|
index b29fcee..bbd9077 100644
|
||
|
--- a/util/mmap-alloc.c
|
||
|
+++ b/util/mmap-alloc.c
|
||
|
@@ -82,6 +82,7 @@ void *qemu_ram_mmap(int fd,
|
||
|
int flags;
|
||
|
int guardfd;
|
||
|
size_t offset;
|
||
|
+ size_t pagesize;
|
||
|
size_t total;
|
||
|
void *guardptr;
|
||
|
void *ptr;
|
||
|
@@ -102,7 +103,8 @@ void *qemu_ram_mmap(int fd,
|
||
|
* anonymous memory is OK.
|
||
|
*/
|
||
|
flags = MAP_PRIVATE;
|
||
|
- if (fd == -1 || qemu_fd_getpagesize(fd) == getpagesize()) {
|
||
|
+ pagesize = qemu_fd_getpagesize(fd);
|
||
|
+ if (fd == -1 || pagesize == getpagesize()) {
|
||
|
guardfd = -1;
|
||
|
flags |= MAP_ANONYMOUS;
|
||
|
} else {
|
||
|
@@ -111,6 +113,7 @@ void *qemu_ram_mmap(int fd,
|
||
|
}
|
||
|
#else
|
||
|
guardfd = -1;
|
||
|
+ pagesize = getpagesize();
|
||
|
flags = MAP_PRIVATE | MAP_ANONYMOUS;
|
||
|
#endif
|
||
|
|
||
|
@@ -122,7 +125,7 @@ void *qemu_ram_mmap(int fd,
|
||
|
|
||
|
assert(is_power_of_2(align));
|
||
|
/* Always align to host page size */
|
||
|
- assert(align >= getpagesize());
|
||
|
+ assert(align >= pagesize);
|
||
|
|
||
|
flags = MAP_FIXED;
|
||
|
flags |= fd == -1 ? MAP_ANONYMOUS : 0;
|
||
|
@@ -145,17 +148,24 @@ void *qemu_ram_mmap(int fd,
|
||
|
* a guard page guarding against potential buffer overflows.
|
||
|
*/
|
||
|
total -= offset;
|
||
|
- if (total > size + getpagesize()) {
|
||
|
- munmap(ptr + size + getpagesize(), total - size - getpagesize());
|
||
|
+ if (total > size + pagesize) {
|
||
|
+ munmap(ptr + size + pagesize, total - size - pagesize);
|
||
|
}
|
||
|
|
||
|
return ptr;
|
||
|
}
|
||
|
|
||
|
-void qemu_ram_munmap(void *ptr, size_t size)
|
||
|
+void qemu_ram_munmap(int fd, void *ptr, size_t size)
|
||
|
{
|
||
|
+ size_t pagesize;
|
||
|
+
|
||
|
if (ptr) {
|
||
|
/* Unmap both the RAM block and the guard page */
|
||
|
- munmap(ptr, size + getpagesize());
|
||
|
+#if defined(__powerpc64__) && defined(__linux__)
|
||
|
+ pagesize = qemu_fd_getpagesize(fd);
|
||
|
+#else
|
||
|
+ pagesize = getpagesize();
|
||
|
+#endif
|
||
|
+ munmap(ptr, size + pagesize);
|
||
|
}
|
||
|
}
|
||
|
diff --git a/util/oslib-posix.c b/util/oslib-posix.c
|
||
|
index c36b2bb..7b6db04 100644
|
||
|
--- a/util/oslib-posix.c
|
||
|
+++ b/util/oslib-posix.c
|
||
|
@@ -153,7 +153,7 @@ void qemu_vfree(void *ptr)
|
||
|
void qemu_anon_ram_free(void *ptr, size_t size)
|
||
|
{
|
||
|
trace_qemu_anon_ram_free(ptr, size);
|
||
|
- qemu_ram_munmap(ptr, size);
|
||
|
+ qemu_ram_munmap(-1, ptr, size);
|
||
|
}
|
||
|
|
||
|
void qemu_set_block(int fd)
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|