330 lines
12 KiB
Diff
330 lines
12 KiB
Diff
From f4b01d645926faab2cab86fadb7398c26d6b8285 Mon Sep 17 00:00:00 2001
|
|
From: Xiaoyao Li <xiaoyao.li@intel.com>
|
|
Date: Wed, 20 Mar 2024 03:39:02 -0500
|
|
Subject: [PATCH 028/100] RAMBlock: Add support of KVM private guest memfd
|
|
|
|
RH-Author: Paolo Bonzini <pbonzini@redhat.com>
|
|
RH-MergeRequest: 245: SEV-SNP support
|
|
RH-Jira: RHEL-39544
|
|
RH-Acked-by: Thomas Huth <thuth@redhat.com>
|
|
RH-Acked-by: Bandan Das <bdas@redhat.com>
|
|
RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
|
|
RH-Commit: [28/91] 95fdf196afcb67113834c20fa354ee1397411bfd (bonzini/rhel-qemu-kvm)
|
|
|
|
Add KVM guest_memfd support to RAMBlock so both normal hva based memory
|
|
and kvm guest memfd based private memory can be associated in one RAMBlock.
|
|
|
|
Introduce a new flag, RAM_GUEST_MEMFD. When it is set, the KVM ioctl is called to
|
|
create private guest_memfd during RAMBlock setup.
|
|
|
|
Allocating a new RAM_GUEST_MEMFD flag to instruct the setup of guest memfd
|
|
is more flexible and extensible than simply relying on the VM type because
|
|
in the future we may have the case that not all the memory of a VM needs
|
|
guest memfd. As a benefit, it also avoids getting MachineState in the memory
|
|
subsystem.
|
|
|
|
Note, RAM_GUEST_MEMFD is supposed to be set for memory backends of
|
|
confidential guests, such as TDX VM. How and when to set it for memory
|
|
backends will be implemented in the following patches.
|
|
|
|
Introduce memory_region_has_guest_memfd() to query if the MemoryRegion has
|
|
KVM guest_memfd allocated.
|
|
|
|
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
|
|
Reviewed-by: David Hildenbrand <david@redhat.com>
|
|
Message-ID: <20240320083945.991426-7-michael.roth@amd.com>
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
(cherry picked from commit 15f7a80c49cb3637f62fa37fa4a17da913bd91ff)
|
|
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
|
|
---
|
|
accel/kvm/kvm-all.c | 28 ++++++++++++++++++++++++++++
|
|
accel/stubs/kvm-stub.c | 5 +++++
|
|
include/exec/memory.h | 20 +++++++++++++++++---
|
|
include/exec/ram_addr.h | 2 +-
|
|
include/exec/ramblock.h | 1 +
|
|
include/sysemu/kvm.h | 2 ++
|
|
system/memory.c | 5 +++++
|
|
system/physmem.c | 34 +++++++++++++++++++++++++++++++---
|
|
8 files changed, 90 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
|
|
index 272e945f52..a7b9a127dd 100644
|
|
--- a/accel/kvm/kvm-all.c
|
|
+++ b/accel/kvm/kvm-all.c
|
|
@@ -92,6 +92,7 @@ static bool kvm_has_guest_debug;
|
|
static int kvm_sstep_flags;
|
|
static bool kvm_immediate_exit;
|
|
static uint64_t kvm_supported_memory_attributes;
|
|
+static bool kvm_guest_memfd_supported;
|
|
static hwaddr kvm_max_slot_size = ~0;
|
|
|
|
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
|
@@ -2419,6 +2420,11 @@ static int kvm_init(MachineState *ms)
|
|
}
|
|
|
|
kvm_supported_memory_attributes = kvm_check_extension(s, KVM_CAP_MEMORY_ATTRIBUTES);
|
|
+ kvm_guest_memfd_supported =
|
|
+ kvm_check_extension(s, KVM_CAP_GUEST_MEMFD) &&
|
|
+ kvm_check_extension(s, KVM_CAP_USER_MEMORY2) &&
|
|
+ (kvm_supported_memory_attributes & KVM_MEMORY_ATTRIBUTE_PRIVATE);
|
|
+
|
|
kvm_immediate_exit = kvm_check_extension(s, KVM_CAP_IMMEDIATE_EXIT);
|
|
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
|
|
|
@@ -4138,3 +4144,25 @@ void kvm_mark_guest_state_protected(void)
|
|
{
|
|
kvm_state->guest_state_protected = true;
|
|
}
|
|
+
|
|
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
|
+{
|
|
+ int fd;
|
|
+ struct kvm_create_guest_memfd guest_memfd = {
|
|
+ .size = size,
|
|
+ .flags = flags,
|
|
+ };
|
|
+
|
|
+ if (!kvm_guest_memfd_supported) {
|
|
+ error_setg(errp, "KVM does not support guest_memfd");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
|
|
+ if (fd < 0) {
|
|
+ error_setg_errno(errp, errno, "Error creating KVM guest_memfd");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return fd;
|
|
+}
|
|
diff --git a/accel/stubs/kvm-stub.c b/accel/stubs/kvm-stub.c
|
|
index ca38172884..8e0eb22e61 100644
|
|
--- a/accel/stubs/kvm-stub.c
|
|
+++ b/accel/stubs/kvm-stub.c
|
|
@@ -129,3 +129,8 @@ bool kvm_hwpoisoned_mem(void)
|
|
{
|
|
return false;
|
|
}
|
|
+
|
|
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp)
|
|
+{
|
|
+ return -ENOSYS;
|
|
+}
|
|
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
|
index 8626a355b3..679a847685 100644
|
|
--- a/include/exec/memory.h
|
|
+++ b/include/exec/memory.h
|
|
@@ -243,6 +243,9 @@ typedef struct IOMMUTLBEvent {
|
|
/* RAM FD is opened read-only */
|
|
#define RAM_READONLY_FD (1 << 11)
|
|
|
|
+/* RAM can be private that has kvm guest memfd backend */
|
|
+#define RAM_GUEST_MEMFD (1 << 12)
|
|
+
|
|
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
|
IOMMUNotifierFlag flags,
|
|
hwaddr start, hwaddr end,
|
|
@@ -1307,7 +1310,8 @@ bool memory_region_init_ram_nomigrate(MemoryRegion *mr,
|
|
* @name: Region name, becomes part of RAMBlock name used in migration stream
|
|
* must be unique within any device
|
|
* @size: size of the region.
|
|
- * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE.
|
|
+ * @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_NORESERVE,
|
|
+ * RAM_GUEST_MEMFD.
|
|
* @errp: pointer to Error*, to store an error if it happens.
|
|
*
|
|
* Note that this function does not do anything to cause the data in the
|
|
@@ -1369,7 +1373,7 @@ bool memory_region_init_resizeable_ram(MemoryRegion *mr,
|
|
* (getpagesize()) will be used.
|
|
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
|
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
|
- * RAM_READONLY_FD
|
|
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
|
* @path: the path in which to allocate the RAM.
|
|
* @offset: offset within the file referenced by path
|
|
* @errp: pointer to Error*, to store an error if it happens.
|
|
@@ -1399,7 +1403,7 @@ bool memory_region_init_ram_from_file(MemoryRegion *mr,
|
|
* @size: size of the region.
|
|
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
|
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
|
- * RAM_READONLY_FD
|
|
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
|
* @fd: the fd to mmap.
|
|
* @offset: offset within the file referenced by fd
|
|
* @errp: pointer to Error*, to store an error if it happens.
|
|
@@ -1722,6 +1726,16 @@ static inline bool memory_region_is_romd(MemoryRegion *mr)
|
|
*/
|
|
bool memory_region_is_protected(MemoryRegion *mr);
|
|
|
|
+/**
|
|
+ * memory_region_has_guest_memfd: check whether a memory region has guest_memfd
|
|
+ * associated
|
|
+ *
|
|
+ * Returns %true if a memory region's ram_block has valid guest_memfd assigned.
|
|
+ *
|
|
+ * @mr: the memory region being queried
|
|
+ */
|
|
+bool memory_region_has_guest_memfd(MemoryRegion *mr);
|
|
+
|
|
/**
|
|
* memory_region_get_iommu: check whether a memory region is an iommu
|
|
*
|
|
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
|
|
index de45ba7bc9..07c8f86375 100644
|
|
--- a/include/exec/ram_addr.h
|
|
+++ b/include/exec/ram_addr.h
|
|
@@ -110,7 +110,7 @@ long qemu_maxrampagesize(void);
|
|
* @mr: the memory region where the ram block is
|
|
* @ram_flags: RamBlock flags. Supported flags: RAM_SHARED, RAM_PMEM,
|
|
* RAM_NORESERVE, RAM_PROTECTED, RAM_NAMED_FILE, RAM_READONLY,
|
|
- * RAM_READONLY_FD
|
|
+ * RAM_READONLY_FD, RAM_GUEST_MEMFD
|
|
* @mem_path or @fd: specify the backing file or device
|
|
* @offset: Offset into target file
|
|
* @errp: pointer to Error*, to store an error if it happens
|
|
diff --git a/include/exec/ramblock.h b/include/exec/ramblock.h
|
|
index 848915ea5b..459c8917de 100644
|
|
--- a/include/exec/ramblock.h
|
|
+++ b/include/exec/ramblock.h
|
|
@@ -41,6 +41,7 @@ struct RAMBlock {
|
|
QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
|
|
int fd;
|
|
uint64_t fd_offset;
|
|
+ int guest_memfd;
|
|
size_t page_size;
|
|
/* dirty bitmap used during migration */
|
|
unsigned long *bmap;
|
|
diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
|
|
index f114ff6986..9e4ab7ae89 100644
|
|
--- a/include/sysemu/kvm.h
|
|
+++ b/include/sysemu/kvm.h
|
|
@@ -537,6 +537,8 @@ void kvm_mark_guest_state_protected(void);
|
|
*/
|
|
bool kvm_hwpoisoned_mem(void);
|
|
|
|
+int kvm_create_guest_memfd(uint64_t size, uint64_t flags, Error **errp);
|
|
+
|
|
int kvm_set_memory_attributes_private(hwaddr start, uint64_t size);
|
|
int kvm_set_memory_attributes_shared(hwaddr start, uint64_t size);
|
|
|
|
diff --git a/system/memory.c b/system/memory.c
|
|
index a229a79988..c756950c0c 100644
|
|
--- a/system/memory.c
|
|
+++ b/system/memory.c
|
|
@@ -1850,6 +1850,11 @@ bool memory_region_is_protected(MemoryRegion *mr)
|
|
return mr->ram && (mr->ram_block->flags & RAM_PROTECTED);
|
|
}
|
|
|
|
+bool memory_region_has_guest_memfd(MemoryRegion *mr)
|
|
+{
|
|
+ return mr->ram_block && mr->ram_block->guest_memfd >= 0;
|
|
+}
|
|
+
|
|
uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr)
|
|
{
|
|
uint8_t mask = mr->dirty_log_mask;
|
|
diff --git a/system/physmem.c b/system/physmem.c
|
|
index a4fe3d2bf8..f5dfa20e57 100644
|
|
--- a/system/physmem.c
|
|
+++ b/system/physmem.c
|
|
@@ -1808,6 +1808,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
const bool shared = qemu_ram_is_shared(new_block);
|
|
RAMBlock *block;
|
|
RAMBlock *last_block = NULL;
|
|
+ bool free_on_error = false;
|
|
ram_addr_t old_ram_size, new_ram_size;
|
|
Error *err = NULL;
|
|
|
|
@@ -1837,6 +1838,19 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
return;
|
|
}
|
|
memory_try_enable_merging(new_block->host, new_block->max_length);
|
|
+ free_on_error = true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (new_block->flags & RAM_GUEST_MEMFD) {
|
|
+ assert(kvm_enabled());
|
|
+ assert(new_block->guest_memfd < 0);
|
|
+
|
|
+ new_block->guest_memfd = kvm_create_guest_memfd(new_block->max_length,
|
|
+ 0, errp);
|
|
+ if (new_block->guest_memfd < 0) {
|
|
+ qemu_mutex_unlock_ramlist();
|
|
+ goto out_free;
|
|
}
|
|
}
|
|
|
|
@@ -1888,6 +1902,13 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
|
|
ram_block_notify_add(new_block->host, new_block->used_length,
|
|
new_block->max_length);
|
|
}
|
|
+ return;
|
|
+
|
|
+out_free:
|
|
+ if (free_on_error) {
|
|
+ qemu_anon_ram_free(new_block->host, new_block->max_length);
|
|
+ new_block->host = NULL;
|
|
+ }
|
|
}
|
|
|
|
#ifdef CONFIG_POSIX
|
|
@@ -1902,7 +1923,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
|
/* Just support these ram flags by now. */
|
|
assert((ram_flags & ~(RAM_SHARED | RAM_PMEM | RAM_NORESERVE |
|
|
RAM_PROTECTED | RAM_NAMED_FILE | RAM_READONLY |
|
|
- RAM_READONLY_FD)) == 0);
|
|
+ RAM_READONLY_FD | RAM_GUEST_MEMFD)) == 0);
|
|
|
|
if (xen_enabled()) {
|
|
error_setg(errp, "-mem-path not supported with Xen");
|
|
@@ -1939,6 +1960,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
|
new_block->used_length = size;
|
|
new_block->max_length = size;
|
|
new_block->flags = ram_flags;
|
|
+ new_block->guest_memfd = -1;
|
|
new_block->host = file_ram_alloc(new_block, size, fd, !file_size, offset,
|
|
errp);
|
|
if (!new_block->host) {
|
|
@@ -2018,7 +2040,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
|
int align;
|
|
|
|
assert((ram_flags & ~(RAM_SHARED | RAM_RESIZEABLE | RAM_PREALLOC |
|
|
- RAM_NORESERVE)) == 0);
|
|
+ RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
|
assert(!host ^ (ram_flags & RAM_PREALLOC));
|
|
|
|
align = qemu_real_host_page_size();
|
|
@@ -2033,6 +2055,7 @@ RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
|
|
new_block->max_length = max_size;
|
|
assert(max_size >= size);
|
|
new_block->fd = -1;
|
|
+ new_block->guest_memfd = -1;
|
|
new_block->page_size = qemu_real_host_page_size();
|
|
new_block->host = host;
|
|
new_block->flags = ram_flags;
|
|
@@ -2055,7 +2078,7 @@ RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
|
|
RAMBlock *qemu_ram_alloc(ram_addr_t size, uint32_t ram_flags,
|
|
MemoryRegion *mr, Error **errp)
|
|
{
|
|
- assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE)) == 0);
|
|
+ assert((ram_flags & ~(RAM_SHARED | RAM_NORESERVE | RAM_GUEST_MEMFD)) == 0);
|
|
return qemu_ram_alloc_internal(size, size, NULL, NULL, ram_flags, mr, errp);
|
|
}
|
|
|
|
@@ -2083,6 +2106,11 @@ static void reclaim_ramblock(RAMBlock *block)
|
|
} else {
|
|
qemu_anon_ram_free(block->host, block->max_length);
|
|
}
|
|
+
|
|
+ if (block->guest_memfd >= 0) {
|
|
+ close(block->guest_memfd);
|
|
+ }
|
|
+
|
|
g_free(block);
|
|
}
|
|
|
|
--
|
|
2.39.3
|
|
|