275 lines
11 KiB
Diff
275 lines
11 KiB
Diff
From b872c9566d3f18b4d6580a7b271bbbc21c026a36 Mon Sep 17 00:00:00 2001
|
|
From: "plai@redhat.com" <plai@redhat.com>
|
|
Date: Mon, 7 Jan 2019 17:02:19 +0000
|
|
Subject: [PATCH 18/22] hostmem-file: add the 'pmem' option
|
|
|
|
RH-Author: plai@redhat.com
|
|
Message-id: <1546880543-24860-7-git-send-email-plai@redhat.com>
|
|
Patchwork-id: 83892
|
|
O-Subject: [RHEL8.0 qemu-kvm PATCH v7 06/10] hostmem-file: add the 'pmem' option
|
|
Bugzilla: 1539285
|
|
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
|
|
RH-Acked-by: Eduardo Habkost <ehabkost@redhat.com>
|
|
|
|
From: Junyan He <junyan.he@intel.com>
|
|
|
|
When QEMU emulates vNVDIMM labels and migrates vNVDIMM devices, it
|
|
needs to know whether the backend storage is a real persistent memory,
|
|
in order to decide whether special operations should be performed to
|
|
ensure the data persistence.
|
|
|
|
This boolean option 'pmem' allows users to specify whether the backend
|
|
storage of memory-backend-file is a real persistent memory. If
|
|
'pmem=on', QEMU will set the flag RAM_PMEM in the RAM block of the
|
|
corresponding memory region. If 'pmem' is set while libpmem support
|
|
is lacking, an error is generated.
|
|
|
|
Signed-off-by: Junyan He <junyan.he@intel.com>
|
|
Signed-off-by: Haozhong Zhang <haozhong.zhang@intel.com>
|
|
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
|
|
Reviewed-by: Igor Mammedov <imammedo@redhat.com>
|
|
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
|
|
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
|
|
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
|
|
(cherry picked from commit a4de8552b2580adf6fa4874439217b65d3bdd88b)
|
|
Signed-off-by: Paul Lai <plai@redhat.com>
|
|
|
|
Resolved Conflicts:
|
|
docs/nvdimm.txt
|
|
|
|
Signed-off-by: Danilo C. L. de Paula <ddepaula@redhat.com>
|
|
---
|
|
backends/hostmem-file.c | 43 +++++++++++++++++++++++++++++++++++++++++--
|
|
docs/nvdimm.txt | 42 ++++++++++++++++++++++++++++++++++++++++++
|
|
exec.c | 8 ++++++++
|
|
include/exec/memory.h | 4 ++++
|
|
include/exec/ram_addr.h | 3 +++
|
|
qemu-options.hx | 7 +++++++
|
|
6 files changed, 105 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
|
|
index 34c68bb..2476dcb 100644
|
|
--- a/backends/hostmem-file.c
|
|
+++ b/backends/hostmem-file.c
|
|
@@ -12,6 +12,7 @@
|
|
#include "qemu/osdep.h"
|
|
#include "qapi/error.h"
|
|
#include "qemu-common.h"
|
|
+#include "qemu/error-report.h"
|
|
#include "sysemu/hostmem.h"
|
|
#include "sysemu/sysemu.h"
|
|
#include "qom/object_interfaces.h"
|
|
@@ -31,9 +32,10 @@ typedef struct HostMemoryBackendFile HostMemoryBackendFile;
|
|
struct HostMemoryBackendFile {
|
|
HostMemoryBackend parent_obj;
|
|
|
|
- bool discard_data;
|
|
char *mem_path;
|
|
uint64_t align;
|
|
+ bool discard_data;
|
|
+ bool is_pmem;
|
|
};
|
|
|
|
static void
|
|
@@ -59,7 +61,8 @@ file_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
|
|
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
|
|
path,
|
|
backend->size, fb->align,
|
|
- backend->share ? RAM_SHARED : 0,
|
|
+ (backend->share ? RAM_SHARED : 0) |
|
|
+ (fb->is_pmem ? RAM_PMEM : 0),
|
|
fb->mem_path, errp);
|
|
g_free(path);
|
|
}
|
|
@@ -131,6 +134,39 @@ static void file_memory_backend_set_align(Object *o, Visitor *v,
|
|
error_propagate(errp, local_err);
|
|
}
|
|
|
|
+static bool file_memory_backend_get_pmem(Object *o, Error **errp)
|
|
+{
|
|
+ return MEMORY_BACKEND_FILE(o)->is_pmem;
|
|
+}
|
|
+
|
|
+static void file_memory_backend_set_pmem(Object *o, bool value, Error **errp)
|
|
+{
|
|
+ HostMemoryBackend *backend = MEMORY_BACKEND(o);
|
|
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
|
|
+
|
|
+ if (host_memory_backend_mr_inited(backend)) {
|
|
+ error_setg(errp, "cannot change property 'pmem' of %s '%s'",
|
|
+ object_get_typename(o),
|
|
+ object_get_canonical_path_component(o));
|
|
+ return;
|
|
+ }
|
|
+
|
|
+#ifndef CONFIG_LIBPMEM
|
|
+ if (value) {
|
|
+ Error *local_err = NULL;
|
|
+ error_setg(&local_err,
|
|
+ "Lack of libpmem support while setting the 'pmem=on'"
|
|
+ " of %s '%s'. We can't ensure data persistence.",
|
|
+ object_get_typename(o),
|
|
+ object_get_canonical_path_component(o));
|
|
+ error_propagate(errp, local_err);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ fb->is_pmem = value;
|
|
+}
|
|
+
|
|
static void file_backend_unparent(Object *obj)
|
|
{
|
|
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
|
|
@@ -162,6 +198,9 @@ file_backend_class_init(ObjectClass *oc, void *data)
|
|
file_memory_backend_get_align,
|
|
file_memory_backend_set_align,
|
|
NULL, NULL, &error_abort);
|
|
+ object_class_property_add_bool(oc, "pmem",
|
|
+ file_memory_backend_get_pmem, file_memory_backend_set_pmem,
|
|
+ &error_abort);
|
|
}
|
|
|
|
static void file_backend_instance_finalize(Object *o)
|
|
diff --git a/docs/nvdimm.txt b/docs/nvdimm.txt
|
|
index e903d8b..5f158a6 100644
|
|
--- a/docs/nvdimm.txt
|
|
+++ b/docs/nvdimm.txt
|
|
@@ -153,3 +153,45 @@ guest NVDIMM region mapping structure. This unarmed flag indicates
|
|
guest software that this vNVDIMM device contains a region that cannot
|
|
accept persistent writes. In result, for example, the guest Linux
|
|
NVDIMM driver, marks such vNVDIMM device as read-only.
|
|
+
|
|
+NVDIMM Persistence
|
|
+------------------
|
|
+
|
|
+ACPI 6.2 Errata A added support for a new Platform Capabilities Structure
|
|
+which allows the platform to communicate what features it supports related to
|
|
+NVDIMM data persistence. Users can provide a persistence value to a guest via
|
|
+the optional "nvdimm-persistence" machine command line option:
|
|
+
|
|
+ -machine pc,accel=kvm,nvdimm,nvdimm-persistence=cpu
|
|
+
|
|
+There are currently two valid values for this option:
|
|
+
|
|
+"mem-ctrl" - The platform supports flushing dirty data from the memory
|
|
+ controller to the NVDIMMs in the event of power loss.
|
|
+
|
|
+"cpu" - The platform supports flushing dirty data from the CPU cache to
|
|
+ the NVDIMMs in the event of power loss. This implies that the
|
|
+ platform also supports flushing dirty data through the memory
|
|
+ controller on power loss.
|
|
+
|
|
+If the vNVDIMM backend is in host persistent memory that can be accessed in
|
|
+SNIA NVM Programming Model [1] (e.g., Intel NVDIMM), it's suggested to set
|
|
+the 'pmem' option of memory-backend-file to 'on'. When 'pmem' is 'on' and QEMU
|
|
+is built with libpmem [2] support (configured with --enable-libpmem), QEMU
|
|
+will take necessary operations to guarantee the persistence of its own writes
|
|
+to the vNVDIMM backend (e.g., in vNVDIMM label emulation and live migration).
|
|
+If 'pmem' is 'on' while there is no libpmem support, qemu will exit and report
|
|
+a "lack of libpmem support" message to ensure the persistence is available.
|
|
+For example, if we want to ensure the persistence for some backend file,
|
|
+use the QEMU command line:
|
|
+
|
|
+ -object memory-backend-file,id=nv_mem,mem-path=/XXX/yyy,size=4G,pmem=on
|
|
+
|
|
+References
|
|
+----------
|
|
+
|
|
+[1] NVM Programming Model (NPM)
|
|
+ Version 1.2
|
|
+ https://www.snia.org/sites/default/files/technical_work/final/NVMProgrammingModel_v1.2.pdf
|
|
+[2] Persistent Memory Development Kit (PMDK), formerly known as NVML project, home page:
|
|
+ http://pmem.io/pmdk/
|
|
diff --git a/exec.c b/exec.c
|
|
index 8d58e8f..9028700 100644
|
|
--- a/exec.c
|
|
+++ b/exec.c
|
|
@@ -2049,6 +2049,9 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
|
|
Error *local_err = NULL;
|
|
int64_t file_size;
|
|
|
|
+ /* Just support these ram flags by now. */
|
|
+ assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0);
|
|
+
|
|
if (xen_enabled()) {
|
|
error_setg(errp, "-mem-path not supported with Xen");
|
|
return NULL;
|
|
@@ -3867,6 +3870,11 @@ err:
|
|
return ret;
|
|
}
|
|
|
|
+bool ramblock_is_pmem(RAMBlock *rb)
|
|
+{
|
|
+ return rb->flags & RAM_PMEM;
|
|
+}
|
|
+
|
|
#endif
|
|
|
|
void page_size_init(void)
|
|
diff --git a/include/exec/memory.h b/include/exec/memory.h
|
|
index b3abe61..fd2c574 100644
|
|
--- a/include/exec/memory.h
|
|
+++ b/include/exec/memory.h
|
|
@@ -122,6 +122,9 @@ typedef struct IOMMUNotifier IOMMUNotifier;
|
|
/* RAM can be migrated */
|
|
#define RAM_MIGRATABLE (1 << 4)
|
|
|
|
+/* RAM is a persistent kind of memory */
|
|
+#define RAM_PMEM (1 << 5)
|
|
+
|
|
static inline void iommu_notifier_init(IOMMUNotifier *n, IOMMUNotify fn,
|
|
IOMMUNotifierFlag flags,
|
|
hwaddr start, hwaddr end)
|
|
@@ -541,6 +544,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr,
|
|
* (getpagesize()) will be used.
|
|
* @ram_flags: Memory region features:
|
|
* - RAM_SHARED: memory must be mmaped with the MAP_SHARED flag
|
|
+ * - RAM_PMEM: the memory is persistent memory
|
|
* Other bits are ignored now.
|
|
* @path: the path in which to allocate the RAM.
|
|
* @errp: pointer to Error*, to store an error if it happens.
|
|
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
|
|
index 67e163e..922305d 100644
|
|
--- a/include/exec/ram_addr.h
|
|
+++ b/include/exec/ram_addr.h
|
|
@@ -70,6 +70,8 @@ static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
|
|
return host_addr_offset >> TARGET_PAGE_BITS;
|
|
}
|
|
|
|
+bool ramblock_is_pmem(RAMBlock *rb);
|
|
+
|
|
long qemu_getrampagesize(void);
|
|
unsigned long last_ram_page(void);
|
|
|
|
@@ -84,6 +86,7 @@ unsigned long last_ram_page(void);
|
|
* @ram_flags: specify the properties of the ram block, which can be one
|
|
* or bit-or of following values
|
|
* - RAM_SHARED: mmap the backing file or device with MAP_SHARED
|
|
+ * - RAM_PMEM: the backend @mem_path or @fd is persistent memory
|
|
* Other bits are ignored.
|
|
* @mem_path or @fd: specify the backing file or device
|
|
* @errp: pointer to Error*, to store an error if it happens
|
|
diff --git a/qemu-options.hx b/qemu-options.hx
|
|
index 683ab0d..1b6786b 100644
|
|
--- a/qemu-options.hx
|
|
+++ b/qemu-options.hx
|
|
@@ -4051,6 +4051,13 @@ requires an alignment different than the default one used by QEMU, eg
|
|
the device DAX /dev/dax0.0 requires 2M alignment rather than 4K. In
|
|
such cases, users can specify the required alignment via this option.
|
|
|
|
+The @option{pmem} option specifies whether the backing file specified
|
|
+by @option{mem-path} is in host persistent memory that can be accessed
|
|
+using the SNIA NVM programming model (e.g. Intel NVDIMM).
|
|
+If @option{pmem} is set to 'on', QEMU will take necessary operations to
|
|
+guarantee the persistence of its own writes to @option{mem-path}
|
|
+(e.g. in vNVDIMM label emulation and live migration).
|
|
+
|
|
@item -object memory-backend-ram,id=@var{id},merge=@var{on|off},dump=@var{on|off},share=@var{on|off},prealloc=@var{on|off},size=@var{size},host-nodes=@var{host-nodes},policy=@var{default|preferred|bind|interleave}
|
|
|
|
Creates a memory backend object, which can be used to back the guest RAM.
|
|
--
|
|
1.8.3.1
|
|
|