- kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch [RHEL-85635] - kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch [RHEL-88457] - kvm-distro-add-an-explicit-valgrind-devel-build-dep.patch [RHEL-88457] - kvm-Allow-guest-get-load-QGA-command.patch [RHEL-91219] - Resolves: RHEL-85635 (Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration) - Resolves: RHEL-88457 (qemu inadvertantly built with valgrind coroutine stack debugging on x86_64) - Resolves: RHEL-91219 ([qemu-guest-agent] Enable 'guest-get-load' by default [RHEL-10])
235 lines
7.9 KiB
Diff
235 lines
7.9 KiB
Diff
From 2e49bdf20390600286cd2596859feb400a932c44 Mon Sep 17 00:00:00 2001
|
|
From: Peter Xu <peterx@redhat.com>
|
|
Date: Thu, 24 Apr 2025 18:07:05 -0400
|
|
Subject: [PATCH 1/4] migration/postcopy: Spatial locality page hint for
|
|
preempt mode
|
|
|
|
RH-Author: Peter Xu <peterx@redhat.com>
|
|
RH-MergeRequest: 357: migration/postcopy: Spatial locality page hint for preempt mode
|
|
RH-Jira: RHEL-85635
|
|
RH-Acked-by: Juraj Marcin <None>
|
|
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
|
|
RH-Commit: [1/1] 05f3befe60e1d3f2f8beaaf0e0e577e49a126dfe (peterx/qemu-kvm)
|
|
|
|
The preempt mode postcopy has been introduced for a while. From latency
|
|
POV, it should always win the vanilla postcopy.
|
|
|
|
However there's one thing missing when preempt mode is enabled right now,
|
|
which is the spatial locality hint when there're page requests from the
|
|
destination side.
|
|
|
|
In vanilla postcopy, as long as a page request was unqueued, it will update
|
|
the PSS of the precopy background stream, so that after a page request the
|
|
background thread will move the pages after whatever was requested. It's
|
|
pretty much a natural behavior when there's only one channel anyway, and
|
|
one scanner to send the pages.
|
|
|
|
Preempt mode didn't follow that, because preempt mode has its own channel
|
|
and its own PSS (which doesn't linearly scan the guest memory, but
|
|
dedicated to resolve page requested from destination). So the page request
|
|
process and the background migration process are completely separate.
|
|
|
|
This patch adds the hint explicitly for preempt mode. With that, whenever
|
|
the preempt mode receives a page request on the source, it will service the
|
|
remote page fault in the return path, then it'll provide a hint to the
|
|
background thread so that we'll start sending the pages right after the
|
|
requested ones in the background, assuming the follow up pages have a
|
|
higher chance to be accessed later.
|
|
|
|
NOTE: since the background migration thread and return path thread run
|
|
completely concurrently, it doesn't always mean the hint will be applied
|
|
every single time. For example, it's possible that the return path thread
|
|
receives multiple page requests in a row without the background thread
|
|
getting the chance to consume one. In such case, the preempt thread only
|
|
provide the hint if the previous hint has been consumed. After all,
|
|
there's no point queuing hints when we only have one linear scanner.
|
|
|
|
This could measureably improve the simple sequential memory access pattern
|
|
during postcopy (when preempt is on). For random accesses, I can measure a
|
|
slight increase of remote page fault latency from ~500us -> ~600us, that
|
|
could be a trade-off to have such hint mechanism, and after all that's
|
|
still greatly improved comparing to vanilla postcopy on random (~10ms).
|
|
|
|
The patch is verified by our QE team in a video streaming test case, to
|
|
reduce the pause of the video from ~1min to a few seconds when switching
|
|
over to postcopy with preempt mode.
|
|
|
|
Reported-by: Xiaohui Li <xiaohli@redhat.com>
|
|
Tested-by: Xiaohui Li <xiaohli@redhat.com>
|
|
Reviewed-by: Juraj Marcin <jmarcin@redhat.com>
|
|
Link: https://lore.kernel.org/r/20250424220705.195544-1-peterx@redhat.com
|
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
(cherry picked from commit 20d82622812d888478d04a2d0d8575d70eb5d749)
|
|
Signed-off-by: Peter Xu <peterx@redhat.com>
|
|
---
|
|
migration/ram.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++-
|
|
1 file changed, 96 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/migration/ram.c b/migration/ram.c
|
|
index 424df6d9f1..21d2f87ff1 100644
|
|
--- a/migration/ram.c
|
|
+++ b/migration/ram.c
|
|
@@ -91,6 +91,36 @@
|
|
|
|
XBZRLECacheStats xbzrle_counters;
|
|
|
|
+/*
|
|
+ * This structure locates a specific location of a guest page. In QEMU,
|
|
+ * it's described in a tuple of (ramblock, offset).
|
|
+ */
|
|
+struct PageLocation {
|
|
+ RAMBlock *block;
|
|
+ unsigned long offset;
|
|
+};
|
|
+typedef struct PageLocation PageLocation;
|
|
+
|
|
+/**
|
|
+ * PageLocationHint: describes a hint to a page location
|
|
+ *
|
|
+ * @valid set if the hint is vaild and to be consumed
|
|
+ * @location: the hint content
|
|
+ *
|
|
+ * In postcopy preempt mode, the urgent channel may provide hints to the
|
|
+ * background channel, so that QEMU source can try to migrate whatever is
|
|
+ * right after the requested urgent pages.
|
|
+ *
|
|
+ * This is based on the assumption that the VM (already running on the
|
|
+ * destination side) tends to access the memory with spatial locality.
|
|
+ * This is also the default behavior of vanilla postcopy (preempt off).
|
|
+ */
|
|
+struct PageLocationHint {
|
|
+ bool valid;
|
|
+ PageLocation location;
|
|
+};
|
|
+typedef struct PageLocationHint PageLocationHint;
|
|
+
|
|
/* used by the search for pages to send */
|
|
struct PageSearchStatus {
|
|
/* The migration channel used for a specific host page */
|
|
@@ -395,6 +425,13 @@ struct RAMState {
|
|
* RAM migration.
|
|
*/
|
|
unsigned int postcopy_bmap_sync_requested;
|
|
+ /*
|
|
+ * Page hint during postcopy when preempt mode is on. Return path
|
|
+ * thread sets it, while background migration thread consumes it.
|
|
+ *
|
|
+ * Protected by @bitmap_mutex.
|
|
+ */
|
|
+ PageLocationHint page_hint;
|
|
};
|
|
typedef struct RAMState RAMState;
|
|
|
|
@@ -2039,6 +2076,21 @@ static void pss_host_page_finish(PageSearchStatus *pss)
|
|
pss->host_page_start = pss->host_page_end = 0;
|
|
}
|
|
|
|
+static void ram_page_hint_update(RAMState *rs, PageSearchStatus *pss)
|
|
+{
|
|
+ PageLocationHint *hint = &rs->page_hint;
|
|
+
|
|
+ /* If there's a pending hint not consumed, don't bother */
|
|
+ if (hint->valid) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Provide a hint to the background stream otherwise */
|
|
+ hint->location.block = pss->block;
|
|
+ hint->location.offset = pss->page;
|
|
+ hint->valid = true;
|
|
+}
|
|
+
|
|
/*
|
|
* Send an urgent host page specified by `pss'. Need to be called with
|
|
* bitmap_mutex held.
|
|
@@ -2084,6 +2136,7 @@ out:
|
|
/* For urgent requests, flush immediately if sent */
|
|
if (sent) {
|
|
qemu_fflush(pss->pss_channel);
|
|
+ ram_page_hint_update(rs, pss);
|
|
}
|
|
return ret;
|
|
}
|
|
@@ -2171,6 +2224,30 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
|
|
return (res < 0 ? res : pages);
|
|
}
|
|
|
|
+static bool ram_page_hint_valid(RAMState *rs)
|
|
+{
|
|
+ /* There's only page hint during postcopy preempt mode */
|
|
+ if (!postcopy_preempt_active()) {
|
|
+ return false;
|
|
+ }
|
|
+
|
|
+ return rs->page_hint.valid;
|
|
+}
|
|
+
|
|
+static void ram_page_hint_collect(RAMState *rs, RAMBlock **block,
|
|
+ unsigned long *page)
|
|
+{
|
|
+ PageLocationHint *hint = &rs->page_hint;
|
|
+
|
|
+ assert(hint->valid);
|
|
+
|
|
+ *block = hint->location.block;
|
|
+ *page = hint->location.offset;
|
|
+
|
|
+ /* Mark the hint consumed */
|
|
+ hint->valid = false;
|
|
+}
|
|
+
|
|
/**
|
|
* ram_find_and_save_block: finds a dirty page and sends it to f
|
|
*
|
|
@@ -2187,6 +2264,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss)
|
|
static int ram_find_and_save_block(RAMState *rs)
|
|
{
|
|
PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY];
|
|
+ unsigned long next_page;
|
|
+ RAMBlock *next_block;
|
|
int pages = 0;
|
|
|
|
/* No dirty page as there is zero RAM */
|
|
@@ -2206,7 +2285,14 @@ static int ram_find_and_save_block(RAMState *rs)
|
|
rs->last_page = 0;
|
|
}
|
|
|
|
- pss_init(pss, rs->last_seen_block, rs->last_page);
|
|
+ if (ram_page_hint_valid(rs)) {
|
|
+ ram_page_hint_collect(rs, &next_block, &next_page);
|
|
+ } else {
|
|
+ next_block = rs->last_seen_block;
|
|
+ next_page = rs->last_page;
|
|
+ }
|
|
+
|
|
+ pss_init(pss, next_block, next_page);
|
|
|
|
while (true){
|
|
if (!get_queued_page(rs, pss)) {
|
|
@@ -2339,6 +2425,13 @@ static void ram_save_cleanup(void *opaque)
|
|
ram_state_cleanup(rsp);
|
|
}
|
|
|
|
+static void ram_page_hint_reset(PageLocationHint *hint)
|
|
+{
|
|
+ hint->location.block = NULL;
|
|
+ hint->location.offset = 0;
|
|
+ hint->valid = false;
|
|
+}
|
|
+
|
|
static void ram_state_reset(RAMState *rs)
|
|
{
|
|
int i;
|
|
@@ -2351,6 +2444,8 @@ static void ram_state_reset(RAMState *rs)
|
|
rs->last_page = 0;
|
|
rs->last_version = ram_list.version;
|
|
rs->xbzrle_started = false;
|
|
+
|
|
+ ram_page_hint_reset(&rs->page_hint);
|
|
}
|
|
|
|
#define MAX_WAIT 50 /* ms, half buffered_file limit */
|
|
--
|
|
2.39.3
|
|
|