From 9f0b20db5e14de43fd393e280e96e50c1616413f Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 12 May 2025 02:34:48 -0400 Subject: [PATCH 1/2] * Mon May 12 2025 Miroslav Rezanina - 10.0.0-2 - kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch [RHEL-87642] - kvm-block-io-skip-head-tail-requests-on-EINVAL.patch [RHEL-87642] - kvm-file-posix-Fix-crash-on-discard_granularity-0.patch [RHEL-87642] - kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch [RHEL-86056] - Resolves: RHEL-87642 (QEMU sends unaligned discards on 4K devices[RHEL-10]) - Resolves: RHEL-86056 (Enable 'vhost-user-gpu-pci' in qemu-kvm for RHIVOS) --- ...Enable-vhost-user-gpu-pci-for-RHIVOS.patch | 39 ++++++ ...io-skip-head-tail-requests-on-EINVAL.patch | 74 ++++++++++ ...x-Fix-crash-on-discard_granularity-0.patch | 46 ++++++ ...-discard-alignment-on-Linux-block-de.patch | 131 ++++++++++++++++++ qemu-kvm.spec | 20 ++- 5 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch create mode 100644 kvm-block-io-skip-head-tail-requests-on-EINVAL.patch create mode 100644 kvm-file-posix-Fix-crash-on-discard_granularity-0.patch create mode 100644 kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch diff --git a/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch b/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch new file mode 100644 index 0000000..659e692 --- /dev/null +++ b/kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch @@ -0,0 +1,39 @@ +From 86f52a84c158f7b31455596c9700124977696314 Mon Sep 17 00:00:00 2001 +From: Miroslav Rezanina +Date: Tue, 29 Apr 2025 09:09:51 -0400 +Subject: [PATCH 4/4] Enable vhost-user-gpu-pci for RHIVOS + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 356: Enable vhost-user-gpu-pci for RHIVOS +RH-Jira: RHEL-86056 +RH-Commit: [1/1] d323301596f82a3b0a98b0ac99f839d39199ab32 (mrezanin/centos-src-qemu-kvm) + +RHIVOS needs vhost-user-gpu-pci device to be available. 
+ +Signed-off-by: Miroslav Rezanina +--- + configs/devices/aarch64-softmmu/aarch64-rh-devices.mak | 1 + + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 + + 2 files changed, 2 insertions(+) + +diff --git a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +index dce5fca821..197fabeb00 100644 +--- a/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak ++++ b/configs/devices/aarch64-softmmu/aarch64-rh-devices.mak +@@ -45,3 +45,4 @@ CONFIG_VHOST_USER_FS=y + CONFIG_IOMMUFD=y + CONFIG_VHOST_USER_SND=y + CONFIG_VHOST_USER_SCMI=y ++CONFIG_VHOST_USER_GPU=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index 8da1a8f82f..097dad9003 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -113,3 +113,4 @@ CONFIG_VHOST_USER_VSOCK=y + CONFIG_VHOST_USER_FS=y + CONFIG_IOMMUFD=y + CONFIG_VHOST_USER_SND=y ++CONFIG_VHOST_USER_GPU=y +-- +2.39.3 + diff --git a/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch b/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch new file mode 100644 index 0000000..9650c04 --- /dev/null +++ b/kvm-block-io-skip-head-tail-requests-on-EINVAL.patch @@ -0,0 +1,74 @@ +From 8d520ef6e8959a017535ecfc556b067e4b118cb7 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 17 Apr 2025 11:05:28 -0400 +Subject: [PATCH 2/4] block/io: skip head/tail requests on EINVAL + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices +RH-Jira: RHEL-87642 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Commit: [2/3] 5fddeb3ba2df7c61cdb8dd709e56914f3b5c0972 (stefanha/centos-stream-qemu-kvm) + +When guests send misaligned discard requests, the block layer breaks +them up into a misaligned head, an aligned main body, and a misaligned +tail. 
+ +The file-posix block driver on Linux returns -EINVAL on misaligned +discard requests. This causes bdrv_co_pdiscard() to fail and guests +configured with werror=stop will pause. + +Add a special case for misaligned head/tail requests. Simply continue +when EINVAL is encountered so that the aligned main body of the request +can be completed and the guest is not paused. This is the best we can do +when guest discard limits do not match the host discard limits. + +Fixes: https://issues.redhat.com/browse/RHEL-86032 +Signed-off-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Message-ID: <20250417150528.76470-3-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 4733cb0833c4b223f92ec0136980eeb5239ecb87) +Signed-off-by: Stefan Hajnoczi +--- + block/io.c | 15 ++++++++++----- + 1 file changed, 10 insertions(+), 5 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 1ba8d1aeea..ccec11386b 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -3109,11 +3109,12 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + /* Invalidate the cached block-status data range if this discard overlaps */ + bdrv_bsc_invalidate_range(bs, offset, bytes); + +- /* Discard is advisory, but some devices track and coalesce ++ /* ++ * Discard is advisory, but some devices track and coalesce + * unaligned requests, so we must pass everything down rather than +- * round here. Still, most devices will just silently ignore +- * unaligned requests (by returning -ENOTSUP), so we must fragment +- * the request accordingly. */ ++ * round here. Still, most devices reject unaligned requests with ++ * -EINVAL or -ENOTSUP, so we must fragment the request accordingly. 
++ */ + align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment); + assert(align % bs->bl.request_alignment == 0); + head = offset % align; +@@ -3180,7 +3181,11 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset, + } + } + if (ret && ret != -ENOTSUP) { +- goto out; ++ if (ret == -EINVAL && (offset % align != 0 || num % align != 0)) { ++ /* Silently skip rejected unaligned head/tail requests */ ++ } else { ++ goto out; /* bail out */ ++ } + } + + offset += num; +-- +2.39.3 + diff --git a/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch b/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch new file mode 100644 index 0000000..d7145e3 --- /dev/null +++ b/kvm-file-posix-Fix-crash-on-discard_granularity-0.patch @@ -0,0 +1,46 @@ +From 4d575970f12462a054a207b593438aff0d40881a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 29 Apr 2025 17:56:54 +0200 +Subject: [PATCH 3/4] file-posix: Fix crash on discard_granularity == 0 + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices +RH-Jira: RHEL-87642 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Commit: [3/3] dbe73aef453e77263d30ebebc690ab21145f6bab (stefanha/centos-stream-qemu-kvm) + +Block devices that don't support discard have a discard_granularity of +0. Currently, this results in a division by zero when we try to make +sure that it's a multiple of request_alignment. Only try to update +bs->bl.pdiscard_alignment when we got a non-zero discard_granularity +from sysfs. 
+ +Fixes: f605796aae4 ('file-posix: probe discard alignment on Linux block devices') +Signed-off-by: Kevin Wolf +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Eric Blake +Message-ID: <20250429155654.102735-1-kwolf@redhat.com> +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit 71a30d54e6ab1d5c102a8bee2c263414697402ea) +Signed-off-by: Stefan Hajnoczi +--- + block/file-posix.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 0d6e12f880..0d85123d0f 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1573,7 +1573,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + int ret; + + ret = hdev_get_pdiscard_alignment(&st, &dalign); +- if (ret == 0) { ++ if (ret == 0 && dalign != 0) { + uint32_t ralign = bs->bl.request_alignment; + + /* Probably never happens, but handle it just in case */ +-- +2.39.3 + diff --git a/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch b/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch new file mode 100644 index 0000000..cd24d41 --- /dev/null +++ b/kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch @@ -0,0 +1,131 @@ +From 2baedec75a8a0daf9e93228795d1e6f2974f4825 Mon Sep 17 00:00:00 2001 +From: Stefan Hajnoczi +Date: Thu, 17 Apr 2025 11:05:27 -0400 +Subject: [PATCH 1/4] file-posix: probe discard alignment on Linux block + devices + +RH-Author: Stefan Hajnoczi +RH-MergeRequest: 354: file-posix: probe discard alignment on Linux block devices +RH-Jira: RHEL-87642 +RH-Acked-by: Kevin Wolf +RH-Acked-by: Eric Blake +RH-Commit: [1/3] 84de24191bfa47e94cd475e78dcafd38a50a5888 (stefanha/centos-stream-qemu-kvm) + +Populate the pdiscard_alignment block limit so the block layer is able to +align discard requests correctly. 
+ +Signed-off-by: Stefan Hajnoczi +Message-ID: <20250417150528.76470-2-stefanha@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit f605796aae42885034400c83ed6a9b07cd6d6481) +Signed-off-by: Stefan Hajnoczi +--- + block/file-posix.c | 67 +++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 66 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 56d1972d15..0d6e12f880 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -1276,10 +1276,10 @@ static int get_sysfs_zoned_model(struct stat *st, BlockZoneModel *zoned) + } + #endif /* defined(CONFIG_BLKZONED) */ + ++#ifdef CONFIG_LINUX + /* + * Get a sysfs attribute value as a long integer. + */ +-#ifdef CONFIG_LINUX + static long get_sysfs_long_val(struct stat *st, const char *attribute) + { + g_autofree char *str = NULL; +@@ -1299,6 +1299,30 @@ static long get_sysfs_long_val(struct stat *st, const char *attribute) + } + return ret; + } ++ ++/* ++ * Get a sysfs attribute value as a uint32_t. ++ */ ++static int get_sysfs_u32_val(struct stat *st, const char *attribute, ++ uint32_t *u32) ++{ ++ g_autofree char *str = NULL; ++ const char *end; ++ unsigned int val; ++ int ret; ++ ++ ret = get_sysfs_str_val(st, attribute, &str); ++ if (ret < 0) { ++ return ret; ++ } ++ ++ /* The file is ended with '\n', pass 'end' to accept that. */ ++ ret = qemu_strtoui(str, &end, 10, &val); ++ if (ret == 0 && end && *end == '\0') { ++ *u32 = val; ++ } ++ return ret; ++} + #endif + + static int hdev_get_max_segments(int fd, struct stat *st) +@@ -1318,6 +1342,23 @@ static int hdev_get_max_segments(int fd, struct stat *st) + #endif + } + ++/* ++ * Fills in *dalign with the discard alignment and returns 0 on success, ++ * -errno otherwise. ++ */ ++static int hdev_get_pdiscard_alignment(struct stat *st, uint32_t *dalign) ++{ ++#ifdef CONFIG_LINUX ++ /* ++ * Note that Linux "discard_granularity" is QEMU "discard_alignment". 
Linux ++ * "discard_alignment" is something else. ++ */ ++ return get_sysfs_u32_val(st, "discard_granularity", dalign); ++#else ++ return -ENOTSUP; ++#endif ++} ++ + #if defined(CONFIG_BLKZONED) + /* + * If the reset_all flag is true, then the wps of zone whose state is +@@ -1527,6 +1568,30 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) + } + } + ++ if (S_ISBLK(st.st_mode)) { ++ uint32_t dalign = 0; ++ int ret; ++ ++ ret = hdev_get_pdiscard_alignment(&st, &dalign); ++ if (ret == 0) { ++ uint32_t ralign = bs->bl.request_alignment; ++ ++ /* Probably never happens, but handle it just in case */ ++ if (dalign < ralign && (ralign % dalign == 0)) { ++ dalign = ralign; ++ } ++ ++ /* The block layer requires a multiple of request_alignment */ ++ if (dalign % ralign != 0) { ++ error_setg(errp, "Invalid pdiscard_alignment limit %u is not a " ++ "multiple of request_alignment %u", dalign, ralign); ++ return; ++ } ++ ++ bs->bl.pdiscard_alignment = dalign; ++ } ++ } ++ + raw_refresh_zoned_limits(bs, &st, errp); + } + +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index b6729ed..35bb1a4 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -147,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 10.0.0 -Release: 1%{?rcrel}%{?dist}%{?cc_suffix} +Release: 2%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -188,6 +188,14 @@ Patch0016: 0016-vfio-cap-number-of-devices-that-can-be-assigned.patch Patch0017: 0017-Add-support-statement-to-help-output.patch Patch0018: 0018-Use-qemu-kvm-in-documentation-instead-of-qemu-system.patch Patch0019: 0019-qcow2-Deprecation-warning-when-opening-v2-images-rw.patch +# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] +Patch20: kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch +# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] +Patch21: kvm-block-io-skip-head-tail-requests-on-EINVAL.patch +# For RHEL-87642 - QEMU sends unaligned discards on 4K devices[RHEL-10] +Patch22: kvm-file-posix-Fix-crash-on-discard_granularity-0.patch +# For RHEL-86056 - Enable 'vhost-user-gpu-pci' in qemu-kvm for RHIVOS +Patch23: kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch %if %{have_clang} BuildRequires: clang @@ -1263,6 +1271,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 12 2025 Miroslav Rezanina - 10.0.0-2 +- kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch [RHEL-87642] +- kvm-block-io-skip-head-tail-requests-on-EINVAL.patch [RHEL-87642] +- kvm-file-posix-Fix-crash-on-discard_granularity-0.patch [RHEL-87642] +- kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch [RHEL-86056] +- Resolves: RHEL-87642 + (QEMU sends unaligned discards on 4K devices[RHEL-10]) +- Resolves: RHEL-86056 + (Enable 'vhost-user-gpu-pci' in qemu-kvm for RHIVOS) + * Wed Apr 23 2025 Miroslav Rezanina - 10.0.0-1 - Rebase to QEMU 10.0.0 [RHEL-74473] - Resolves: RHEL-74473 From 9c8774ec774990491c1cb870c902d19c887f75e4 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 19 May 2025 02:23:51 -0400 Subject: [PATCH 2/2] * Mon May 19 2025 Miroslav Rezanina - 10.0.0-3 - kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch [RHEL-85635] - 
kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch [RHEL-88457] - kvm-distro-add-an-explicit-valgrind-devel-build-dep.patch [RHEL-88457] - kvm-Allow-guest-get-load-QGA-command.patch [RHEL-91219] - Resolves: RHEL-85635 (Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration) - Resolves: RHEL-88457 (qemu inadvertantly built with valgrind coroutine stack debugging on x86_64) - Resolves: RHEL-91219 ([qemu-guest-agent] Enable 'guest-get-load' by default [RHEL-10]) --- ...e-duplicate-label-in-qemu-block-driv.patch | 71 ++++++ ...add-valgrind-option-en-dis-able-valg.patch | 107 ++++++++ ...py-Spatial-locality-page-hint-for-pr.patch | 234 ++++++++++++++++++ qemu-ga.sysconfig | 2 +- qemu-kvm.spec | 27 +- 5 files changed, 439 insertions(+), 2 deletions(-) create mode 100644 kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch create mode 100644 kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch create mode 100644 kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch diff --git a/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch b/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch new file mode 100644 index 0000000..b6628e6 --- /dev/null +++ b/kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch @@ -0,0 +1,71 @@ +From c73cd0b322ad22a4ba47035ba87a0e83ff851b6a Mon Sep 17 00:00:00 2001 +From: Peter Maydell +Date: Thu, 1 May 2025 10:31:26 +0100 +Subject: [PATCH] docs: Don't define duplicate label in + qemu-block-drivers.rst.inc + +Sphinx requires that labels within documents are unique across the +whole manual. This is because the "create a hyperlink" directive +specifies only the name of the label, not a filename+label. Some +Sphinx versions will warn about duplicate labels, but even if there +is no warning there is still an ambiguity and no guarantee that the +hyperlink will be created to the right target. 
+ +For QEMU this is awkward, because we have various .rst.inc fragments +which we include into multiple .rst files. If you define a label in +the .rst.inc file then it will be a duplicate label. We have mostly +worked around this by not putting labels into those .rst.inc files, +or by adding "insert a label" functionality into the hxtool extension +(see commit 1eeb432a953b0 "doc/sphinx/hxtool.py: add optional label +argument to SRST directive"). + +Unfortunately in commit 7f6314427e78 ("docs/devel: add a codebase +section") we accidentally added a duplicate label, because not all +Sphinx versions warn about the mistake. + +In this case the link was only from the developer docs codebase +summary, so as the simplest fix for the stable branch, we drop +the link entirely. + +Cc: qemu-stable@nongnu.org +Fixes: 1eeb432a953b0 "doc/sphinx/hxtool.py: add optional label argument to SRST directive" +Reported-by: Dario Faggioli +Signed-off-by: Peter Maydell +Acked-by: Eric Blake +Reviewed-by: Pierrick Bouvier +Message-id: 20250501093126.716667-1-peter.maydell@linaro.org +Signed-off-by: Miroslav Rezanina +--- + docs/devel/codebase.rst | 2 +- + docs/system/qemu-block-drivers.rst.inc | 2 -- + 2 files changed, 1 insertion(+), 3 deletions(-) + +diff --git a/docs/devel/codebase.rst b/docs/devel/codebase.rst +index ef98578296..085da10a22 100644 +--- a/docs/devel/codebase.rst ++++ b/docs/devel/codebase.rst +@@ -116,7 +116,7 @@ yet, so sometimes the source code is all you have. + * `monitor `_: + `Monitor ` implementation (HMP & QMP). + * `nbd `_: +- QEMU `NBD (Network Block Device) ` server. ++ QEMU NBD (Network Block Device) server. + * `net `_: + Network (host) support. 
+ * `pc-bios `_: +diff --git a/docs/system/qemu-block-drivers.rst.inc b/docs/system/qemu-block-drivers.rst.inc +index cfe1acb78a..384e95ba76 100644 +--- a/docs/system/qemu-block-drivers.rst.inc ++++ b/docs/system/qemu-block-drivers.rst.inc +@@ -500,8 +500,6 @@ What you should *never* do: + - expect it to work when loadvm'ing + - write to the FAT directory on the host system while accessing it with the guest system + +-.. _nbd: +- + NBD access + ~~~~~~~~~~ + +-- +2.39.3 + diff --git a/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch b/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch new file mode 100644 index 0000000..c2fc552 --- /dev/null +++ b/kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch @@ -0,0 +1,107 @@ +From 8227a9534bb09d202441b3e554da53815be66a28 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Daniel=20P=2E=20Berrang=C3=A9?= +Date: Fri, 25 Apr 2025 13:17:12 +0100 +Subject: [PATCH 2/4] meson/configure: add 'valgrind' option & --{en, + dis}able-valgrind flag +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Daniel P. Berrangé +RH-MergeRequest: 360: distro: add an explicit valgrind-devel build dep +RH-Jira: RHEL-88457 +RH-Acked-by: Thomas Huth +RH-Acked-by: Eric Blake +RH-Commit: [1/2] 55ab738650e95ff0e951897001d9246a725ee699 (berrange/centos-src-qemu) + +Currently valgrind debugging support for coroutine stacks is enabled +unconditionally when valgrind/valgrind.h is found. There is no way +to disable valgrind support if valgrind.h is present in the build env. + +This is bad for distros, as a dependency far down the chain may cause +valgrind.h to become installed, inadvertently enabling QEMU's valgrind +debugging support. It also means if a distro wants valgrind support +there is no way to mandate this. + +The solution is to add a 'valgrind' build feature to meson and thus +configure script. + +Signed-off-by: Daniel P. 
Berrangé +Reviewed-by: Thomas Huth +Message-ID: <20250425121713.1913424-1-berrange@redhat.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 6b1c744ec0d66d6d568f9a156282153fc11a21cf) +--- + meson.build | 13 ++++++++++++- + meson_options.txt | 2 ++ + scripts/meson-buildoptions.sh | 3 +++ + 3 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/meson.build b/meson.build +index 0607c1313b..dadd47d362 100644 +--- a/meson.build ++++ b/meson.build +@@ -2617,7 +2617,17 @@ config_host_data.set('CONFIG_FSTRIM', qga_fstrim) + # has_header + config_host_data.set('CONFIG_EPOLL', cc.has_header('sys/epoll.h')) + config_host_data.set('CONFIG_LINUX_MAGIC_H', cc.has_header('linux/magic.h')) +-config_host_data.set('CONFIG_VALGRIND_H', cc.has_header('valgrind/valgrind.h')) ++valgrind = false ++if get_option('valgrind').allowed() ++ if cc.has_header('valgrind/valgrind.h') ++ valgrind = true ++ else ++ if get_option('valgrind').enabled() ++ error('valgrind requested but valgrind.h not found') ++ endif ++ endif ++endif ++config_host_data.set('CONFIG_VALGRIND_H', valgrind) + config_host_data.set('HAVE_BTRFS_H', cc.has_header('linux/btrfs.h')) + config_host_data.set('HAVE_DRM_H', cc.has_header('libdrm/drm.h')) + config_host_data.set('HAVE_OPENAT2_H', cc.has_header('linux/openat2.h')) +@@ -4856,6 +4866,7 @@ endif + if host_os == 'darwin' + summary_info += {'ParavirtualizedGraphics support': pvg} + endif ++summary_info += {'valgrind': valgrind} + summary(summary_info, bool_yn: true, section: 'Dependencies') + + if host_arch == 'unknown' +diff --git a/meson_options.txt b/meson_options.txt +index ad6996178c..6592d9fb07 100644 +--- a/meson_options.txt ++++ b/meson_options.txt +@@ -117,6 +117,8 @@ option('dbus_display', type: 'feature', value: 'auto', + description: '-display dbus support') + option('tpm', type : 'feature', value : 'auto', + description: 'TPM support') ++option('valgrind', type : 'feature', value: 'auto', ++ description: 'valgrind debug support for coroutine 
stacks') + + # Do not enable it by default even for Mingw32, because it doesn't + # work on Wine. +diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh +index e9edc8a919..541e1e7a2f 100644 +--- a/scripts/meson-buildoptions.sh ++++ b/scripts/meson-buildoptions.sh +@@ -199,6 +199,7 @@ meson_options_help() { + printf "%s\n" ' u2f U2F emulation support' + printf "%s\n" ' uadk UADK Library support' + printf "%s\n" ' usb-redir libusbredir support' ++ printf "%s\n" ' valgrind valgrind debug support for coroutine stacks' + printf "%s\n" ' vde vde network backend support' + printf "%s\n" ' vdi vdi image format support' + printf "%s\n" ' vduse-blk-export' +@@ -528,6 +529,8 @@ _meson_option_parse() { + --disable-ubsan) printf "%s" -Dubsan=false ;; + --enable-usb-redir) printf "%s" -Dusb_redir=enabled ;; + --disable-usb-redir) printf "%s" -Dusb_redir=disabled ;; ++ --enable-valgrind) printf "%s" -Dvalgrind=enabled ;; ++ --disable-valgrind) printf "%s" -Dvalgrind=disabled ;; + --enable-vde) printf "%s" -Dvde=enabled ;; + --disable-vde) printf "%s" -Dvde=disabled ;; + --enable-vdi) printf "%s" -Dvdi=enabled ;; +-- +2.39.3 + diff --git a/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch b/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch new file mode 100644 index 0000000..c6d9ec0 --- /dev/null +++ b/kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch @@ -0,0 +1,234 @@ +From 2e49bdf20390600286cd2596859feb400a932c44 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Thu, 24 Apr 2025 18:07:05 -0400 +Subject: [PATCH 1/4] migration/postcopy: Spatial locality page hint for + preempt mode + +RH-Author: Peter Xu +RH-MergeRequest: 357: migration/postcopy: Spatial locality page hint for preempt mode +RH-Jira: RHEL-85635 +RH-Acked-by: Juraj Marcin +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 05f3befe60e1d3f2f8beaaf0e0e577e49a126dfe (peterx/qemu-kvm) + +The preempt mode postcopy has been introduced for a while. 
From latency +POV, it should always win the vanilla postcopy. + +However there's one thing missing when preempt mode is enabled right now, +which is the spatial locality hint when there're page requests from the +destination side. + +In vanilla postcopy, as long as a page request was unqueued, it will update +the PSS of the precopy background stream, so that after a page request the +background thread will move the pages after whatever was requested. It's +pretty much a natural behavior when there's only one channel anyway, and +one scanner to send the pages. + +Preempt mode didn't follow that, because preempt mode has its own channel +and its own PSS (which doesn't linearly scan the guest memory, but +dedicated to resolve page requested from destination). So the page request +process and the background migration process are completely separate. + +This patch adds the hint explicitly for preempt mode. With that, whenever +the preempt mode receives a page request on the source, it will service the +remote page fault in the return path, then it'll provide a hint to the +background thread so that we'll start sending the pages right after the +requested ones in the background, assuming the follow up pages have a +higher chance to be accessed later. + +NOTE: since the background migration thread and return path thread run +completely concurrently, it doesn't always mean the hint will be applied +every single time. For example, it's possible that the return path thread +receives multiple page requests in a row without the background thread +getting the chance to consume one. In such a case, the preempt thread only +provides the hint if the previous hint has been consumed. After all, +there's no point queuing hints when we only have one linear scanner. + +This could measurably improve the simple sequential memory access pattern +during postcopy (when preempt is on). 
For random accesses, I can measure a +slight increase of remote page fault latency from ~500us -> ~600us, that +could be a trade-off to have such hint mechanism, and after all that's +still greatly improved compared to vanilla postcopy on random (~10ms). + +The patch is verified by our QE team in a video streaming test case, to +reduce the pause of the video from ~1min to a few seconds when switching +over to postcopy with preempt mode. + +Reported-by: Xiaohui Li +Tested-by: Xiaohui Li +Reviewed-by: Juraj Marcin +Link: https://lore.kernel.org/r/20250424220705.195544-1-peterx@redhat.com +Signed-off-by: Peter Xu +(cherry picked from commit 20d82622812d888478d04a2d0d8575d70eb5d749) +Signed-off-by: Peter Xu +--- + migration/ram.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 96 insertions(+), 1 deletion(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 424df6d9f1..21d2f87ff1 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -91,6 +91,36 @@ + + XBZRLECacheStats xbzrle_counters; + ++/* ++ * This structure locates a specific location of a guest page. In QEMU, ++ * it's described in a tuple of (ramblock, offset). ++ */ ++struct PageLocation { ++ RAMBlock *block; ++ unsigned long offset; ++}; ++typedef struct PageLocation PageLocation; ++ ++/** ++ * PageLocationHint: describes a hint to a page location ++ * ++ * @valid: set if the hint is valid and to be consumed ++ * @location: the hint content ++ * ++ * In postcopy preempt mode, the urgent channel may provide hints to the ++ * background channel, so that QEMU source can try to migrate whatever is ++ * right after the requested urgent pages. ++ * ++ * This is based on the assumption that the VM (already running on the ++ * destination side) tends to access the memory with spatial locality. ++ * This is also the default behavior of vanilla postcopy (preempt off). 
++ */ ++struct PageLocationHint { ++ bool valid; ++ PageLocation location; ++}; ++typedef struct PageLocationHint PageLocationHint; ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* The migration channel used for a specific host page */ +@@ -395,6 +425,13 @@ struct RAMState { + * RAM migration. + */ + unsigned int postcopy_bmap_sync_requested; ++ /* ++ * Page hint during postcopy when preempt mode is on. Return path ++ * thread sets it, while background migration thread consumes it. ++ * ++ * Protected by @bitmap_mutex. ++ */ ++ PageLocationHint page_hint; + }; + typedef struct RAMState RAMState; + +@@ -2039,6 +2076,21 @@ static void pss_host_page_finish(PageSearchStatus *pss) + pss->host_page_start = pss->host_page_end = 0; + } + ++static void ram_page_hint_update(RAMState *rs, PageSearchStatus *pss) ++{ ++ PageLocationHint *hint = &rs->page_hint; ++ ++ /* If there's a pending hint not consumed, don't bother */ ++ if (hint->valid) { ++ return; ++ } ++ ++ /* Provide a hint to the background stream otherwise */ ++ hint->location.block = pss->block; ++ hint->location.offset = pss->page; ++ hint->valid = true; ++} ++ + /* + * Send an urgent host page specified by `pss'. Need to be called with + * bitmap_mutex held. +@@ -2084,6 +2136,7 @@ out: + /* For urgent requests, flush immediately if sent */ + if (sent) { + qemu_fflush(pss->pss_channel); ++ ram_page_hint_update(rs, pss); + } + return ret; + } +@@ -2171,6 +2224,30 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) + return (res < 0 ? 
res : pages); + } + ++static bool ram_page_hint_valid(RAMState *rs) ++{ ++ /* There's only page hint during postcopy preempt mode */ ++ if (!postcopy_preempt_active()) { ++ return false; ++ } ++ ++ return rs->page_hint.valid; ++} ++ ++static void ram_page_hint_collect(RAMState *rs, RAMBlock **block, ++ unsigned long *page) ++{ ++ PageLocationHint *hint = &rs->page_hint; ++ ++ assert(hint->valid); ++ ++ *block = hint->location.block; ++ *page = hint->location.offset; ++ ++ /* Mark the hint consumed */ ++ hint->valid = false; ++} ++ + /** + * ram_find_and_save_block: finds a dirty page and sends it to f + * +@@ -2187,6 +2264,8 @@ static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss) + static int ram_find_and_save_block(RAMState *rs) + { + PageSearchStatus *pss = &rs->pss[RAM_CHANNEL_PRECOPY]; ++ unsigned long next_page; ++ RAMBlock *next_block; + int pages = 0; + + /* No dirty page as there is zero RAM */ +@@ -2206,7 +2285,14 @@ static int ram_find_and_save_block(RAMState *rs) + rs->last_page = 0; + } + +- pss_init(pss, rs->last_seen_block, rs->last_page); ++ if (ram_page_hint_valid(rs)) { ++ ram_page_hint_collect(rs, &next_block, &next_page); ++ } else { ++ next_block = rs->last_seen_block; ++ next_page = rs->last_page; ++ } ++ ++ pss_init(pss, next_block, next_page); + + while (true){ + if (!get_queued_page(rs, pss)) { +@@ -2339,6 +2425,13 @@ static void ram_save_cleanup(void *opaque) + ram_state_cleanup(rsp); + } + ++static void ram_page_hint_reset(PageLocationHint *hint) ++{ ++ hint->location.block = NULL; ++ hint->location.offset = 0; ++ hint->valid = false; ++} ++ + static void ram_state_reset(RAMState *rs) + { + int i; +@@ -2351,6 +2444,8 @@ static void ram_state_reset(RAMState *rs) + rs->last_page = 0; + rs->last_version = ram_list.version; + rs->xbzrle_started = false; ++ ++ ram_page_hint_reset(&rs->page_hint); + } + + #define MAX_WAIT 50 /* ms, half buffered_file limit */ +-- +2.39.3 + diff --git a/qemu-ga.sysconfig b/qemu-ga.sysconfig index 
6f6e98b..b574514 100644 --- a/qemu-ga.sysconfig +++ b/qemu-ga.sysconfig @@ -13,7 +13,7 @@ # # You can get the list of RPC commands using "qemu-ga --allow-rpcs='?'". # There should be no spaces between commas and commands in the allow list. -FILTER_RPC_ARGS="--allow-rpcs=guest-sync-delimited,guest-sync,guest-ping,guest-get-time,guest-set-time,guest-info,guest-shutdown,guest-fsfreeze-status,guest-fsfreeze-freeze,guest-fsfreeze-freeze-list,guest-fsfreeze-thaw,guest-fstrim,guest-suspend-disk,guest-suspend-ram,guest-suspend-hybrid,guest-network-get-interfaces,guest-get-vcpus,guest-set-vcpus,guest-get-disks,guest-get-fsinfo,guest-set-user-password,guest-get-memory-blocks,guest-set-memory-blocks,guest-get-memory-block-info,guest-get-host-name,guest-get-users,guest-get-timezone,guest-get-osinfo,guest-get-devices,guest-ssh-get-authorized-keys,guest-ssh-add-authorized-keys,guest-ssh-remove-authorized-keys,guest-get-diskstats,guest-get-cpustats,guest-network-get-route" +FILTER_RPC_ARGS="--allow-rpcs=guest-sync-delimited,guest-sync,guest-ping,guest-get-time,guest-set-time,guest-info,guest-shutdown,guest-fsfreeze-status,guest-fsfreeze-freeze,guest-fsfreeze-freeze-list,guest-fsfreeze-thaw,guest-fstrim,guest-suspend-disk,guest-suspend-ram,guest-suspend-hybrid,guest-network-get-interfaces,guest-get-vcpus,guest-set-vcpus,guest-get-disks,guest-get-fsinfo,guest-set-user-password,guest-get-memory-blocks,guest-set-memory-blocks,guest-get-memory-block-info,guest-get-host-name,guest-get-users,guest-get-timezone,guest-get-osinfo,guest-get-devices,guest-ssh-get-authorized-keys,guest-ssh-add-authorized-keys,guest-ssh-remove-authorized-keys,guest-get-diskstats,guest-get-cpustats,guest-network-get-route,guest-get-load" # Fsfreeze hook script specification. 
# diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 35bb1a4..f5e5b4b 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -147,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 10.0.0 -Release: 2%{?rcrel}%{?dist}%{?cc_suffix} +Release: 3%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -196,6 +196,12 @@ Patch21: kvm-block-io-skip-head-tail-requests-on-EINVAL.patch Patch22: kvm-file-posix-Fix-crash-on-discard_granularity-0.patch # For RHEL-86056 - Enable 'vhost-user-gpu-pci' in qemu-kvm for RHIVOS Patch23: kvm-Enable-vhost-user-gpu-pci-for-RHIVOS.patch +# For RHEL-85635 - Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration +Patch24: kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch +# For RHEL-88457 - qemu inadvertently built with valgrind coroutine stack debugging on x86_64 +Patch25: kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch +# Fixing s390x build issues +Patch26: kvm-docs-Don-t-define-duplicate-label-in-qemu-block-driv.patch %if %{have_clang} BuildRequires: clang @@ -272,6 +278,9 @@ BuildRequires: libslirp-devel BuildRequires: pulseaudio-libs-devel BuildRequires: spice-protocol BuildRequires: capstone-devel +%ifarch %{valgrind_arches} +BuildRequires: valgrind-devel +%endif # Requires for qemu-kvm package Requires: %{name}-core = %{epoch}:%{version}-%{release} @@ -653,6 +662,7 @@ ulimit -n 10240 --disable-ubsan \\\ --disable-usb-redir \\\ --disable-user \\\ + --disable-valgrind \\\ --disable-vde \\\ --disable-vdi \\\ --disable-vduse-blk-export \\\ @@ -777,6 +787,9 @@ run_configure \ --enable-tpm \ %if %{have_usbredir} --enable-usb-redir \ +%endif +%ifarch %{valgrind_arches} + --enable-valgrind \ %endif --enable-vdi \ --enable-vhost-kernel 
\ @@ -1271,6 +1284,18 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon May 19 2025 Miroslav Rezanina - 10.0.0-3 +- kvm-migration-postcopy-Spatial-locality-page-hint-for-pr.patch [RHEL-85635] +- kvm-meson-configure-add-valgrind-option-en-dis-able-valg.patch [RHEL-88457] +- kvm-distro-add-an-explicit-valgrind-devel-build-dep.patch [RHEL-88457] +- kvm-Allow-guest-get-load-QGA-command.patch [RHEL-91219] +- Resolves: RHEL-85635 + (Video stuck about 1 min after switchover phase when play one video during postcopy-preempt migration) +- Resolves: RHEL-88457 + (qemu inadvertently built with valgrind coroutine stack debugging on x86_64) +- Resolves: RHEL-91219 + ([qemu-guest-agent] Enable 'guest-get-load' by default [RHEL-10]) + * Mon May 12 2025 Miroslav Rezanina - 10.0.0-2 - kvm-file-posix-probe-discard-alignment-on-Linux-block-de.patch [RHEL-87642] - kvm-block-io-skip-head-tail-requests-on-EINVAL.patch [RHEL-87642]