import OL qemu-kvm-9.1.0-15.el9_6.9

This commit is contained in:
eabdullin 2025-09-17 06:55:38 +00:00
parent 14e6eca34d
commit 91e6abe5b7
4 changed files with 478 additions and 1 deletions

View File

@ -0,0 +1,273 @@
From 04a3beab85453e901e76c64b8a7164bfb9fbbc4d Mon Sep 17 00:00:00 2001
From: Kevin Wolf <kwolf@redhat.com>
Date: Mon, 11 Aug 2025 15:40:10 +0200
Subject: [PATCH] rbd: Fix .bdrv_get_specific_info implementation
RH-Author: Kevin Wolf <kwolf@redhat.com>
RH-MergeRequest: 473: rbd: Fix .bdrv_get_specific_info implementation
RH-Jira: RHEL-108725
RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
RH-Commit: [1/1] 183cf2d34cd1aa32e5f234139c00f6cf925edd56 (kmwolf/rhel-qemu-kvm)
qemu_rbd_get_specific_info() has at least two problems:
The first is that it issues a blocking rbd_read() call in order to probe
the encryption format for the image while querying the node. This means
that if the connection to the server goes down, not only I/O is stuck
(which is unavoidable), but query-named-block-nodes will actually make
the whole QEMU instance unresponsive. .bdrv_get_specific_info
implementations shouldn't perform blocking operations, but only return
what is already known.
The second is that the information returned isn't even correct. If the
image is already opened with encryption enabled at the RBD level, we'll
probe for "double encryption", i.e. if the encrypted data contains
another encryption header. If it doesn't (which is the normal case), we
won't return the encryption format. If it does, we return misleading
information because it looks like we're talking about the outer level
(the encryption format of the image itself) while the information is
about an encryption header in the guest data.
Fix this by storing the encryption format in BDRVRBDState when the image
is opened (and we do blocking operations anyway) and returning only the
stored information in qemu_rbd_get_specific_info().
The information we'll store is either the actual encryption format that
we enabled on the RBD level, or if the image is unencrypted, the result
of the same probing as we previously did when querying the node. Probing
image formats based on content that can be modified by the guest has
long been known as problematic, but as long as we only output it to the
user instead of making decisions based on it, it should be okay. It is
undoubtedly useful in the context of 'qemu-img info' when you're trying
to figure out which encryption options you have to use to open the
image successfully.
Fixes: 42e4ac9ef5a6 ("block/rbd: Add support for rbd image encryption")
Buglink: https://issues.redhat.com/browse/RHEL-105440
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250811134010.81787-1-kwolf@redhat.com>
Reviewed-by: Hanna Czenczek <hreitz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
(cherry picked from commit 4af976ef398e4e823addc00bf1c58787ba4952fe)
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/rbd.c | 104 ++++++++++++++++++++++++++++---------------
qapi/block-core.json | 9 +++-
2 files changed, 76 insertions(+), 37 deletions(-)
diff --git a/block/rbd.c b/block/rbd.c
index 9c0fd0cb3f..fa9aab12ab 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -99,6 +99,14 @@ typedef struct BDRVRBDState {
char *namespace;
uint64_t image_size;
uint64_t object_size;
+
+ /*
+ * If @bs->encrypted is true, this is the encryption format actually loaded
+ * at the librbd level. If it is false, it is the result of probing.
+ * RBD_IMAGE_ENCRYPTION_FORMAT__MAX means that encryption is not enabled and
+ * probing didn't find any known encryption header either.
+ */
+ RbdImageEncryptionFormat encryption_format;
} BDRVRBDState;
typedef struct RBDTask {
@@ -471,10 +479,12 @@ static int qemu_rbd_encryption_format(rbd_image_t image,
return 0;
}
-static int qemu_rbd_encryption_load(rbd_image_t image,
+static int qemu_rbd_encryption_load(BlockDriverState *bs,
+ rbd_image_t image,
RbdEncryptionOptions *encrypt,
Error **errp)
{
+ BDRVRBDState *s = bs->opaque;
int r = 0;
g_autofree char *passphrase = NULL;
rbd_encryption_luks1_format_options_t luks_opts;
@@ -545,15 +555,19 @@ static int qemu_rbd_encryption_load(rbd_image_t image,
error_setg_errno(errp, -r, "encryption load fail");
return r;
}
+ bs->encrypted = true;
+ s->encryption_format = encrypt->format;
return 0;
}
#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
-static int qemu_rbd_encryption_load2(rbd_image_t image,
+static int qemu_rbd_encryption_load2(BlockDriverState *bs,
+ rbd_image_t image,
RbdEncryptionOptions *encrypt,
Error **errp)
{
+ BDRVRBDState *s = bs->opaque;
int r = 0;
int encrypt_count = 1;
int i;
@@ -639,6 +653,8 @@ static int qemu_rbd_encryption_load2(rbd_image_t image,
error_setg_errno(errp, -r, "layered encryption load fail");
goto exit;
}
+ bs->encrypted = true;
+ s->encryption_format = encrypt->format;
exit:
for (i = 0; i < encrypt_count; ++i) {
@@ -672,6 +688,45 @@ exit:
#endif
#endif
+/*
+ * For an image without encryption enabled on the rbd layer, probe the start of
+ * the image if it could be opened as an encrypted image so that we can display
+ * it when the user queries the node (most importantly in qemu-img).
+ *
+ * If the guest writes an encryption header to its disk after this probing, this
+ * won't be reflected when queried, but that's okay. There is no reason why the
+ * user should want to apply encryption at the rbd level while the image is
+ * still in use. This is just guest data.
+ */
+static void qemu_rbd_encryption_probe(BlockDriverState *bs)
+{
+ BDRVRBDState *s = bs->opaque;
+ char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0};
+ int r;
+
+ assert(s->encryption_format == RBD_IMAGE_ENCRYPTION_FORMAT__MAX);
+
+ r = rbd_read(s->image, 0,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf);
+ if (r < RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) {
+ return;
+ }
+
+ if (memcmp(buf, rbd_luks_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+ } else if (memcmp(buf, rbd_luks2_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+ } else if (memcmp(buf, rbd_layered_luks_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
+ } else if (memcmp(buf, rbd_layered_luks2_header_verification,
+ RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
+ }
+}
+
/* FIXME Deprecate and remove keypairs or make it available in QMP. */
static int qemu_rbd_do_create(BlockdevCreateOptions *options,
const char *keypairs, const char *password_secret,
@@ -1134,17 +1189,18 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
goto failed_open;
}
+ s->encryption_format = RBD_IMAGE_ENCRYPTION_FORMAT__MAX;
if (opts->encrypt) {
#ifdef LIBRBD_SUPPORTS_ENCRYPTION
if (opts->encrypt->parent) {
#ifdef LIBRBD_SUPPORTS_ENCRYPTION_LOAD2
- r = qemu_rbd_encryption_load2(s->image, opts->encrypt, errp);
+ r = qemu_rbd_encryption_load2(bs, s->image, opts->encrypt, errp);
#else
r = -ENOTSUP;
error_setg(errp, "RBD library does not support layered encryption");
#endif
} else {
- r = qemu_rbd_encryption_load(s->image, opts->encrypt, errp);
+ r = qemu_rbd_encryption_load(bs, s->image, opts->encrypt, errp);
}
if (r < 0) {
goto failed_post_open;
@@ -1154,6 +1210,8 @@ static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
error_setg(errp, "RBD library does not support image encryption");
goto failed_post_open;
#endif
+ } else {
+ qemu_rbd_encryption_probe(bs);
}
r = rbd_stat(s->image, &info, sizeof(info));
@@ -1413,17 +1471,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
{
BDRVRBDState *s = bs->opaque;
ImageInfoSpecific *spec_info;
- char buf[RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN] = {0};
- int r;
-
- if (s->image_size >= RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) {
- r = rbd_read(s->image, 0,
- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN, buf);
- if (r < 0) {
- error_setg_errno(errp, -r, "cannot read image start for probe");
- return NULL;
- }
- }
spec_info = g_new(ImageInfoSpecific, 1);
*spec_info = (ImageInfoSpecific){
@@ -1431,28 +1478,13 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
.u.rbd.data = g_new0(ImageInfoSpecificRbd, 1),
};
- if (memcmp(buf, rbd_luks_header_verification,
- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
- spec_info->u.rbd.data->encryption_format =
- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
- spec_info->u.rbd.data->has_encryption_format = true;
- } else if (memcmp(buf, rbd_luks2_header_verification,
- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
- spec_info->u.rbd.data->encryption_format =
- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
- spec_info->u.rbd.data->has_encryption_format = true;
- } else if (memcmp(buf, rbd_layered_luks_header_verification,
- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
- spec_info->u.rbd.data->encryption_format =
- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS;
- spec_info->u.rbd.data->has_encryption_format = true;
- } else if (memcmp(buf, rbd_layered_luks2_header_verification,
- RBD_ENCRYPTION_LUKS_HEADER_VERIFICATION_LEN) == 0) {
- spec_info->u.rbd.data->encryption_format =
- RBD_IMAGE_ENCRYPTION_FORMAT_LUKS2;
- spec_info->u.rbd.data->has_encryption_format = true;
+ if (s->encryption_format == RBD_IMAGE_ENCRYPTION_FORMAT__MAX) {
+ assert(!bs->encrypted);
} else {
- spec_info->u.rbd.data->has_encryption_format = false;
+ ImageInfoSpecificRbd *rbd_info = spec_info->u.rbd.data;
+
+ rbd_info->has_encryption_format = true;
+ rbd_info->encryption_format = s->encryption_format;
}
return spec_info;
diff --git a/qapi/block-core.json b/qapi/block-core.json
index c1af3d1f7d..a2fa277245 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -158,7 +158,14 @@
##
# @ImageInfoSpecificRbd:
#
-# @encryption-format: Image encryption format
+# @encryption-format: Image encryption format. If encryption is enabled for the
+# image (see encrypted in BlockNodeInfo), this is the actual format in which the
+# image is accessed. If encryption is not enabled, this is the result of
+# probing when the image was opened, to give a suggestion which encryption
+# format could be enabled. Note that probing results can be changed by the
+# guest by writing a (possibly partial) encryption format header to the
+# image, so don't treat this information as trusted if the guest is not
+# trusted.
#
# Since: 6.1
##
--
2.50.1

View File

@ -0,0 +1,97 @@
From da69744f6441f4a63215841ae9add2c1ef631047 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 22 Oct 2024 14:08:29 -0600
Subject: [PATCH 2/2] vfio/helpers: Align mmaps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Alex Williamson <None>
RH-MergeRequest: 468: vfio/helpers: Align mmaps [9.6.z]
RH-Jira: RHEL-107314
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [2/2] 4941ab64a5c0508eefa6bc5f923d6fcc38fdda4b
Thanks to work by Peter Xu, support is introduced in Linux v6.12 to
allow pfnmap insertions at PMD and PUD levels of the page table. This
means that provided a properly aligned mmap, the vfio driver is able
to map MMIO at significantly larger intervals than PAGE_SIZE. For
example on x86_64 (the only architecture currently supporting huge
pfnmaps for PUD), rather than 4KiB mappings, we can map device MMIO
using 2MiB and even 1GiB page table entries.
Typically mmap will already provide PMD aligned mappings, so devices
with moderately sized MMIO ranges, even GPUs with standard 256MiB BARs,
will already take advantage of this support. However in order to better
support devices exposing multi-GiB MMIO, such as 3D accelerators or GPUs
with resizable BARs enabled, we need to manually align the mmap.
There doesn't seem to be a way for userspace to easily learn about PMD
and PUD mapping level sizes, therefore this takes the simple approach
to align the mapping to the power-of-two size of the region, up to 1GiB,
which is currently the maximum alignment we care about.
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 00b519c0bca0e933ed22e2e6f8bca6b23f41f950)
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/vfio/helpers.c | 32 ++++++++++++++++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index b9e606e364..913796f437 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -27,6 +27,7 @@
#include "trace.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
+#include "qemu/units.h"
#include "monitor/monitor.h"
/*
@@ -406,8 +407,35 @@ int vfio_region_mmap(VFIORegion *region)
prot |= region->flags & VFIO_REGION_INFO_FLAG_WRITE ? PROT_WRITE : 0;
for (i = 0; i < region->nr_mmaps; i++) {
- region->mmaps[i].mmap = mmap(NULL, region->mmaps[i].size, prot,
- MAP_SHARED, region->vbasedev->fd,
+ size_t align = MIN(1ULL << ctz64(region->mmaps[i].size), 1 * GiB);
+ void *map_base, *map_align;
+
+ /*
+ * Align the mmap for more efficient mapping in the kernel. Ideally
+ * we'd know the PMD and PUD mapping sizes to use as discrete alignment
+ * intervals, but we don't. As of Linux v6.12, the largest PUD size
+ * supporting huge pfnmap is 1GiB (ARCH_SUPPORTS_PUD_PFNMAP is only set
+ * on x86_64). Align by power-of-two size, capped at 1GiB.
+ *
+ * NB. qemu_memalign() and friends actually allocate memory, whereas
+ * the region size here can exceed host memory, therefore we manually
+ * create an oversized anonymous mapping and clean it up for alignment.
+ */
+ map_base = mmap(0, region->mmaps[i].size + align, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (map_base == MAP_FAILED) {
+ ret = -errno;
+ goto no_mmap;
+ }
+
+ map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
+ munmap(map_base, map_align - map_base);
+ munmap(map_align + region->mmaps[i].size,
+ align - (map_align - map_base));
+
+ region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
+ MAP_SHARED | MAP_FIXED,
+ region->vbasedev->fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
--
2.48.1

View File

@ -0,0 +1,90 @@
From 41ea67ec82122d12ed26a98fe32d29e90f8fd282 Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Tue, 22 Oct 2024 14:08:28 -0600
Subject: [PATCH 1/2] vfio/helpers: Refactor vfio_region_mmap() error handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Alex Williamson <None>
RH-MergeRequest: 468: vfio/helpers: Align mmaps [9.6.z]
RH-Jira: RHEL-107314
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Acked-by: Peter Xu <peterx@redhat.com>
RH-Commit: [1/2] b91e8d009b8a6ff91bf273211272b101bc1c1146
Move error handling code to the end of the function so that it can more
easily be shared by new mmap failure conditions. No functional change
intended.
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
(cherry picked from commit 49915c0d2c9868e6f25e52e4d839943611b69e98)
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
hw/vfio/helpers.c | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index ea15c79db0..b9e606e364 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -395,7 +395,7 @@ static void vfio_subregion_unmap(VFIORegion *region, int index)
int vfio_region_mmap(VFIORegion *region)
{
- int i, prot = 0;
+ int i, ret, prot = 0;
char *name;
if (!region->mem) {
@@ -411,22 +411,8 @@ int vfio_region_mmap(VFIORegion *region)
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
- int ret = -errno;
-
- trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
- region->fd_offset +
- region->mmaps[i].offset,
- region->fd_offset +
- region->mmaps[i].offset +
- region->mmaps[i].size - 1, ret);
-
- region->mmaps[i].mmap = NULL;
-
- for (i--; i >= 0; i--) {
- vfio_subregion_unmap(region, i);
- }
-
- return ret;
+ ret = -errno;
+ goto no_mmap;
}
name = g_strdup_printf("%s mmaps[%d]",
@@ -446,6 +432,20 @@ int vfio_region_mmap(VFIORegion *region)
}
return 0;
+
+no_mmap:
+ trace_vfio_region_mmap_fault(memory_region_name(region->mem), i,
+ region->fd_offset + region->mmaps[i].offset,
+ region->fd_offset + region->mmaps[i].offset +
+ region->mmaps[i].size - 1, ret);
+
+ region->mmaps[i].mmap = NULL;
+
+ for (i--; i >= 0; i--) {
+ vfio_subregion_unmap(region, i);
+ }
+
+ return ret;
}
void vfio_region_unmap(VFIORegion *region)
--
2.48.1

View File

@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 9.1.0
Release: 15%{?rcrel}%{?dist}%{?cc_suffix}.7
Release: 15%{?rcrel}%{?dist}%{?cc_suffix}.9
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@ -543,6 +543,12 @@ Patch185: kvm-hw-ufs-lu-skip-automatic-zero-init-of-large-array.patch
Patch186: kvm-net-socket-skip-automatic-zero-init-of-large-array.patch
# For RHEL-99887 - -ftrivial-auto-var-init=zero reduced performance [rhel-9.6.z]
Patch187: kvm-net-stream-skip-automatic-zero-init-of-large-array.patch
# For RHEL-107314 - Improve VFIO mmapping performance with huge pfnmaps [rhel-9.6.z]
Patch188: kvm-vfio-helpers-Refactor-vfio_region_mmap-error-handlin.patch
# For RHEL-107314 - Improve VFIO mmapping performance with huge pfnmaps [rhel-9.6.z]
Patch189: kvm-vfio-helpers-Align-mmaps.patch
# For RHEL-108725 - Openstack guest becomes inaccessible via network when storage network on the hypervisor is disabled/lost [rhel-9.6.z]
Patch190: kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch
%if %{have_clang}
BuildRequires: clang
@ -1609,6 +1615,17 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
%endif
%changelog
* Mon Aug 18 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-15.el9_6.9
- kvm-rbd-Fix-.bdrv_get_specific_info-implementation.patch [RHEL-108725]
- Resolves: RHEL-108725
(Openstack guest becomes inaccessible via network when storage network on the hypervisor is disabled/lost [rhel-9.6.z])
* Tue Aug 05 2025 Jon Maloy <jmaloy@redhat.com> - 9.1.0-15.el9_6.8
- kvm-vfio-helpers-Refactor-vfio_region_mmap-error-handlin.patch [RHEL-107314]
- kvm-vfio-helpers-Align-mmaps.patch [RHEL-107314]
- Resolves: RHEL-107314
(Improve VFIO mmapping performance with huge pfnmaps [rhel-9.6.z])
* Fri Jul 04 2025 Miroslav Rezanina <mrezanin@redhat.com> - 9.1.0-15.el9_6.7
- kvm-ui-vnc-Update-display-update-interval-when-VM-state-.patch [RHEL-100767]
- kvm-include-qemu-compiler-add-QEMU_UNINITIALIZED-attribu.patch [RHEL-99887]