From 793edd1f1b1c9ff4a4f5029eed85cbd5c1d3083a Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 20 Mar 2024 01:52:48 -0400 Subject: [PATCH] * Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 - kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] - kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] - kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] - kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] - Resolves: RHEL-28125 (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) - Resolves: RHEL-21705 (pc-q35-rhel9.4.0 does not provide proper computer information) --- ...-for-reset-AioContext-switches-with-.patch | 133 ++++++++++++++++++ ...all-job_pause_point-under-graph-lock.patch | 90 ++++++++++++ ...rver-Fix-race-in-draining-the-export.patch | 95 +++++++++++++ ...manufacturer-product-version-to-matc.patch | 44 ++++++ qemu-kvm.spec | 20 ++- 5 files changed, 381 insertions(+), 1 deletion(-) create mode 100644 kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch create mode 100644 kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch create mode 100644 kvm-nbd-server-Fix-race-in-draining-the-export.patch create mode 100644 kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch diff --git a/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch b/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch new file mode 100644 index 0000000..3b2841f --- /dev/null +++ b/kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch @@ -0,0 +1,133 @@ +From 65d58819ff7b012e43b5f1da1356b559d3f5a962 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 14 Mar 2024 17:58:25 +0100 +Subject: [PATCH 3/4] iotests: Add test for reset/AioContext switches with NBD + exports + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek 
+RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/3] 7266acdf85271d157497bfe452aa6eeb58fbe7c6 (kmwolf/centos-qemu-kvm) + +This replicates the scenario in which the bug was reported. +Unfortunately this relies on actually executing a guest (so that the +firmware initialises the virtio-blk device and moves it to its +configured iothread), so this can't make use of the qtest accelerator +like most other test cases. I tried to find a different easy way to +trigger the bug, but couldn't find one. + +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit e8fce34eccf68a32f4ecf2c6f121ff2ac383d6bf) +Signed-off-by: Kevin Wolf +--- + tests/qemu-iotests/tests/iothreads-nbd-export | 66 +++++++++++++++++++ + .../tests/iothreads-nbd-export.out | 19 ++++++ + 2 files changed, 85 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iothreads-nbd-export + create mode 100644 tests/qemu-iotests/tests/iothreads-nbd-export.out + +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export b/tests/qemu-iotests/tests/iothreads-nbd-export +new file mode 100755 +index 0000000000..037260729c +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export +@@ -0,0 +1,66 @@ ++#!/usr/bin/env python3 ++# group: rw quick ++# ++# Copyright (C) 2024 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see <http://www.gnu.org/licenses/>.
++# ++# Creator/Owner: Kevin Wolf ++ ++import time ++import qemu ++import iotests ++ ++iotests.script_initialize(supported_fmts=['qcow2'], ++ supported_platforms=['linux']) ++ ++with iotests.FilePath('disk1.img') as path, \ ++ iotests.FilePath('nbd.sock', base_dir=iotests.sock_dir) as nbd_sock, \ ++ qemu.machine.QEMUMachine(iotests.qemu_prog) as vm: ++ ++ img_size = '10M' ++ ++ iotests.log('Preparing disk...') ++ iotests.qemu_img_create('-f', iotests.imgfmt, path, img_size) ++ vm.add_args('-blockdev', f'file,node-name=disk-file,filename={path}') ++ vm.add_args('-blockdev', 'qcow2,node-name=disk,file=disk-file') ++ vm.add_args('-object', 'iothread,id=iothread0') ++ vm.add_args('-device', ++ 'virtio-blk,drive=disk,iothread=iothread0,share-rw=on') ++ ++ iotests.log('Launching VM...') ++ vm.add_args('-accel', 'kvm', '-accel', 'tcg') ++ #vm.add_args('-accel', 'qtest') ++ vm.launch() ++ ++ iotests.log('Exporting to NBD...') ++ iotests.log(vm.qmp('nbd-server-start', ++ addr={'type': 'unix', 'data': {'path': nbd_sock}})) ++ iotests.log(vm.qmp('block-export-add', type='nbd', id='exp0', ++ node_name='disk', writable=True)) ++ ++ iotests.log('Connecting qemu-img...') ++ qemu_io = iotests.QemuIoInteractive('-f', 'raw', ++ f'nbd+unix:///disk?socket={nbd_sock}') ++ ++ iotests.log('Moving the NBD export to a different iothread...') ++ for i in range(0, 10): ++ iotests.log(vm.qmp('system_reset')) ++ time.sleep(0.1) ++ ++ iotests.log('Checking that it is still alive...') ++ iotests.log(vm.qmp('query-status')) ++ ++ qemu_io.close() ++ vm.shutdown() +diff --git a/tests/qemu-iotests/tests/iothreads-nbd-export.out b/tests/qemu-iotests/tests/iothreads-nbd-export.out +new file mode 100644 +index 0000000000..bc514e35e5 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iothreads-nbd-export.out +@@ -0,0 +1,19 @@ ++Preparing disk... ++Launching VM... ++Exporting to NBD... ++{"return": {}} ++{"return": {}} ++Connecting qemu-img... ++Moving the NBD export to a different iothread... 
++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++{"return": {}} ++Checking that it is still alive... ++{"return": {"running": true, "status": "running"}} +-- +2.39.3 + diff --git a/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch b/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch new file mode 100644 index 0000000..871a80e --- /dev/null +++ b/kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch @@ -0,0 +1,90 @@ +From 21cadc8ed200ad9af13b217b441b9f12cd2163ee Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Wed, 13 Mar 2024 16:30:00 +0100 +Subject: [PATCH 1/4] mirror: Don't call job_pause_point() under graph lock + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/3] 093910fcd5ea1516d0ed48a9cd73dad6170590f3 (kmwolf/centos-qemu-kvm) + +Calling job_pause_point() while holding the graph reader lock +potentially results in a deadlock: bdrv_graph_wrlock() first drains +everything, including the mirror job, which pauses it. The job is only +unpaused at the end of the drain section, which is when the graph writer +lock has been successfully taken. However, if the job happens to be +paused at a pause point where it still holds the reader lock, the writer +lock can't be taken as long as the job is still paused. + +Mark job_pause_point() as GRAPH_UNLOCKED and fix mirror accordingly. 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://issues.redhat.com/browse/RHEL-28125 +Fixes: 004915a96a7a ("block: Protect bs->backing with graph_lock") +Signed-off-by: Kevin Wolf +Message-ID: <20240313153000.33121-1-kwolf@redhat.com> +Reviewed-by: Eric Blake +Signed-off-by: Kevin Wolf +(cherry picked from commit ae5a40e8581185654a667fbbf7e4adbc2a2a3e45) +Signed-off-by: Kevin Wolf +--- + block/mirror.c | 10 ++++++---- + include/qemu/job.h | 2 +- + 2 files changed, 7 insertions(+), 5 deletions(-) + +diff --git a/block/mirror.c b/block/mirror.c +index 5145eb53e1..1bdce3b657 100644 +--- a/block/mirror.c ++++ b/block/mirror.c +@@ -479,9 +479,9 @@ static unsigned mirror_perform(MirrorBlockJob *s, int64_t offset, + return bytes_handled; + } + +-static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) ++static void coroutine_fn GRAPH_UNLOCKED mirror_iteration(MirrorBlockJob *s) + { +- BlockDriverState *source = s->mirror_top_bs->backing->bs; ++ BlockDriverState *source; + MirrorOp *pseudo_op; + int64_t offset; + /* At least the first dirty chunk is mirrored in one iteration. 
*/ +@@ -489,6 +489,10 @@ static void coroutine_fn GRAPH_RDLOCK mirror_iteration(MirrorBlockJob *s) + bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)); + int max_io_bytes = MAX(s->buf_size / MAX_IN_FLIGHT, MAX_IO_BYTES); + ++ bdrv_graph_co_rdlock(); ++ source = s->mirror_top_bs->backing->bs; ++ bdrv_graph_co_rdunlock(); ++ + bdrv_dirty_bitmap_lock(s->dirty_bitmap); + offset = bdrv_dirty_iter_next(s->dbi); + if (offset < 0) { +@@ -1066,9 +1070,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp) + mirror_wait_for_free_in_flight_slot(s); + continue; + } else if (cnt != 0) { +- bdrv_graph_co_rdlock(); + mirror_iteration(s); +- bdrv_graph_co_rdunlock(); + } + } + +diff --git a/include/qemu/job.h b/include/qemu/job.h +index 9ea98b5927..2b873f2576 100644 +--- a/include/qemu/job.h ++++ b/include/qemu/job.h +@@ -483,7 +483,7 @@ void job_enter(Job *job); + * + * Called with job_mutex *not* held. + */ +-void coroutine_fn job_pause_point(Job *job); ++void coroutine_fn GRAPH_UNLOCKED job_pause_point(Job *job); + + /** + * @job: The job that calls the function. 
+-- +2.39.3 + diff --git a/kvm-nbd-server-Fix-race-in-draining-the-export.patch b/kvm-nbd-server-Fix-race-in-draining-the-export.patch new file mode 100644 index 0000000..7298992 --- /dev/null +++ b/kvm-nbd-server-Fix-race-in-draining-the-export.patch @@ -0,0 +1,95 @@ +From dc4dd11233522d8195782a2196aaae44bd924575 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 14 Mar 2024 17:58:24 +0100 +Subject: [PATCH 2/4] nbd/server: Fix race in draining the export + +RH-Author: Kevin Wolf +RH-MergeRequest: 231: Fix deadlock and crash during storage migration +RH-Jira: RHEL-28125 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/3] 036e1b171986099904dee468c59d77437e3d3d61 (kmwolf/centos-qemu-kvm) + +When draining an NBD export, nbd_drained_begin() first sets +client->quiescing so that nbd_client_receive_next_request() won't start +any new request coroutines. Then nbd_drained_poll() tries to make sure +that we wait for any existing request coroutines by checking that +client->nb_requests has become 0. + +However, there is a small window between creating a new request +coroutine and increasing client->nb_requests. If a coroutine is in this +state, it won't be waited for and drain returns too early. + +In the context of switching to a different AioContext, this means that +blk_aio_attached() will see client->recv_coroutine != NULL and fail its +assertion. + +Fix this by increasing client->nb_requests immediately when starting the +coroutine. Doing this after the checks if we should create a new +coroutine is okay because client->lock is held.
+ +Cc: qemu-stable@nongnu.org +Fixes: fd6afc501a01 ("nbd/server: Use drained block ops to quiesce the server") +Signed-off-by: Kevin Wolf +Message-ID: <20240314165825.40261-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 9c707525cbb1dd1e56876e45c70c0c08f2876d41) +Signed-off-by: Kevin Wolf +--- + nbd/server.c | 15 +++++++-------- + 1 file changed, 7 insertions(+), 8 deletions(-) + +diff --git a/nbd/server.c b/nbd/server.c +index 941832f178..c3484cc1eb 100644 +--- a/nbd/server.c ++++ b/nbd/server.c +@@ -3007,8 +3007,8 @@ static coroutine_fn int nbd_handle_request(NBDClient *client, + /* Owns a reference to the NBDClient passed as opaque. */ + static coroutine_fn void nbd_trip(void *opaque) + { +- NBDClient *client = opaque; +- NBDRequestData *req = NULL; ++ NBDRequestData *req = opaque; ++ NBDClient *client = req->client; + NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */ + int ret; + Error *local_err = NULL; +@@ -3037,8 +3037,6 @@ static coroutine_fn void nbd_trip(void *opaque) + goto done; + } + +- req = nbd_request_get(client); +- + /* + * nbd_co_receive_request() returns -EAGAIN when nbd_drained_begin() has + * set client->quiescing but by the time we get back nbd_drained_end() may +@@ -3112,9 +3110,7 @@ static coroutine_fn void nbd_trip(void *opaque) + } + + done: +- if (req) { +- nbd_request_put(req); +- } ++ nbd_request_put(req); + + qemu_mutex_unlock(&client->lock); + +@@ -3143,10 +3139,13 @@ disconnect: + */ + static void nbd_client_receive_next_request(NBDClient *client) + { ++ NBDRequestData *req; ++ + if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS && + !client->quiescing) { + nbd_client_get(client); +- client->recv_coroutine = qemu_coroutine_create(nbd_trip, client); ++ req = nbd_request_get(client); ++ client->recv_coroutine = qemu_coroutine_create(nbd_trip, req); + aio_co_schedule(client->exp->common.ctx, client->recv_coroutine); + } + } +-- +2.39.3 + diff --git 
a/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch b/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch new file mode 100644 index 0000000..9d3fd29 --- /dev/null +++ b/kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch @@ -0,0 +1,44 @@ +From 03cad16743d9a4d377af66611d030eca9eda326d Mon Sep 17 00:00:00 2001 +From: Igor Mammedov +Date: Tue, 19 Mar 2024 11:42:04 +0100 +Subject: [PATCH 4/4] pc: smbios: fixup manufacturer/product/version to match + downstream + +RH-Author: Igor Mammedov +RH-MergeRequest: 232: pc: smbios: fixup manufacturer/product/version to match downstream +RH-Jira: RHEL-21705 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Ani Sinha +RH-Commit: [1/1] 9235f64acb5ff46360282e889d0aedcb13374ac1 + +JIRA: https://issues.redhat.com/browse/RHEL-21705 + +commit [1] discarded RHEL only change that customizes +SMBIOS values for manufacturer/product/version for pc/q35 +machine types. +Fix it up by reverting back to ("Red Hat", "KVM", mc->desc) +tuple. 
+ +1) +Fixes: 208239eb2 (hw/i386/pc: Defer smbios_set_defaults() to machine_done) +Signed-off-by: Igor Mammedov +--- + hw/i386/fw_cfg.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/hw/i386/fw_cfg.c b/hw/i386/fw_cfg.c +index 58429bb78d..d6a24177e2 100644 +--- a/hw/i386/fw_cfg.c ++++ b/hw/i386/fw_cfg.c +@@ -63,7 +63,7 @@ void fw_cfg_build_smbios(PCMachineState *pcms, FWCfgState *fw_cfg, + + if (pcmc->smbios_defaults) { + /* These values are guest ABI, do not change */ +- smbios_set_defaults("QEMU", mc->desc, mc->name, ++ smbios_set_defaults("Red Hat", "KVM", mc->desc, + pcmc->smbios_uuid_encoded, + pcmc->smbios_stream_product, + pcmc->smbios_stream_version); +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 1e8ed3f..e614d12 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.2.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Release: 9%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -592,6 +592,14 @@ Patch166: kvm-smbios-in-case-of-entry-point-is-auto-try-to-build-v.patch Patch167: kvm-smbios-error-out-when-building-type-4-table-is-not-p.patch # For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information Patch168: kvm-pc-q35-set-SMBIOS-entry-point-type-to-auto-by-defaul.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch169: kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. 
+Patch170: kvm-nbd-server-Fix-race-in-draining-the-export.patch +# For RHEL-28125 - RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete. +Patch171: kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch +# For RHEL-21705 - pc-q35-rhel9.4.0 does not provide proper computer information +Patch172: kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch %if %{have_clang} BuildRequires: clang @@ -1653,6 +1661,16 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Wed Mar 20 2024 Miroslav Rezanina - 8.2.0-9 +- kvm-mirror-Don-t-call-job_pause_point-under-graph-lock.patch [RHEL-28125] +- kvm-nbd-server-Fix-race-in-draining-the-export.patch [RHEL-28125] +- kvm-iotests-Add-test-for-reset-AioContext-switches-with-.patch [RHEL-28125] +- kvm-pc-smbios-fixup-manufacturer-product-version-to-matc.patch [RHEL-21705] +- Resolves: RHEL-28125 + (RHEL9.4 - KVM : Live migration of guest with multiple qcow devices remains incomplete.) +- Resolves: RHEL-21705 + (pc-q35-rhel9.4.0 does not provide proper computer information) + * Mon Mar 18 2024 Miroslav Rezanina - 8.2.0-8 - kvm-ui-clipboard-mark-type-as-not-available-when-there-i.patch [RHEL-19629] - kvm-ui-clipboard-add-asserts-for-update-and-request.patch [RHEL-19629]