diff --git a/kvm-KVM-keep-track-of-running-ioctls.patch b/kvm-KVM-keep-track-of-running-ioctls.patch new file mode 100644 index 0000000..b7aba7e --- /dev/null +++ b/kvm-KVM-keep-track-of-running-ioctls.patch @@ -0,0 +1,82 @@ +From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001 +From: Emanuele Giuseppe Esposito +Date: Mon, 16 Jan 2023 07:17:23 -0500 +Subject: [PATCH 30/31] KVM: keep track of running ioctls + +RH-Author: Emanuele Giuseppe Esposito +RH-MergeRequest: 138: accel: introduce accelerator blocker API +RH-Bugzilla: 1979276 +RH-Acked-by: Paolo Bonzini +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276 + +commit a27dd2de68f37ba96fe164a42121daa5f0750afc +Author: Emanuele Giuseppe Esposito +Date: Fri Nov 11 10:47:57 2022 -0500 + + KVM: keep track of running ioctls + + Using the new accel-blocker API, mark where ioctls are being called + in KVM. Next, we will implement the critical section that will take + care of performing memslots modifications atomically, therefore + preventing any new ioctl from running and allowing the running ones + to finish. + + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-3-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index f99b0becd8..ff660fd469 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms) + assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size()); + + s->sigmask_len = 8; ++ accel_blocker_init(); + + #ifdef KVM_CAP_SET_GUEST_DEBUG + QTAILQ_INIT(&s->kvm_sw_breakpoints); +@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...) + va_end(ap); + + trace_kvm_vm_ioctl(type, arg); ++ accel_ioctl_begin(); + ret = ioctl(s->vmfd, type, arg); ++ accel_ioctl_end(); + if (ret == -1) { + ret = -errno; + } +@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...) + va_end(ap); + + trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg); ++ accel_cpu_ioctl_begin(cpu); + ret = ioctl(cpu->kvm_fd, type, arg); ++ accel_cpu_ioctl_end(cpu); + if (ret == -1) { + ret = -errno; + } +@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...) 
+     va_end(ap);
+ 
+     trace_kvm_device_ioctl(fd, type, arg);
++    accel_ioctl_begin();
+     ret = ioctl(fd, type, arg);
++    accel_ioctl_end();
+     if (ret == -1) {
+         ret = -errno;
+     }
+-- 
+2.31.1
+
diff --git a/kvm-accel-introduce-accelerator-blocker-API.patch b/kvm-accel-introduce-accelerator-blocker-API.patch
new file mode 100644
index 0000000..29a8ac5
--- /dev/null
+++ b/kvm-accel-introduce-accelerator-blocker-API.patch
@@ -0,0 +1,348 @@
+From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
+From: Emanuele Giuseppe Esposito
+Date: Mon, 16 Jan 2023 07:16:41 -0500
+Subject: [PATCH 29/31] accel: introduce accelerator blocker API
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Emanuele Giuseppe Esposito
+RH-MergeRequest: 138: accel: introduce accelerator blocker API
+RH-Bugzilla: 1979276
+RH-Acked-by: Paolo Bonzini
+RH-Acked-by: David Hildenbrand
+RH-Acked-by: Miroslav Rezanina
+RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
+
+commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
+Author: Emanuele Giuseppe Esposito
+Date: Fri Nov 11 10:47:56 2022 -0500
+
+    accel: introduce accelerator blocker API
+
+    This API allows the accelerators to prevent vcpus from issuing
+    new ioctls while executing a critical section marked with the
+    accel_ioctl_inhibit_begin/end functions.
+
+    Note that all functions submitting ioctls must mark where the
+    ioctl is being called with accel_{cpu_}ioctl_begin/end().
+
+    This API requires the caller to always hold the BQL.
+    API documentation is in sysemu/accel-blocker.h
+
+    Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
+    (to minimize cache line bouncing) to prevent new ioctls from running
+    when the critical section starts, and a QemuEvent to wait for all
+    running ioctls to finish.
+
+    Signed-off-by: Emanuele Giuseppe Esposito
+    Reviewed-by: Philippe Mathieu-Daudé
+    Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
+    Signed-off-by: Paolo Bonzini
+
+Conflicts:
+	util/meson.build: "interval-tree.c" does not exist
+
+Signed-off-by: Emanuele Giuseppe Esposito
+---
+ accel/accel-blocker.c          | 154 +++++++++++++++++++++++++++++++++
+ accel/meson.build              |   2 +-
+ hw/core/cpu-common.c           |   2 +
+ include/hw/core/cpu.h          |   3 +
+ include/sysemu/accel-blocker.h |  56 ++++++++++++
+ util/meson.build               |   2 +-
+ 6 files changed, 217 insertions(+), 2 deletions(-)
+ create mode 100644 accel/accel-blocker.c
+ create mode 100644 include/sysemu/accel-blocker.h
+
+diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
+new file mode 100644
+index 0000000000..1e7f423462
+--- /dev/null
++++ b/accel/accel-blocker.c
+@@ -0,0 +1,154 @@
++/*
++ * Lock to inhibit accelerator ioctls
++ *
++ * Copyright (c) 2022 Red Hat Inc.
++ *
++ * Author: Emanuele Giuseppe Esposito
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a copy
++ * of this software and associated documentation files (the "Software"), to deal
++ * in the Software without restriction, including without limitation the rights
++ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
++ * copies of the Software, and to permit persons to whom the Software is
++ * furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
++ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
++ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
++ * THE SOFTWARE.
++ */
++
++#include "qemu/osdep.h"
++#include "qemu/thread.h"
++#include "qemu/main-loop.h"
++#include "hw/core/cpu.h"
++#include "sysemu/accel-blocker.h"
++
++static QemuLockCnt accel_in_ioctl_lock;
++static QemuEvent accel_in_ioctl_event;
++
++void accel_blocker_init(void)
++{
++    qemu_lockcnt_init(&accel_in_ioctl_lock);
++    qemu_event_init(&accel_in_ioctl_event, false);
++}
++
++void accel_ioctl_begin(void)
++{
++    if (likely(qemu_mutex_iothread_locked())) {
++        return;
++    }
++
++    /* block if lock is taken in accel_ioctl_inhibit_begin() */
++    qemu_lockcnt_inc(&accel_in_ioctl_lock);
++}
++
++void accel_ioctl_end(void)
++{
++    if (likely(qemu_mutex_iothread_locked())) {
++        return;
++    }
++
++    qemu_lockcnt_dec(&accel_in_ioctl_lock);
++    /* change event to SET. If event was BUSY, wake up all waiters */
++    qemu_event_set(&accel_in_ioctl_event);
++}
++
++void accel_cpu_ioctl_begin(CPUState *cpu)
++{
++    if (unlikely(qemu_mutex_iothread_locked())) {
++        return;
++    }
++
++    /* block if lock is taken in accel_ioctl_inhibit_begin() */
++    qemu_lockcnt_inc(&cpu->in_ioctl_lock);
++}
++
++void accel_cpu_ioctl_end(CPUState *cpu)
++{
++    if (unlikely(qemu_mutex_iothread_locked())) {
++        return;
++    }
++
++    qemu_lockcnt_dec(&cpu->in_ioctl_lock);
++    /* change event to SET. If event was BUSY, wake up all waiters */
++    qemu_event_set(&accel_in_ioctl_event);
++}
++
++static bool accel_has_to_wait(void)
++{
++    CPUState *cpu;
++    bool needs_to_wait = false;
++
++    CPU_FOREACH(cpu) {
++        if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
++            /* exit the ioctl, if vcpu is running it */
++            qemu_cpu_kick(cpu);
++            needs_to_wait = true;
++        }
++    }
++
++    return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
++}
++
++void accel_ioctl_inhibit_begin(void)
++{
++    CPUState *cpu;
++
++    /*
++     * We allow to inhibit only when holding the BQL, so we can identify
++     * when an inhibitor wants to issue an ioctl easily.
++     */
++    g_assert(qemu_mutex_iothread_locked());
++
++    /* Block further invocations of the ioctls outside the BQL. */
++    CPU_FOREACH(cpu) {
++        qemu_lockcnt_lock(&cpu->in_ioctl_lock);
++    }
++    qemu_lockcnt_lock(&accel_in_ioctl_lock);
++
++    /* Keep waiting while there are running ioctls */
++    while (true) {
++
++        /* Reset event to FREE. */
++        qemu_event_reset(&accel_in_ioctl_event);
++
++        if (accel_has_to_wait()) {
++            /*
++             * If event is still FREE, and there are ioctls still in progress,
++             * wait.
++             *
++             * If an ioctl finishes before qemu_event_wait(), it will change
++             * the event state to SET. This will prevent qemu_event_wait() from
++             * blocking, but it's not a problem because if other ioctls are
++             * still running the loop will iterate once more and reset the event
++             * status to FREE so that it can wait properly.
++             *
++             * If an ioctl finishes while qemu_event_wait() is blocking, then
++             * it will be woken up, but also here the while loop makes sure
++             * to re-enter the wait if there are other running ioctls.
++             */
++            qemu_event_wait(&accel_in_ioctl_event);
++        } else {
++            /* No ioctl is running */
++            return;
++        }
++    }
++}
++
++void accel_ioctl_inhibit_end(void)
++{
++    CPUState *cpu;
++
++    qemu_lockcnt_unlock(&accel_in_ioctl_lock);
++    CPU_FOREACH(cpu) {
++        qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
++    }
++}
++
+diff --git a/accel/meson.build b/accel/meson.build
+index 259c35c4c8..061332610f 100644
+--- a/accel/meson.build
++++ b/accel/meson.build
+@@ -1,4 +1,4 @@
+-specific_ss.add(files('accel-common.c'))
++specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
+ softmmu_ss.add(files('accel-softmmu.c'))
+ user_ss.add(files('accel-user.c'))
+ 
+diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
+index f9fdd46b9d..8d6a4b1b65 100644
+--- a/hw/core/cpu-common.c
++++ b/hw/core/cpu-common.c
+@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
+     cpu->nr_threads = 1;
+ 
+     qemu_mutex_init(&cpu->work_mutex);
++    qemu_lockcnt_init(&cpu->in_ioctl_lock);
+     QSIMPLEQ_INIT(&cpu->work_list);
+     QTAILQ_INIT(&cpu->breakpoints);
+     QTAILQ_INIT(&cpu->watchpoints);
+@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
+ {
+     CPUState *cpu = CPU(obj);
+ 
++    qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
+     qemu_mutex_destroy(&cpu->work_mutex);
+ }
+ 
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
+index 8830546121..2417597236 100644
+--- a/include/hw/core/cpu.h
++++ b/include/hw/core/cpu.h
+@@ -398,6 +398,9 @@ struct CPUState {
+     uint32_t kvm_fetch_index;
+     uint64_t dirty_pages;
+ 
++    /* Used by the accel-blocker API: CPU is executing an ioctl() */
++    QemuLockCnt in_ioctl_lock;
++
+     /* Used for events with 'vcpu' and *without* the 'disabled' properties */
+     DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
+     DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
+diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
+new file mode 100644
+index 0000000000..72020529ef
+--- /dev/null
++++ b/include/sysemu/accel-blocker.h
+@@ -0,0 +1,56 @@
++/*
++ * Accelerator blocking API, to prevent new ioctls from starting and to wait
++ * for the running ones to finish.
++ * This mechanism differs from pause/resume_all_vcpus() in that it does not
++ * release the BQL.
++ *
++ * Copyright (c) 2022 Red Hat Inc.
++ *
++ * Author: Emanuele Giuseppe Esposito
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++#ifndef ACCEL_BLOCKER_H
++#define ACCEL_BLOCKER_H
++
++#include "qemu/osdep.h"
++#include "sysemu/cpus.h"
++
++extern void accel_blocker_init(void);
++
++/*
++ * accel_{cpu_}ioctl_begin/end:
++ * Mark when an ioctl is about to run or has just finished.
++ *
++ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
++ * called, preventing new ioctls from running. They will continue only after
++ * accel_ioctl_inhibit_end().
++ */
++extern void accel_ioctl_begin(void);
++extern void accel_ioctl_end(void);
++extern void accel_cpu_ioctl_begin(CPUState *cpu);
++extern void accel_cpu_ioctl_end(CPUState *cpu);
++
++/*
++ * accel_ioctl_inhibit_begin: start critical section
++ *
++ * This function makes sure that:
++ * 1) incoming accel_{cpu_}ioctl_begin() calls block
++ * 2) all ioctls that were already running reach accel_{cpu_}ioctl_end(),
++ *    kicking vcpus if necessary.
++ *
++ * This allows the caller to access shared data or perform operations without
++ * worrying about concurrent vcpu accesses.
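++ *
++ * A sketch of the intended use (the caller must hold the BQL throughout;
++ * the memslot update is only an illustrative example, it is not part of
++ * this API):
++ *
++ *     accel_ioctl_inhibit_begin();
++ *     ... modify memslots: no accelerator ioctl can run concurrently ...
++ *     accel_ioctl_inhibit_end();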
++ */ ++extern void accel_ioctl_inhibit_begin(void); ++ ++/* ++ * accel_ioctl_inhibit_end: end critical section started by ++ * accel_ioctl_inhibit_begin() ++ * ++ * This function allows blocked accel_{cpu_}ioctl_begin() to continue. ++ */ ++extern void accel_ioctl_inhibit_end(void); ++ ++#endif /* ACCEL_BLOCKER_H */ +diff --git a/util/meson.build b/util/meson.build +index 25b9b61f98..85a5504c4d 100644 +--- a/util/meson.build ++++ b/util/meson.build +@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c')) + util_ss.add(files('yank.c')) + util_ss.add(files('int128.c')) + util_ss.add(files('memalign.c')) ++util_ss.add(files('lockcnt.c')) + + if have_user + util_ss.add(files('selfmap.c')) +@@ -71,7 +72,6 @@ endif + if have_block or have_ga + util_ss.add(files('aiocb.c', 'async.c')) + util_ss.add(files('base64.c')) +- util_ss.add(files('lockcnt.c')) + util_ss.add(files('main-loop.c')) + util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c')) + util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND']))) +-- +2.31.1 + diff --git a/kvm-block-Call-drain-callbacks-only-once.patch b/kvm-block-Call-drain-callbacks-only-once.patch new file mode 100644 index 0000000..04f1dda --- /dev/null +++ b/kvm-block-Call-drain-callbacks-only-once.patch @@ -0,0 +1,250 @@ +From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:06 +0100 +Subject: [PATCH 24/31] block: Call drain callbacks only once + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s) + +We only need to call both the BlockDriver's callback and the parent +callbacks when going from undrained to drained or vice versa. A second +drain section doesn't make a difference for the driver or the parent, +they weren't supposed to send new requests before and after the second +drain. + +One thing that gets in the way is the 'ignore_bds_parents' parameter in +bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that +bdrv_drain_all_begin() increases bs->quiesce_counter, but does not +quiesce the parent through BdrvChildClass callbacks. If an additional +drain section is started now, bs->quiesce_counter will be non-zero, but +we would still need to quiesce the parent through BdrvChildClass in +order to keep things consistent (and unquiesce it on the matching +bdrv_drained_end(), even though the counter would not reach 0 yet as +long as the bdrv_drain_all() section is still active). + +Instead of keeping track of this, let's just get rid of the parameter. +It was introduced in commit 6cd5c9d7b2d as an optimisation so that +during bdrv_drain_all(), we wouldn't recursively drain all parents up to +the root for each node, resulting in quadratic complexity. As it happens, +calling the callbacks only once solves the same problem, so as of this +patch, we'll still have O(n) complexity and ignore_bds_parents is not +needed any more. + +This patch only ignores the 'ignore_bds_parents' parameter. It will be +removed in a separate patch. 
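+
+In sketch form (simplified from the io.c hunks below, with the
+aio_*_external calls omitted), both paths now key the driver and parent
+notifications to the quiesce_counter transitions, so nested drain
+sections become no-ops for them:
+
+    /* begin: only the 0 -> 1 transition quiesces driver and parents */
+    if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+        bdrv_parent_drained_begin(bs, parent, false);
+        if (bs->drv && bs->drv->bdrv_drain_begin) {
+            bs->drv->bdrv_drain_begin(bs);
+        }
+    }
+
+    /* end: only the 1 -> 0 transition lets requests in again */
+    if (qatomic_fetch_dec(&bs->quiesce_counter) == 1) {
+        if (bs->drv && bs->drv->bdrv_drain_end) {
+            bs->drv->bdrv_drain_end(bs);
+        }
+        bdrv_parent_drained_end(bs, parent, false);
+    }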
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-12-kwolf@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1) +Signed-off-by: Stefano Garzarella +--- + block.c | 25 +++++++------------------ + block/io.c | 30 ++++++++++++++++++------------ + include/block/block_int-common.h | 8 ++++---- + tests/unit/test-bdrv-drain.c | 16 ++++++++++------ + 4 files changed, 39 insertions(+), 40 deletions(-) + +diff --git a/block.c b/block.c +index e0e3b21790..5a583e260d 100644 +--- a/block.c ++++ b/block.c +@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + { + BlockDriverState *old_bs = child->bs; + int new_bs_quiesce_counter; +- int drain_saldo; + + assert(!child->frozen); + assert(old_bs != new_bs); +@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs)); + } + +- new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); +- drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter; +- + /* + * If the new child node is drained but the old one was not, flush + * all outstanding requests to the old child node. + */ +- while (drain_saldo > 0 && child->klass->drained_begin) { ++ new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0); ++ if (new_bs_quiesce_counter && !child->quiesced_parent) { + bdrv_parent_drained_begin_single(child, true); +- drain_saldo--; + } + + if (old_bs) { +@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + if (new_bs) { + assert_bdrv_graph_writable(new_bs); + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); +- +- /* +- * Polling in bdrv_parent_drained_begin_single() may have led to the new +- * node's quiesce_counter having been decreased. Not a problem, we just +- * need to recognize this here and then invoke drained_end appropriately +- * more often. +- */ +- assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); +- drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; +- + if (child->klass->attach) { + child->klass->attach(child); + } +@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + /* + * If the old child node was drained but the new one is not, allow + * requests to come in only after the new node has been attached. ++ * ++ * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single() ++ * polls, which could have changed the value. + */ +- while (drain_saldo < 0 && child->klass->drained_end) { ++ new_bs_quiesce_counter = (new_bs ? 
new_bs->quiesce_counter : 0); ++ if (!new_bs_quiesce_counter && child->quiesced_parent) { + bdrv_parent_drained_end_single(child); +- drain_saldo++; + } + } + +diff --git a/block/io.c b/block/io.c +index 75224480d0..87d6f22ec4 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + { + IO_OR_GS_CODE(); + +- assert(c->parent_quiesce_counter > 0); +- c->parent_quiesce_counter--; ++ assert(c->quiesced_parent); ++ c->quiesced_parent = false; ++ + if (c->klass->drained_end) { + c->klass->drained_end(c); + } +@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + { + AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); +- c->parent_quiesce_counter++; ++ ++ assert(!c->quiesced_parent); ++ c->quiesced_parent = true; ++ + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- } + +- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- if (bs->drv && bs->drv->bdrv_drain_begin) { +- bs->drv->bdrv_drain_begin(bs); ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_begin(bs, parent, false); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } + } + } + +@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- if (bs->drv && bs->drv->bdrv_drain_end) { +- bs->drv->bdrv_drain_end(bs); +- } +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents); +- + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); ++ } ++ /* TODO Remove ignore_bds_parents, we don't consider it any more */ ++ bdrv_parent_drained_end(bs, parent, false); ++ + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 791dddfd7d..a6bc6b7fe9 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -980,13 +980,13 @@ struct BdrvChild { + bool frozen; + + /* +- * How many times the parent of this child has been drained ++ * True if the parent of this child has been drained by this BdrvChild + * (through klass->drained_*). +- * Usually, this is equal to bs->quiesce_counter (potentially +- * reduced by bdrv_drain_all_count). It may differ while the ++ * ++ * It is generally true if bs->quiesce_counter > 0. It may differ while the + * child is entering or leaving a drained section. 
+ */ +- int parent_quiesce_counter; ++ bool quiesced_parent; + + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index dda08de8db..172bc6debc 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive) + + do_drain_begin(drain_type, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ if (drain_type == BDRV_DRAIN_ALL) { ++ g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ } else { ++ g_assert_cmpint(bs->quiesce_counter, ==, 1); ++ } + g_assert_cmpint(backing->quiesce_counter, ==, !!recursive); + + do_drain_end(drain_type, bs); +@@ -348,8 +352,8 @@ static void test_nested(void) + + for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) { + for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) { +- int backing_quiesce = (outer != BDRV_DRAIN) + +- (inner != BDRV_DRAIN); ++ int backing_quiesce = (outer == BDRV_DRAIN_ALL) + ++ (inner == BDRV_DRAIN_ALL); + + g_assert_cmpint(bs->quiesce_counter, ==, 0); + g_assert_cmpint(backing->quiesce_counter, ==, 0); +@@ -359,10 +363,10 @@ static void test_nested(void) + do_drain_begin(outer, bs); + do_drain_begin(inner, bs); + +- g_assert_cmpint(bs->quiesce_counter, ==, 2); ++ g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce); + g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce); +- g_assert_cmpint(s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce); ++ g_assert_cmpint(s->drain_count, ==, 1); ++ g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce); + + do_drain_end(inner, bs); + do_drain_end(outer, bs); +-- +2.31.1 + diff --git a/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch new file mode 100644 index 0000000..80018cc --- /dev/null +++ b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch @@ -0,0 +1,298 @@ +From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:09 +0100 +Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s) + +In order to make sure that bdrv_replace_child_noperm() doesn't have to +poll any more, get rid of the bdrv_parent_drained_begin_single() call. + +This is possible now because we can require that the parent is already +drained through the child in question when the function is called and we +don't call the parent drain callbacks more than once. + +The additional drain calls needed in callers cause the test case to run +its code in the drain handler too early (bdrv_attach_child() drains +now), so modify it to only enable the code after the test setup has +completed. 
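+
+The resulting caller contract looks roughly like this (a simplified
+sketch, error handling and the transaction machinery omitted; compare
+the bdrv_remove_child() hunk below):
+
+    bdrv_drained_begin(bs);                      /* parent quiesced via child */
+    bdrv_replace_child_tran(child, NULL, tran);  /* must not poll any more */
+    bdrv_drained_end(bs);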
+
+Signed-off-by: Kevin Wolf
+Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz
+Reviewed-by: Vladimir Sementsov-Ogievskiy
+Signed-off-by: Kevin Wolf
+(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
+Signed-off-by: Stefano Garzarella
+---
+ block.c                      | 103 ++++++++++++++++++++++++++++++-----
+ block/io.c                   |   2 +-
+ include/block/block-io.h     |   8 +++
+ tests/unit/test-bdrv-drain.c |  10 ++++
+ 4 files changed, 108 insertions(+), 15 deletions(-)
+
+diff --git a/block.c b/block.c
+index af31a94863..65588d313a 100644
+--- a/block.c
++++ b/block.c
+@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
+ 
+     GLOBAL_STATE_CODE();
+     /* old_bs reference is transparently moved from @s to @s->child */
++    if (!s->child->bs) {
++        /*
++         * The parents were undrained when removing old_bs from the child. New
++         * requests can't have been made, though, because the child was empty.
++         *
++         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
++         * undraining the parent in the first place. Once this is done, having
++         * new_bs drained when calling bdrv_replace_child_tran() is not a
++         * requirement any more.
++         */
++        bdrv_parent_drained_begin_single(s->child, false);
++        assert(!bdrv_parent_drained_poll_single(s->child));
++    }
++    assert(s->child->quiesced_parent);
+     bdrv_replace_child_noperm(s->child, s->old_bs);
+     bdrv_unref(new_bs);
+ }
+@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
+  *
+  * Note: real unref of old_bs is done only on commit.
+  *
++ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
++ * kept drained until the transaction is completed.
++ *
+  * The function doesn't update permissions, caller is responsible for this.
+  */
+ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+                                     Transaction *tran)
+ {
+     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
++
++    assert(child->quiesced_parent);
++    assert(!new_bs || new_bs->quiesce_counter);
++
+     *s = (BdrvReplaceChildState) {
+         .child = child,
+         .old_bs = child->bs,
+@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
+     return permissions[qapi_perm];
+ }
+ 
++/*
++ * Replaces the node that a BdrvChild points to without updating permissions.
++ *
++ * If @new_bs is non-NULL, the parent of @child must already be drained through
++ * @child.
++ *
++ * This function does not poll.
++ */
+ static void bdrv_replace_child_noperm(BdrvChild *child,
+                                       BlockDriverState *new_bs)
+ {
+@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     int new_bs_quiesce_counter;
+ 
+     assert(!child->frozen);
++
++    /*
++     * If we want to change the BdrvChild to point to a drained node as its new
++     * child->bs, we need to make sure that its new parent is drained, too. In
++     * other words, either child->quiesced_parent must already be true or we must
++     * be able to set it and keep the parent's quiesce_counter consistent with
++     * that, but without polling or starting new requests (this function
++     * guarantees that it doesn't poll, and starting new requests would be
++     * against the invariants of drain sections).
++     *
++     * To keep things simple, we pick the first option (child->quiesced_parent
++     * must already be true). We also generalise the rule a bit to make it
++     * easier to verify in callers and more likely to be covered in test cases:
++     * The parent must be quiesced through this child even if new_bs isn't
++     * currently drained.
++     *
++     * The only exception is for callers that always pass new_bs == NULL. In
++     * this case, we obviously never need to consider the case of a drained
++     * new_bs, so we can keep the callers simpler by allowing them not to drain
++     * the parent.
++     */
++    assert(!new_bs || child->quiesced_parent);
+     assert(old_bs != new_bs);
+     GLOBAL_STATE_CODE();
+ 
+     if (old_bs && new_bs) {
+         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+     }
+ 
+-    /*
+-     * If the new child node is drained but the old one was not, flush
+-     * all outstanding requests to the old child node.
+-     */
+-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+-    if (new_bs_quiesce_counter && !child->quiesced_parent) {
+-        bdrv_parent_drained_begin_single(child, true);
+-    }
+-
+     if (old_bs) {
+         if (child->klass->detach) {
+             child->klass->detach(child);
+         }
+@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     }
+ 
+     /*
+-     * If the old child node was drained but the new one is not, allow
+-     * requests to come in only after the new node has been attached.
+-     *
+-     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+-     * polls, which could have changed the value.
++     * If the parent was drained through this BdrvChild previously, but new_bs
++     * is not drained, allow requests to come in only after the new node has
++     * been attached.
+      */
+     new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+     if (!new_bs_quiesce_counter && child->quiesced_parent) {
+@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
+     }
+ 
+     bdrv_ref(child_bs);
++    /*
++     * Let every new BdrvChild start with a drained parent. Inserting the child
++     * in the graph with bdrv_replace_child_noperm() will undrain it if
++     * @child_bs is not drained.
++     *
++     * The child was only just created and is not yet visible in global state
++     * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
++     * could have sent requests and polling is not necessary.
++     *
++     * Note that this means that the parent isn't fully drained yet, we only
++     * stop new requests from coming in. This is fine, we don't care about the
++     * old requests here, they are not for this child. If another place enters a
++     * drain section for the same parent, but wants it to be fully quiesced, it
++     * will not run most of the code in .drained_begin() again (which is not
++     * a problem, we already did this), but it will still poll until the parent
++     * is fully quiesced, so it will not be negatively affected either.
++ */ ++ bdrv_parent_drained_begin_single(new_child, false); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + } + + if (child->bs) { ++ BlockDriverState *bs = child->bs; ++ bdrv_drained_begin(bs); + bdrv_replace_child_tran(child, NULL, tran); ++ bdrv_drained_end(bs); + } + + tran_add(tran, &bdrv_remove_child_drv, child); + } + ++static void undrain_on_clean_cb(void *opaque) ++{ ++ bdrv_drained_end(opaque); ++} ++ ++static TransactionActionDrv undrain_on_clean = { ++ .clean = undrain_on_clean_cb, ++}; ++ + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from, + + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(from); ++ bdrv_drained_begin(to); ++ tran_add(tran, &undrain_on_clean, from); ++ tran_add(tran, &undrain_on_clean, to); ++ + QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) { + assert(c->bs == from); + if (!should_update_child(c, to)) { +diff --git a/block/io.c b/block/io.c +index 5e9150d92c..ae64830eac 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + } + } + +-static bool bdrv_parent_drained_poll_single(BdrvChild *c) ++bool bdrv_parent_drained_poll_single(BdrvChild *c) + { + if (c->klass->drained_poll) { + return c->klass->drained_poll(c); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 8f5e75756a..65e6d2569b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + */ + void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + ++/** ++ * bdrv_parent_drained_poll_single: ++ * ++ * Returns true if there is any pending activity to cease before @c can be ++ * called quiesced, false otherwise. 
++ */ ++bool bdrv_parent_drained_poll_single(BdrvChild *c); ++ + /** + * bdrv_parent_drained_end_single: + * +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 172bc6debc..2686a8acee 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void) + + + typedef struct BDRVReplaceTestState { ++ bool setup_completed; + bool was_drained; + bool was_undrained; + bool has_read; +@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + if (!s->drain_count) { + s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); + bdrv_inc_in_flight(bs); +@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + ++ if (!s->setup_completed) { ++ return; ++ } ++ + g_assert(s->drain_count > 0); + if (!--s->drain_count) { + s->was_undrained = true; +@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count, + bdrv_ref(old_child_bs); + bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds, + BDRV_CHILD_COW, &error_abort); ++ parent_s->setup_completed = true; + + for (i = 0; i < old_drain_count; i++) { + bdrv_drained_begin(old_child_bs); +-- +2.31.1 + diff --git a/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch new file mode 100644 index 0000000..e3bf1e2 --- /dev/null +++ b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch @@ -0,0 +1,54 @@ +From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:03 +0100 +Subject: [PATCH 21/31] block: Don't use subtree drains in + bdrv_drop_intermediate() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s) + +Instead of using a subtree drain from the top node (which also drains +child nodes of base that we're not even interested in), use a normal +drain for base, which automatically drains all of the parents, too. 
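+
+A sketch of why draining base alone is enough here (conceptual only; the
+real logic is the recursive parent drain in block/io.c): quiescing a node
+also quiesces every parent that reaches it through a BdrvChild, up to the
+root, so top is covered as well:
+
+    /* inside bdrv_drained_begin(base), conceptually: */
+    QLIST_FOREACH(c, &base->parents, next_parent) {
+        bdrv_parent_drained_begin_single(c, poll);  /* recurses upwards */
+    }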
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-9-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index cb5e96b1cf..b3449a312e 100644 +--- a/block.c ++++ b/block.c +@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + GLOBAL_STATE_CODE(); + + bdrv_ref(top); +- bdrv_subtree_drained_begin(top); ++ bdrv_drained_begin(base); + + if (!top->drv || !base->drv) { + goto exit; +@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base, + + ret = 0; + exit: +- bdrv_subtree_drained_end(top); ++ bdrv_drained_end(base); + bdrv_unref(top); + return ret; + } +-- +2.31.1 + diff --git a/kvm-block-Drain-individual-nodes-during-reopen.patch b/kvm-block-Drain-individual-nodes-during-reopen.patch new file mode 100644 index 0000000..24661fb --- /dev/null +++ b/kvm-block-Drain-individual-nodes-during-reopen.patch @@ -0,0 +1,157 @@ +From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:02 +0100 +Subject: [PATCH 20/31] block: Drain individual nodes during reopen + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s) + +bdrv_reopen() and friends use subtree drains as a lazy way of covering +all the nodes they touch. Turns out that this lazy way is a lot more +complicated than just draining the nodes individually, even not +accounting for the additional complexity in the drain mechanism itself. + +Simplify the code by switching to draining the individual nodes that are +already managed in the BlockReopenQueue anyway. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-8-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8) +Signed-off-by: Stefano Garzarella +--- + block.c | 16 +++++++++------- + block/replication.c | 6 ------ + blockdev.c | 13 ------------- + 3 files changed, 9 insertions(+), 26 deletions(-) + +diff --git a/block.c b/block.c +index 46df410b07..cb5e96b1cf 100644 +--- a/block.c ++++ b/block.c +@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * returns a pointer to bs_queue, which is either the newly allocated + * bs_queue, or the existing bs_queue being used. + * +- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * bs is drained here and undrained by bdrv_reopen_queue_free(). + * + * To be called with bs->aio_context locked. + */ +@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + int flags; + QemuOpts *opts; + +- /* Make sure that the caller remembered to use a drained section. This is +- * important to avoid graph changes between the recursive queuing here and +- * bdrv_reopen_multiple(). 
*/ +- assert(bs->quiesce_counter > 0); + GLOBAL_STATE_CODE(); + ++ bdrv_drained_begin(bs); ++ + if (bs_queue == NULL) { + bs_queue = g_new0(BlockReopenQueue, 1); + QTAILQ_INIT(bs_queue); +@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue) + if (bs_queue) { + BlockReopenQueueEntry *bs_entry, *next; + QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) { ++ AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs); ++ ++ aio_context_acquire(ctx); ++ bdrv_drained_end(bs_entry->state.bs); ++ aio_context_release(ctx); ++ + qobject_unref(bs_entry->state.explicit_options); + qobject_unref(bs_entry->state.options); + g_free(bs_entry); +@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + + GLOBAL_STATE_CODE(); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + + if (ctx != qemu_get_aio_context()) { +@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + if (ctx != qemu_get_aio_context()) { + aio_context_acquire(ctx); + } +- bdrv_subtree_drained_end(bs); + + return ret; + } +diff --git a/block/replication.c b/block/replication.c +index f1eed25e43..c62f48a874 100644 +--- a/block/replication.c ++++ b/block/replication.c +@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs); + } + +- bdrv_subtree_drained_begin(hidden_disk->bs); +- bdrv_subtree_drained_begin(secondary_disk->bs); +- + if (s->orig_hidden_read_only) { + QDict *opts = qdict_new(); + qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable); +@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable, + aio_context_acquire(ctx); + } + } +- +- bdrv_subtree_drained_end(hidden_disk->bs); +- bdrv_subtree_drained_end(secondary_disk->bs); + } + + static void backup_job_cleanup(BlockDriverState *bs) +diff --git a/blockdev.c b/blockdev.c +index 3f1dec6242..8ffb3d9537 100644 +--- a/blockdev.c ++++ b/blockdev.c +@@ -3547,8 +3547,6 @@ fail: + void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + { + BlockReopenQueue *queue = NULL; +- GSList *drained = NULL; +- GSList *p; + + /* Add each one of the BDS that we want to reopen to the queue */ + for (; reopen_list != NULL; reopen_list = reopen_list->next) { +@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + ctx = bdrv_get_aio_context(bs); + aio_context_acquire(ctx); + +- bdrv_subtree_drained_begin(bs); + queue = bdrv_reopen_queue(queue, bs, qdict, false); +- drained = g_slist_prepend(drained, bs); + + aio_context_release(ctx); + } +@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp) + + fail: + bdrv_reopen_queue_free(queue); +- for (p = drained; p; p = p->next) { +- BlockDriverState *bs = p->data; +- AioContext *ctx = bdrv_get_aio_context(bs); +- +- aio_context_acquire(ctx); +- bdrv_subtree_drained_end(bs); +- aio_context_release(ctx); +- } +- g_slist_free(drained); + } + + void qmp_blockdev_del(const char *node_name, Error **errp) +-- +2.31.1 + diff --git a/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch new file mode 100644 index 0000000..1ae73c7 --- /dev/null +++ b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch @@ -0,0 +1,96 @@ +From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001 +From: Kevin 
Wolf +Date: Fri, 18 Nov 2022 18:41:08 +0100 +Subject: [PATCH 26/31] block: Drop out of coroutine in + bdrv_do_drained_begin_quiesce() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s) + +The next patch adds a parent drain to bdrv_attach_child_common(), which +shouldn't be, but is currently called from coroutines in some cases (e.g. +.bdrv_co_create implementations generally open new nodes). Therefore, +the assertion that we're not in a coroutine doesn't hold true any more. + +We could just remove the assertion because there is nothing in the +function that should be in conflict with running in a coroutine, but +just to be on the safe side, we can reverse the caller relationship +between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so +that the latter also just drops out of coroutine context and we can +still be certain in the future that any drain code doesn't run in +coroutines. + +As a nice side effect, the structure of bdrv_do_drained_begin() is now +symmetrical with bdrv_do_drained_end(). + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-14-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 25 ++++++++++++------------- + 1 file changed, 12 insertions(+), 13 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e9503df6a..5e9150d92c 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool poll) + { + IO_OR_GS_CODE(); +- assert(!qemu_in_coroutine()); ++ ++ if (qemu_in_coroutine()) { ++ bdrv_co_yield_to_drain(bs, true, parent, poll); ++ return; ++ } + + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { +@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) + bs->drv->bdrv_drain_begin(bs); + } + } +-} +- +-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool poll) +-{ +- if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, poll); +- return; +- } +- +- bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. 
+@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + } + } + ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) ++{ ++ bdrv_do_drained_begin(bs, parent, false); ++} ++ + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +-- +2.31.1 + diff --git a/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch new file mode 100644 index 0000000..b73b8fe --- /dev/null +++ b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch @@ -0,0 +1,67 @@ +From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:01 +0100 +Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s) + +Callers don't agree whether bdrv_reopen_queue_child() should be called +with the AioContext lock held or not. Standardise on holding the lock +(as done by QMP blockdev-reopen and the replication block driver) and +fix bdrv_reopen() to do the same. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-7-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814) +Signed-off-by: Stefano Garzarella +--- + block.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/block.c b/block.c +index 7999fd08c5..46df410b07 100644 +--- a/block.c ++++ b/block.c +@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + * bs_queue, or the existing bs_queue being used. + * + * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple(). ++ * ++ * To be called with bs->aio_context locked. 
+ */ + static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + BlockDriverState *bs, +@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue, + return bs_queue; + } + ++/* To be called with bs->aio_context locked */ + BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, bool keep_old_opts) +@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts, + GLOBAL_STATE_CODE(); + + bdrv_subtree_drained_begin(bs); ++ queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); ++ + if (ctx != qemu_get_aio_context()) { + aio_context_release(ctx); + } +- +- queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts); + ret = bdrv_reopen_multiple(queue, errp); + + if (ctx != qemu_get_aio_context()) { +-- +2.31.1 + diff --git a/kvm-block-Inline-bdrv_drain_invoke.patch b/kvm-block-Inline-bdrv_drain_invoke.patch new file mode 100644 index 0000000..07160dc --- /dev/null +++ b/kvm-block-Inline-bdrv_drain_invoke.patch @@ -0,0 +1,81 @@ +From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:00 +0100 +Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s) + +bdrv_drain_invoke() has now two entirely separate cases that share no +code any more and are selected depending on a bool parameter. Each case +has only one caller. Just inline the function. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-6-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121) +Signed-off-by: Stefano Garzarella +--- + block/io.c | 23 ++++++----------------- + 1 file changed, 6 insertions(+), 17 deletions(-) + +diff --git a/block/io.c b/block/io.c +index f4ca62b034..a25103be6f 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -242,21 +242,6 @@ typedef struct { + bool ignore_bds_parents; + } BdrvCoDrainData; + +-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) +-{ +- if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || +- (!begin && !bs->drv->bdrv_drain_end)) { +- return; +- } +- +- if (begin) { +- bs->drv->bdrv_drain_begin(bs); +- } else { +- bs->drv->bdrv_drain_end(bs); +- } +-} +- + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ + bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + BdrvChild *ignore_parent, bool ignore_bds_parents) +@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true); ++ if (bs->drv && bs->drv->bdrv_drain_begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false); ++ if (bs->drv && bs->drv->bdrv_drain_end) { ++ bs->drv->bdrv_drain_end(bs); ++ } + bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); +-- +2.31.1 + diff --git a/kvm-block-Remove-drained_end_counter.patch b/kvm-block-Remove-drained_end_counter.patch new file mode 100644 index 0000000..cfafc33 --- /dev/null +++ b/kvm-block-Remove-drained_end_counter.patch @@ -0,0 +1,433 @@ +From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:59 +0100 +Subject: [PATCH 17/31] block: Remove drained_end_counter + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s) + +drained_end_counter is unused now, nobody changes its value any more. It +can be removed. + +In cases where we had two almost identical functions that only differed +in whether the caller passes drained_end_counter, or whether they would +poll for a local drained_end_counter to reach 0, these become a single +function. 
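+
+Concretely, the old calling pattern (a sketch matching the io.c hunks
+below)
+
+    int drained_end_counter = 0;
+    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
+    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+
+collapses into a plain synchronous call:
+
+    bdrv_do_drained_end(bs, false, NULL, false);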
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Message-Id: <20221118174110.55183-5-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Signed-off-by: Kevin Wolf +(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f) +Signed-off-by: Stefano Garzarella +--- + block.c | 5 +- + block/block-backend.c | 4 +- + block/io.c | 98 ++++++++------------------------ + blockjob.c | 2 +- + include/block/block-io.h | 24 -------- + include/block/block_int-common.h | 6 +- + 6 files changed, 30 insertions(+), 109 deletions(-) + +diff --git a/block.c b/block.c +index 16a62a329c..7999fd08c5 100644 +--- a/block.c ++++ b/block.c +@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child) + return bdrv_drain_poll(bs, false, NULL, false); + } + +-static void bdrv_child_cb_drained_end(BdrvChild *child, +- int *drained_end_counter) ++static void bdrv_child_cb_drained_end(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_drained_end_no_poll(bs, drained_end_counter); ++ bdrv_drained_end(bs); + } + + static int bdrv_child_cb_inactivate(BdrvChild *child) +diff --git a/block/block-backend.c b/block/block-backend.c +index d98a96ff37..feaf2181fa 100644 +--- a/block/block-backend.c ++++ b/block/block-backend.c +@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format, + } + static void blk_root_drained_begin(BdrvChild *child); + static bool blk_root_drained_poll(BdrvChild *child); +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter); ++static void blk_root_drained_end(BdrvChild *child); + + static void blk_root_change_media(BdrvChild *child, bool load); + static void blk_root_resize(BdrvChild *child); +@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child) + return busy || !!blk->in_flight; + } + +-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter) ++static void blk_root_drained_end(BdrvChild *child) + { + BlockBackend *blk = child->opaque; + assert(blk->quiesce_counter); +diff --git a/block/io.c b/block/io.c +index c2ed4b2af9..f4ca62b034 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, + } + } + +-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c, +- int *drained_end_counter) ++void bdrv_parent_drained_end_single(BdrvChild *c) + { ++ IO_OR_GS_CODE(); ++ + assert(c->parent_quiesce_counter > 0); + c->parent_quiesce_counter--; + if (c->klass->drained_end) { +- c->klass->drained_end(c, drained_end_counter); ++ c->klass->drained_end(c); + } + } + +-void bdrv_parent_drained_end_single(BdrvChild *c) +-{ +- int drained_end_counter = 0; +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); +- IO_OR_GS_CODE(); +- bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter); +- AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0); +-} +- + static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents, +- int *drained_end_counter) ++ bool ignore_bds_parents) + { + BdrvChild *c; + +@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, + if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { + continue; + } +- bdrv_parent_drained_end_single_no_poll(c, drained_end_counter); ++ bdrv_parent_drained_end_single(c); + } + } + +@@ -249,12 +240,10 @@ typedef struct { + bool poll; + BdrvChild *parent; + 
bool ignore_bds_parents; +- int *drained_end_counter; + } BdrvCoDrainData; + + /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ +-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, +- int *drained_end_counter) ++static void bdrv_drain_invoke(BlockDriverState *bs, bool begin) + { + if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || + (!begin && !bs->drv->bdrv_drain_end)) { +@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + BdrvChild *parent, bool ignore_bds_parents, + bool poll); + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter); ++ BdrvChild *parent, bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- assert(!data->drained_end_counter); + bdrv_do_drained_begin(bs, data->recursive, data->parent, + data->ignore_bds_parents, data->poll); + } else { + assert(!data->poll); + bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents, +- data->drained_end_counter); ++ data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, bool recursive, + BdrvChild *parent, + bool ignore_bds_parents, +- bool poll, +- int *drained_end_counter) ++ bool poll) + { + BdrvCoDrainData data; + Coroutine *self = qemu_coroutine_self(); +@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, +- .drained_end_counter = drained_end_counter, + }; + + if (bs) { +@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + bdrv_parent_drained_begin(bs, parent, ignore_bds_parents); +- bdrv_drain_invoke(bs, true, NULL); ++ bdrv_drain_invoke(bs, true); + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll, NULL); ++ poll); + return; + } + +@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs) + + /** + * This function does not poll, nor must any of its recursively called +- * functions. The *drained_end_counter pointee will be incremented +- * once for every background operation scheduled, and decremented once +- * the operation settles. Therefore, the pointer must remain valid +- * until the pointee reaches 0. That implies that whoever sets up the +- * pointee has to poll until it is 0. +- * +- * We use atomic operations to access *drained_end_counter, because +- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of +- * @bs may contain nodes in different AioContexts, +- * (2) bdrv_drain_all_end() uses the same counter for all nodes, +- * regardless of which AioContext they are in. ++ * functions. 
+ */ + static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- int *drained_end_counter) ++ BdrvChild *parent, bool ignore_bds_parents) + { + BdrvChild *child; + int old_quiesce_counter; + +- assert(drained_end_counter != NULL); +- + if (qemu_in_coroutine()) { + bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false, drained_end_counter); ++ false); + return; + } + assert(bs->quiesce_counter > 0); + + /* Re-enable things in child-to-parent order */ +- bdrv_drain_invoke(bs, false, drained_end_counter); +- bdrv_parent_drained_end(bs, parent, ignore_bds_parents, +- drained_end_counter); ++ bdrv_drain_invoke(bs, false); ++ bdrv_parent_drained_end(bs, parent, ignore_bds_parents); + + old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter); + if (old_quiesce_counter == 1) { +@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + assert(!ignore_bds_parents); + bs->recursive_quiesce_counter--; + QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents, +- drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); + } + } + } + + void bdrv_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); +-} +- +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter) +-{ +- IO_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, false); + } + + void bdrv_subtree_drained_end(BlockDriverState *bs) + { +- int drained_end_counter = 0; + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter); +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); ++ bdrv_do_drained_end(bs, true, NULL, false); + } + + void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) + + void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) + { +- int drained_end_counter = 0; + int i; + IO_OR_GS_CODE(); + + for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false, +- &drained_end_counter); ++ bdrv_do_drained_end(child->bs, true, child, false); + } +- +- BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL); ++ bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); + return; + } + +@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void) + + void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + { +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + g_assert(bs->quiesce_counter > 0); + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + } +- BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0); + } + + void bdrv_drain_all_end(void) + { + BlockDriverState *bs = NULL; +- int drained_end_counter = 0; + GLOBAL_STATE_CODE(); + + /* +@@ -680,13 +632,11 @@ void 
bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter); ++ bdrv_do_drained_end(bs, false, NULL, true); + aio_context_release(aio_context); + } + + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +- AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0); +- + assert(bdrv_drain_all_count > 0); + bdrv_drain_all_count--; + } +diff --git a/blockjob.c b/blockjob.c +index f51d4e18f3..0ab721e139 100644 +--- a/blockjob.c ++++ b/blockjob.c +@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c) + } + } + +-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter) ++static void child_job_drained_end(BdrvChild *c) + { + BlockJob *job = c->opaque; + job_resume(&job->job); +diff --git a/include/block/block-io.h b/include/block/block-io.h +index b099d7db45..054e964c9b 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset, + int64_t bytes, BdrvRequestFlags read_flags, + BdrvRequestFlags write_flags); + +-/** +- * bdrv_drained_end_no_poll: +- * +- * Same as bdrv_drained_end(), but do not poll for the subgraph to +- * actually become unquiesced. Therefore, no graph changes will occur +- * with this function. +- * +- * *drained_end_counter is incremented for every background operation +- * that is scheduled, and will be decremented for every operation once +- * it settles. The caller must poll until it reaches 0. The counter +- * should be accessed using atomic operations only. +- */ +-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter); +- +- + /* + * "I/O or GS" API functions. These functions can run without + * the BQL, but only in one specific iothread/main loop. +@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); + * bdrv_parent_drained_end_single: + * + * End a quiesced section for the parent of @c. +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled, which may result in graph changes. + */ + void bdrv_parent_drained_end_single(BdrvChild *c); + +@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). +- * +- * This polls @bs's AioContext until all scheduled sub-drained_ends +- * have settled. On one hand, that may result in graph changes. On +- * the other, this requires that the caller either runs in the main +- * loop; or that all involved nodes (@bs and all of its parents) are +- * in the caller's AioContext. + */ + void bdrv_drained_end(BlockDriverState *bs); + +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 40d646d1ed..2b97576f6d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -939,15 +939,11 @@ struct BdrvChildClass { + * These functions must not change the graph (and therefore also must not + * call aio_poll(), which could change the graph indirectly). + * +- * If drained_end() schedules background operations, it must atomically +- * increment *drained_end_counter for each such operation and atomically +- * decrement it once the operation has settled. +- * + * Note that this can be nested. If drained_begin() was called twice, new + * I/O is allowed only after drained_end() was called twice, too. 
+ */ + void (*drained_begin)(BdrvChild *child); +- void (*drained_end)(BdrvChild *child, int *drained_end_counter); ++ void (*drained_end)(BdrvChild *child); + + /* + * Returns whether the parent has pending requests for the child. This +-- +2.31.1 + diff --git a/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch new file mode 100644 index 0000000..aa64bec --- /dev/null +++ b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch @@ -0,0 +1,274 @@ +From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:07 +0100 +Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from + drain_begin/end. + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s) + +ignore_bds_parents is now ignored during drain_begin and drain_end, so +we can just remove it there. It is still a valid optimisation for +drain_all in bdrv_drained_poll(), so leave it around there. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-13-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270) +Signed-off-by: Stefano Garzarella +--- + block.c | 2 +- + block/io.c | 58 +++++++++++++++------------------------- + include/block/block-io.h | 3 +-- + 3 files changed, 24 insertions(+), 39 deletions(-) + +diff --git a/block.c b/block.c +index 5a583e260d..af31a94863 100644 +--- a/block.c ++++ b/block.c +@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c) + static void bdrv_child_cb_drained_begin(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- bdrv_do_drained_begin_quiesce(bs, NULL, false); ++ bdrv_do_drained_begin_quiesce(bs, NULL); + } + + static bool bdrv_child_cb_drained_poll(BdrvChild *child) +diff --git a/block/io.c b/block/io.c +index 87d6f22ec4..2e9503df6a 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs); + static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, + int64_t offset, int64_t bytes, BdrvRequestFlags flags); + +-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c, *next; + + QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) { +- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_begin_single(c, false); +@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c) + } + } + +-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore, +- bool ignore_bds_parents) ++static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore) + { + BdrvChild *c; + + QLIST_FOREACH(c, &bs->parents, next_parent) { +- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) { ++ if (c == ignore) { + continue; + } + bdrv_parent_drained_end_single(c); +@@ -242,7 +240,6 @@ typedef struct { + bool begin; + bool poll; + BdrvChild *parent; +- bool ignore_bds_parents; + } 
BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents); ++ bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, +- data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent); + } + aio_context_release(ctx); + } else { +@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque) + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + bool begin, + BdrvChild *parent, +- bool ignore_bds_parents, + bool poll) + { + BdrvCoDrainData data; +@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .done = false, + .begin = begin, + .parent = parent, +- .ignore_bds_parents = ignore_bds_parents, + .poll = poll, + }; + +@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + } + } + +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents) ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent) + { + IO_OR_GS_CODE(); + assert(!qemu_in_coroutine()); +@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + /* Stop things in parent-to-child order */ + if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) { + aio_disable_external(bdrv_get_aio_context(bs)); +- +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_begin(bs, parent, false); ++ bdrv_parent_drained_begin(bs, parent); + if (bs->drv && bs->drv->bdrv_drain_begin) { + bs->drv->bdrv_drain_begin(bs); + } +@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + + static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents, bool poll) ++ bool poll) + { + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll); ++ bdrv_co_yield_to_drain(bs, true, parent, poll); + return; + } + +- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); ++ bdrv_do_drained_begin_quiesce(bs, parent); + + /* + * Wait for drained requests to finish. +@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + * nodes. + */ + if (poll) { +- assert(!ignore_bds_parents); + BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } +@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. 
+ */ +-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, +- bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent) + { + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); ++ bdrv_co_yield_to_drain(bs, false, parent, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + if (bs->drv && bs->drv->bdrv_drain_end) { + bs->drv->bdrv_drain_end(bs); + } +- /* TODO Remove ignore_bds_parents, we don't consider it any more */ +- bdrv_parent_drained_end(bs, parent, false); +- ++ bdrv_parent_drained_end(bs, parent); + aio_enable_external(bdrv_get_aio_context(bs)); + } + } +@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, NULL, false); ++ bdrv_do_drained_end(bs, NULL); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true); + return; + } + +@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, false); + aio_context_release(aio_context); + } + +@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, NULL, true); ++ bdrv_do_drained_end(bs, NULL); + } + } + +@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, NULL, true); ++ bdrv_do_drained_end(bs, NULL); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 9c36a16a1f..8f5e75756a 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs); + * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already + * running requests to complete. 
+ */ +-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, +- BdrvChild *parent, bool ignore_bds_parents); ++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent); + + /** + * bdrv_drained_end: +-- +2.31.1 + diff --git a/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch new file mode 100644 index 0000000..94eba86 --- /dev/null +++ b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch @@ -0,0 +1,106 @@ +From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:10 +0100 +Subject: [PATCH 28/31] block: Remove poll parameter from + bdrv_parent_drained_begin_single() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s) + +All callers of bdrv_parent_drained_begin_single() pass poll=false now, +so we don't need the parameter any more. + +Signed-off-by: Kevin Wolf +Message-Id: <20221118174110.55183-16-kwolf@redhat.com> +Reviewed-by: Hanna Reitz +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Kevin Wolf +(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 ++-- + block/io.c | 8 ++------ + include/block/block-io.h | 5 ++--- + 3 files changed, 6 insertions(+), 11 deletions(-) + +diff --git a/block.c b/block.c +index 65588d313a..0d78711416 100644 +--- a/block.c ++++ b/block.c +@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque) + * new_bs drained when calling bdrv_replace_child_tran() is not a + * requirement any more. + */ +- bdrv_parent_drained_begin_single(s->child, false); ++ bdrv_parent_drained_begin_single(s->child); + assert(!bdrv_parent_drained_poll_single(s->child)); + } + assert(s->child->quiesced_parent); +@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs, + * a problem, we already did this), but it will still poll until the parent + * is fully quiesced, so it will not be negatively affected either. 
+ */ +- bdrv_parent_drained_begin_single(new_child, false); ++ bdrv_parent_drained_begin_single(new_child); + bdrv_replace_child_noperm(new_child, child_bs); + + BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1); +diff --git a/block/io.c b/block/io.c +index ae64830eac..38e57d1f67 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore) + if (c == ignore) { + continue; + } +- bdrv_parent_drained_begin_single(c, false); ++ bdrv_parent_drained_begin_single(c); + } + } + +@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore, + return busy; + } + +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) ++void bdrv_parent_drained_begin_single(BdrvChild *c) + { +- AioContext *ctx = bdrv_child_get_parent_aio_context(c); + IO_OR_GS_CODE(); + + assert(!c->quiesced_parent); +@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) + if (c->klass->drained_begin) { + c->klass->drained_begin(c); + } +- if (poll) { +- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c)); +- } + } + + static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 65e6d2569b..92aaa7c1e9 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); + /** + * bdrv_parent_drained_begin_single: + * +- * Begin a quiesced section for the parent of @c. If @poll is true, wait for +- * any pending activity to cease. ++ * Begin a quiesced section for the parent of @c. + */ +-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll); ++void bdrv_parent_drained_begin_single(BdrvChild *c); + + /** + * bdrv_parent_drained_poll_single: +-- +2.31.1 + diff --git a/kvm-block-Remove-subtree-drains.patch b/kvm-block-Remove-subtree-drains.patch new file mode 100644 index 0000000..af9c0ff --- /dev/null +++ b/kvm-block-Remove-subtree-drains.patch @@ -0,0 +1,896 @@ +From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:41:05 +0100 +Subject: [PATCH 23/31] block: Remove subtree drains + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s) + +Subtree drains are not used any more. Remove them. + +After this, BdrvChildClass.attach/detach() don't poll any more. 
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-11-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066) +Signed-off-by: Stefano Garzarella +--- + block.c | 20 +-- + block/io.c | 121 +++----------- + include/block/block-io.h | 18 +-- + include/block/block_int-common.h | 1 - + include/block/block_int-io.h | 12 -- + tests/unit/test-bdrv-drain.c | 261 ++----------------------------- + 6 files changed, 44 insertions(+), 389 deletions(-) + +diff --git a/block.c b/block.c +index 5330e89903..e0e3b21790 100644 +--- a/block.c ++++ b/block.c +@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child) + static bool bdrv_child_cb_drained_poll(BdrvChild *child) + { + BlockDriverState *bs = child->opaque; +- return bdrv_drain_poll(bs, false, NULL, false); ++ return bdrv_drain_poll(bs, NULL, false); + } + + static void bdrv_child_cb_drained_end(BdrvChild *child) +@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child) + assert(!bs->file); + bs->file = child; + } +- +- bdrv_apply_subtree_drain(child, bs); + } + + static void bdrv_child_cb_detach(BdrvChild *child) +@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child) + bdrv_backing_detach(child); + } + +- bdrv_unapply_subtree_drain(child, bs); +- + assert_bdrv_graph_writable(bs); + QLIST_REMOVE(child, next); + if (child == bs->backing) { +@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + } + + if (old_bs) { +- /* Detach first so that the recursive drain sections coming from @child +- * are already gone and we only end the drain sections that came from +- * elsewhere. */ + if (child->klass->detach) { + child->klass->detach(child); + } +@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child, + QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent); + + /* +- * Detaching the old node may have led to the new node's +- * quiesce_counter having been decreased. Not a problem, we +- * just need to recognize this here and then invoke +- * drained_end appropriately more often. ++ * Polling in bdrv_parent_drained_begin_single() may have led to the new ++ * node's quiesce_counter having been decreased. Not a problem, we just ++ * need to recognize this here and then invoke drained_end appropriately ++ * more often. + */ + assert(new_bs->quiesce_counter <= new_bs_quiesce_counter); + drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter; + +- /* Attach only after starting new drained sections, so that recursive +- * drain sections coming from @child don't get an extra .drained_begin +- * callback. 
*/ + if (child->klass->attach) { + child->klass->attach(child); + } +diff --git a/block/io.c b/block/io.c +index a25103be6f..75224480d0 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -236,17 +236,15 @@ typedef struct { + BlockDriverState *bs; + bool done; + bool begin; +- bool recursive; + bool poll; + BdrvChild *parent; + bool ignore_bds_parents; + } BdrvCoDrainData; + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents) ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child, *next; + IO_OR_GS_CODE(); + + if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) { +@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, + return true; + } + +- if (recursive) { +- assert(!ignore_bds_parents); +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- if (bdrv_drain_poll(child->bs, recursive, child, false)) { +- return true; +- } +- } +- } +- + return false; + } + +-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive, ++static bool bdrv_drain_poll_top_level(BlockDriverState *bs, + BdrvChild *ignore_parent) + { +- return bdrv_drain_poll(bs, recursive, ignore_parent, false); ++ return bdrv_drain_poll(bs, ignore_parent, false); + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll); +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents); ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll); ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents); + + static void bdrv_co_drain_bh_cb(void *opaque) + { +@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque) + aio_context_acquire(ctx); + bdrv_dec_in_flight(bs); + if (data->begin) { +- bdrv_do_drained_begin(bs, data->recursive, data->parent, +- data->ignore_bds_parents, data->poll); ++ bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents, ++ data->poll); + } else { + assert(!data->poll); +- bdrv_do_drained_end(bs, data->recursive, data->parent, +- data->ignore_bds_parents); ++ bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents); + } + aio_context_release(ctx); + } else { +@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque) + } + + static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, +- bool begin, bool recursive, ++ bool begin, + BdrvChild *parent, + bool ignore_bds_parents, + bool poll) +@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs, + .bs = bs, + .done = false, + .begin = begin, +- .recursive = recursive, + .parent = parent, + .ignore_bds_parents = ignore_bds_parents, + .poll = poll, +@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + } + } + +-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents, +- bool poll) ++static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents, bool poll) + { +- BdrvChild *child, *next; +- + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents, +- poll); ++ bdrv_co_yield_to_drain(bs, true, parent, 
ignore_bds_parents, poll); + return; + } + + bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents); + +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter++; +- QLIST_FOREACH_SAFE(child, &bs->children, next, next) { +- bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents, +- false); +- } +- } +- + /* + * Wait for drained requests to finish. + * +@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive, + */ + if (poll) { + assert(!ignore_bds_parents); +- BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent)); ++ BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent)); + } + } + + void bdrv_drained_begin(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, false, NULL, false, true); +-} +- +-void bdrv_subtree_drained_begin(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_begin(bs, true, NULL, false, true); ++ bdrv_do_drained_begin(bs, NULL, false, true); + } + + /** + * This function does not poll, nor must any of its recursively called + * functions. + */ +-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, +- BdrvChild *parent, bool ignore_bds_parents) ++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent, ++ bool ignore_bds_parents) + { +- BdrvChild *child; + int old_quiesce_counter; + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents, +- false); ++ bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false); + return; + } + assert(bs->quiesce_counter > 0); +@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive, + if (old_quiesce_counter == 1) { + aio_enable_external(bdrv_get_aio_context(bs)); + } +- +- if (recursive) { +- assert(!ignore_bds_parents); +- bs->recursive_quiesce_counter--; +- QLIST_FOREACH(child, &bs->children, next) { +- bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents); +- } +- } + } + + void bdrv_drained_end(BlockDriverState *bs) + { + IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, false, NULL, false); +-} +- +-void bdrv_subtree_drained_end(BlockDriverState *bs) +-{ +- IO_OR_GS_CODE(); +- bdrv_do_drained_end(bs, true, NULL, false); +-} +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < new_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_begin(child->bs, true, child, false, true); +- } +-} +- +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent) +-{ +- int i; +- IO_OR_GS_CODE(); +- +- for (i = 0; i < old_parent->recursive_quiesce_counter; i++) { +- bdrv_do_drained_end(child->bs, true, child, false); +- } ++ bdrv_do_drained_end(bs, NULL, false); + } + + void bdrv_drain(BlockDriverState *bs) +@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void) + while ((bs = bdrv_next_all_states(bs))) { + AioContext *aio_context = bdrv_get_aio_context(bs); + aio_context_acquire(aio_context); +- result |= bdrv_drain_poll(bs, false, NULL, true); ++ result |= bdrv_drain_poll(bs, NULL, true); + aio_context_release(aio_context); + } + +@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void) + GLOBAL_STATE_CODE(); + + if (qemu_in_coroutine()) { +- bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true); ++ bdrv_co_yield_to_drain(NULL, true, NULL, true, true); + return; + } + +@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void) + AioContext *aio_context = 
bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_begin(bs, false, NULL, true, false); ++ bdrv_do_drained_begin(bs, NULL, true, false); + aio_context_release(aio_context); + } + +@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs) + g_assert(!bs->refcnt); + + while (bs->quiesce_counter) { +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + } + } + +@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void) + AioContext *aio_context = bdrv_get_aio_context(bs); + + aio_context_acquire(aio_context); +- bdrv_do_drained_end(bs, false, NULL, true); ++ bdrv_do_drained_end(bs, NULL, true); + aio_context_release(aio_context); + } + +diff --git a/include/block/block-io.h b/include/block/block-io.h +index 054e964c9b..9c36a16a1f 100644 +--- a/include/block/block-io.h ++++ b/include/block/block-io.h +@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + /** + * bdrv_drain_poll: + * +- * Poll for pending requests in @bs, its parents (except for @ignore_parent), +- * and if @recursive is true its children as well (used for subtree drain). ++ * Poll for pending requests in @bs and its parents (except for @ignore_parent). + * + * If @ignore_bds_parents is true, parents that are BlockDriverStates must + * ignore the drain request because they will be drained separately (used for +@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c); + * + * This is part of bdrv_drained_begin. + */ +-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive, +- BdrvChild *ignore_parent, bool ignore_bds_parents); ++bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent, ++ bool ignore_bds_parents); + + /** + * bdrv_drained_begin: +@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs); + void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, + BdrvChild *parent, bool ignore_bds_parents); + +-/** +- * Like bdrv_drained_begin, but recursively begins a quiesced section for +- * exclusive access to all child nodes as well. +- */ +-void bdrv_subtree_drained_begin(BlockDriverState *bs); +- + /** + * bdrv_drained_end: + * +@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs); + */ + void bdrv_drained_end(BlockDriverState *bs); + +-/** +- * End a quiescent section started by bdrv_subtree_drained_begin(). +- */ +-void bdrv_subtree_drained_end(BlockDriverState *bs); +- + #endif /* BLOCK_IO_H */ +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 2b97576f6d..791dddfd7d 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -1184,7 +1184,6 @@ struct BlockDriverState { + + /* Accessed with atomic ops. */ + int quiesce_counter; +- int recursive_quiesce_counter; + + unsigned int write_gen; /* Current data generation */ + +diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h +index 4b0b3e17ef..8bc061ebb8 100644 +--- a/include/block/block_int-io.h ++++ b/include/block/block_int-io.h +@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs, + */ + void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes); + +- +-/* +- * "I/O or GS" API functions. These functions can run without +- * the BQL, but only in one specific iothread/main loop. +- * +- * See include/block/block-io.h for more information about +- * the "I/O or GS" API. 
+- */ +- +-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); +-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); +- + #endif /* BLOCK_INT_IO_H */ +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 695519ee02..dda08de8db 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void)) + enum drain_type { + BDRV_DRAIN_ALL, + BDRV_DRAIN, +- BDRV_SUBTREE_DRAIN, + DRAIN_TYPE_MAX, + }; + +@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_begin(); break; + case BDRV_DRAIN: bdrv_drained_begin(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_begin(bs); break; + default: g_assert_not_reached(); + } + } +@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs) + switch (drain_type) { + case BDRV_DRAIN_ALL: bdrv_drain_all_end(); break; + case BDRV_DRAIN: bdrv_drained_end(bs); break; +- case BDRV_SUBTREE_DRAIN: bdrv_subtree_drained_end(bs); break; + default: g_assert_not_reached(); + } + } +@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void) + test_drv_cb_common(BDRV_DRAIN, false); + } + +-static void test_drv_cb_drain_subtree(void) +-{ +- test_drv_cb_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_drv_cb_co_drain_all(void) + { + call_in_coroutine(test_drv_cb_drain_all); +@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void) + call_in_coroutine(test_drv_cb_drain); + } + +-static void test_drv_cb_co_drain_subtree(void) +-{ +- call_in_coroutine(test_drv_cb_drain_subtree); +-} +- + static void test_quiesce_common(enum drain_type drain_type, bool recursive) + { + BlockBackend *blk; +@@ -332,11 +319,6 @@ static void test_quiesce_drain(void) + test_quiesce_common(BDRV_DRAIN, false); + } + +-static void test_quiesce_drain_subtree(void) +-{ +- test_quiesce_common(BDRV_SUBTREE_DRAIN, true); +-} +- + static void test_quiesce_co_drain_all(void) + { + call_in_coroutine(test_quiesce_drain_all); +@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void) + call_in_coroutine(test_quiesce_drain); + } + +-static void test_quiesce_co_drain_subtree(void) +-{ +- call_in_coroutine(test_quiesce_drain_subtree); +-} +- + static void test_nested(void) + { + BlockBackend *blk; +@@ -402,158 +379,6 @@ static void test_nested(void) + blk_unref(blk); + } + +-static void test_multiparent(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- 
g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 2); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 2); +- g_assert_cmpint(a_s->drain_count, ==, 2); +- g_assert_cmpint(b_s->drain_count, ==, 2); +- g_assert_cmpint(backing_s->drain_count, ==, 2); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 1); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 1); +- g_assert_cmpint(backing->quiesce_counter, ==, 1); +- g_assert_cmpint(a_s->drain_count, ==, 1); +- g_assert_cmpint(b_s->drain_count, ==, 1); +- g_assert_cmpint(backing_s->drain_count, ==, 1); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- +-static void test_graph_change_drain_subtree(void) +-{ +- BlockBackend *blk_a, *blk_b; +- BlockDriverState *bs_a, *bs_b, *backing; +- BDRVTestState *a_s, *b_s, *backing_s; +- +- blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR, +- &error_abort); +- a_s = bs_a->opaque; +- blk_insert_bs(blk_a, bs_a, &error_abort); +- +- blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL); +- bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR, +- &error_abort); +- b_s = bs_b->opaque; +- blk_insert_bs(blk_b, bs_b, &error_abort); +- +- backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort); +- backing_s = backing->opaque; +- bdrv_set_backing_hd(bs_a, backing, &error_abort); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- bdrv_set_backing_hd(bs_b, NULL, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 3); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 2); +- g_assert_cmpint(backing->quiesce_counter, ==, 3); +- g_assert_cmpint(a_s->drain_count, ==, 3); +- g_assert_cmpint(b_s->drain_count, ==, 
2); +- g_assert_cmpint(backing_s->drain_count, ==, 3); +- +- bdrv_set_backing_hd(bs_b, backing, &error_abort); +- g_assert_cmpint(bs_a->quiesce_counter, ==, 5); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 5); +- g_assert_cmpint(backing->quiesce_counter, ==, 5); +- g_assert_cmpint(a_s->drain_count, ==, 5); +- g_assert_cmpint(b_s->drain_count, ==, 5); +- g_assert_cmpint(backing_s->drain_count, ==, 5); +- +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_b); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- do_drain_end(BDRV_SUBTREE_DRAIN, bs_a); +- +- g_assert_cmpint(bs_a->quiesce_counter, ==, 0); +- g_assert_cmpint(bs_b->quiesce_counter, ==, 0); +- g_assert_cmpint(backing->quiesce_counter, ==, 0); +- g_assert_cmpint(a_s->drain_count, ==, 0); +- g_assert_cmpint(b_s->drain_count, ==, 0); +- g_assert_cmpint(backing_s->drain_count, ==, 0); +- +- bdrv_unref(backing); +- bdrv_unref(bs_a); +- bdrv_unref(bs_b); +- blk_unref(blk_a); +- blk_unref(blk_b); +-} +- + static void test_graph_change_drain_all(void) + { + BlockBackend *blk_a, *blk_b; +@@ -773,12 +598,6 @@ static void test_iothread_drain(void) + test_iothread_common(BDRV_DRAIN, 1); + } + +-static void test_iothread_drain_subtree(void) +-{ +- test_iothread_common(BDRV_SUBTREE_DRAIN, 0); +- test_iothread_common(BDRV_SUBTREE_DRAIN, 1); +-} +- + + typedef struct TestBlockJob { + BlockJob common; +@@ -863,7 +682,6 @@ enum test_job_result { + enum test_job_drain_node { + TEST_JOB_DRAIN_SRC, + TEST_JOB_DRAIN_SRC_CHILD, +- TEST_JOB_DRAIN_SRC_PARENT, + }; + + static void test_blockjob_common_drain_node(enum drain_type drain_type, +@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type, + case TEST_JOB_DRAIN_SRC_CHILD: + drain_bs = src_backing; + break; +- case TEST_JOB_DRAIN_SRC_PARENT: +- drain_bs = src_overlay; +- break; + default: + g_assert_not_reached(); + } +@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread, + TEST_JOB_DRAIN_SRC); + test_blockjob_common_drain_node(drain_type, use_iothread, result, + TEST_JOB_DRAIN_SRC_CHILD); +- if (drain_type == BDRV_SUBTREE_DRAIN) { +- test_blockjob_common_drain_node(drain_type, use_iothread, result, +- TEST_JOB_DRAIN_SRC_PARENT); +- } + } + + static void test_blockjob_drain_all(void) +@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void) + test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_error_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN); +@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void) + test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE); +-} +- + static void test_blockjob_iothread_drain_all(void) + { + test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS); +@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS); + } + +-static void test_blockjob_iothread_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS); +-} +- + static void test_blockjob_iothread_error_drain_all(void) + { + 
test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN); +@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void) + test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE); + } + +-static void test_blockjob_iothread_error_drain_subtree(void) +-{ +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN); +- test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE); +-} +- + + typedef struct BDRVTestTopState { + BdrvChild *wait_child; +@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete, + bdrv_drain(child_bs); + bdrv_unref(child_bs); + break; +- case BDRV_SUBTREE_DRAIN: +- /* Would have to ref/unref bs here for !detach_instead_of_delete, but +- * then the whole test becomes pointless because the graph changes +- * don't occur during the drain any more. */ +- assert(detach_instead_of_delete); +- bdrv_subtree_drained_begin(bs); +- bdrv_subtree_drained_end(bs); +- break; + case BDRV_DRAIN_ALL: + bdrv_drain_all_begin(); + bdrv_drain_all_end(); +@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void) + do_test_delete_by_drain(true, BDRV_DRAIN); + } + +-static void test_detach_by_drain_subtree(void) +-{ +- do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN); +-} +- + + struct detach_by_parent_data { + BlockDriverState *parent_b; +@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(acb != NULL); + + /* Drain and check the expected result */ +- bdrv_subtree_drained_begin(parent_b); ++ bdrv_drained_begin(parent_b); ++ bdrv_drained_begin(a); ++ bdrv_drained_begin(b); ++ bdrv_drained_begin(c); + + g_assert(detach_by_parent_data.child_c != NULL); + +@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb) + g_assert(QLIST_NEXT(child_a, next) == NULL); + + g_assert_cmpint(parent_a->quiesce_counter, ==, 1); +- g_assert_cmpint(parent_b->quiesce_counter, ==, 1); ++ g_assert_cmpint(parent_b->quiesce_counter, ==, 3); + g_assert_cmpint(a->quiesce_counter, ==, 1); +- g_assert_cmpint(b->quiesce_counter, ==, 0); ++ g_assert_cmpint(b->quiesce_counter, ==, 1); + g_assert_cmpint(c->quiesce_counter, ==, 1); + +- bdrv_subtree_drained_end(parent_b); ++ bdrv_drained_end(parent_b); ++ bdrv_drained_end(a); ++ bdrv_drained_end(b); ++ bdrv_drained_end(c); + + bdrv_unref(parent_b); + blk_unref(blk); +@@ -2202,70 +1984,47 @@ int main(int argc, char **argv) + + g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain); +- g_test_add_func("/bdrv-drain/driver-cb/drain_subtree", +- test_drv_cb_drain_subtree); + + g_test_add_func("/bdrv-drain/driver-cb/co/drain_all", + test_drv_cb_co_drain_all); + g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain); +- g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree", +- test_drv_cb_co_drain_subtree); +- + + g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all); + g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain); +- g_test_add_func("/bdrv-drain/quiesce/drain_subtree", +- test_quiesce_drain_subtree); + + g_test_add_func("/bdrv-drain/quiesce/co/drain_all", + test_quiesce_co_drain_all); + g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain); +- g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree", +- test_quiesce_co_drain_subtree); + + g_test_add_func("/bdrv-drain/nested", test_nested); +- g_test_add_func("/bdrv-drain/multiparent", test_multiparent); + +- 
g_test_add_func("/bdrv-drain/graph-change/drain_subtree", +- test_graph_change_drain_subtree); + g_test_add_func("/bdrv-drain/graph-change/drain_all", + test_graph_change_drain_all); + + g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all); + g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain); +- g_test_add_func("/bdrv-drain/iothread/drain_subtree", +- test_iothread_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all); + g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain); +- g_test_add_func("/bdrv-drain/blockjob/drain_subtree", +- test_blockjob_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/error/drain_all", + test_blockjob_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/error/drain", + test_blockjob_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree", +- test_blockjob_error_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all", + test_blockjob_iothread_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/drain", + test_blockjob_iothread_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree", +- test_blockjob_iothread_drain_subtree); + + g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all", + test_blockjob_iothread_error_drain_all); + g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain", + test_blockjob_iothread_error_drain); +- g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree", +- test_blockjob_iothread_error_drain_subtree); + + g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain); + g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all); + g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain); +- g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree); + g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb); + g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb); + +-- +2.31.1 + diff --git a/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch new file mode 100644 index 0000000..1529fdb --- /dev/null +++ b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch @@ -0,0 +1,302 @@ +From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:58 +0100 +Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to + non-coroutine_fn + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s) + +Polling during bdrv_drained_end() can be problematic (and in the future, +we may get cases for bdrv_drained_begin() where polling is forbidden, +and we don't care about already in-flight requests, but just want to +prevent new requests from arriving). + +The .bdrv_drained_begin/end callbacks running in a coroutine is the only +reason why we have to do this polling, so make them non-coroutine +callbacks again. None of the callers actually yield any more. + +This means that bdrv_drained_end() effectively doesn't poll any more, +even if AIO_WAIT_WHILE() loops are still there (their condition is false +from the beginning). 
This is generally not a problem, but in +test-bdrv-drain, some additional explicit aio_poll() calls need to be +added because the test case wants to verify the final state after BHs +have executed. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-4-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63) +Signed-off-by: Stefano Garzarella +--- + block.c | 4 +-- + block/io.c | 49 +++++--------------------------- + block/qed.c | 6 ++-- + block/throttle.c | 8 +++--- + include/block/block_int-common.h | 10 ++++--- + tests/unit/test-bdrv-drain.c | 18 ++++++------ + 6 files changed, 32 insertions(+), 63 deletions(-) + +diff --git a/block.c b/block.c +index ec184150a2..16a62a329c 100644 +--- a/block.c ++++ b/block.c +@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv, + assert(is_power_of_2(bs->bl.request_alignment)); + + for (i = 0; i < bs->quiesce_counter; i++) { +- if (drv->bdrv_co_drain_begin) { +- drv->bdrv_co_drain_begin(bs); ++ if (drv->bdrv_drain_begin) { ++ drv->bdrv_drain_begin(bs); + } + } + +diff --git a/block/io.c b/block/io.c +index b9424024f9..c2ed4b2af9 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -252,55 +252,20 @@ typedef struct { + int *drained_end_counter; + } BdrvCoDrainData; + +-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque) +-{ +- BdrvCoDrainData *data = opaque; +- BlockDriverState *bs = data->bs; +- +- if (data->begin) { +- bs->drv->bdrv_co_drain_begin(bs); +- } else { +- bs->drv->bdrv_co_drain_end(bs); +- } +- +- /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */ +- qatomic_mb_set(&data->done, true); +- if (!data->begin) { +- qatomic_dec(data->drained_end_counter); +- } +- bdrv_dec_in_flight(bs); +- +- g_free(data); +-} +- +-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */ ++/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */ + static void bdrv_drain_invoke(BlockDriverState *bs, bool begin, + int *drained_end_counter) + { +- BdrvCoDrainData *data; +- +- if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) || +- (!begin && !bs->drv->bdrv_co_drain_end)) { ++ if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) || ++ (!begin && !bs->drv->bdrv_drain_end)) { + return; + } + +- data = g_new(BdrvCoDrainData, 1); +- *data = (BdrvCoDrainData) { +- .bs = bs, +- .done = false, +- .begin = begin, +- .drained_end_counter = drained_end_counter, +- }; +- +- if (!begin) { +- qatomic_inc(drained_end_counter); ++ if (begin) { ++ bs->drv->bdrv_drain_begin(bs); ++ } else { ++ bs->drv->bdrv_drain_end(bs); + } +- +- /* Make sure the driver callback completes during the polling phase for +- * drain_begin. */ +- bdrv_inc_in_flight(bs); +- data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data); +- aio_co_schedule(bdrv_get_aio_context(bs), data->co); + } + + /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */ +diff --git a/block/qed.c b/block/qed.c +index 013f826c44..c2691a85b1 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s) + assert(!s->allocating_write_reqs_plugged); + if (s->allocating_acb != NULL) { + /* Another allocating write came concurrently. This cannot happen +- * from bdrv_qed_co_drain_begin, but it can happen when the timer runs. 
++ * from bdrv_qed_drain_begin, but it can happen when the timer runs. + */ + qemu_co_mutex_unlock(&s->table_lock); + return false; +@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs, + } + } + +-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) ++static void bdrv_qed_drain_begin(BlockDriverState *bs) + { + BDRVQEDState *s = bs->opaque; + +@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = { + .bdrv_co_check = bdrv_qed_co_check, + .bdrv_detach_aio_context = bdrv_qed_detach_aio_context, + .bdrv_attach_aio_context = bdrv_qed_attach_aio_context, +- .bdrv_co_drain_begin = bdrv_qed_co_drain_begin, ++ .bdrv_drain_begin = bdrv_qed_drain_begin, + }; + + static void bdrv_qed_init(void) +diff --git a/block/throttle.c b/block/throttle.c +index 131eba3ab4..88851c84f4 100644 +--- a/block/throttle.c ++++ b/block/throttle.c +@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state) + reopen_state->opaque = NULL; + } + +-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) ++static void throttle_drain_begin(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) { +@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs) ++static void throttle_drain_end(BlockDriverState *bs) + { + ThrottleGroupMember *tgm = bs->opaque; + assert(tgm->io_limits_disabled); +@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = { + .bdrv_reopen_commit = throttle_reopen_commit, + .bdrv_reopen_abort = throttle_reopen_abort, + +- .bdrv_co_drain_begin = throttle_co_drain_begin, +- .bdrv_co_drain_end = throttle_co_drain_end, ++ .bdrv_drain_begin = throttle_drain_begin, ++ .bdrv_drain_end = throttle_drain_end, + + .is_filter = true, + .strong_runtime_opts = throttle_strong_runtime_opts, +diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h +index 31ae91e56e..40d646d1ed 100644 +--- a/include/block/block_int-common.h ++++ b/include/block/block_int-common.h +@@ -735,17 +735,19 @@ struct BlockDriver { + void (*bdrv_io_unplug)(BlockDriverState *bs); + + /** +- * bdrv_co_drain_begin is called if implemented in the beginning of a ++ * bdrv_drain_begin is called if implemented in the beginning of a + * drain operation to drain and stop any internal sources of requests in + * the driver. +- * bdrv_co_drain_end is called if implemented at the end of the drain. ++ * bdrv_drain_end is called if implemented at the end of the drain. + * + * They should be used by the driver to e.g. manage scheduled I/O + * requests, or toggle an internal state. After the end of the drain new + * requests will continue normally. ++ * ++ * Implementations of both functions must not call aio_poll(). 
+ */ +- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs); +- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs); ++ void (*bdrv_drain_begin)(BlockDriverState *bs); ++ void (*bdrv_drain_end)(BlockDriverState *bs); + + bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs); + bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)( +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 24f34e24ad..695519ee02 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque) + bdrv_dec_in_flight(bs); + } + +-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_test_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; +@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + } + } + +-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_test_drain_end(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count--; +@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = { + .bdrv_close = bdrv_test_close, + .bdrv_co_preadv = bdrv_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_test_drain_begin, ++ .bdrv_drain_end = bdrv_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + +@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void) + bdrv_drained_begin(bs_child); + g_assert(!job_has_completed); + bdrv_drained_end(bs_child); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(job_has_completed); + + bdrv_unref(bs_parents[0]); +@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void) + + g_assert(!job_has_completed); + ret = bdrv_drop_intermediate(chain[1], chain[0], NULL); ++ aio_poll(qemu_get_aio_context(), false); + g_assert(ret == 0); + g_assert(job_has_completed); + +@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) + * .was_drained. + * Increment .drain_count. + */ +-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) ++static void bdrv_replace_test_drain_begin(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) + * If .drain_count reaches 0 and the node has a backing file, issue a + * read request. 
+ */ +-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) ++static void bdrv_replace_test_drain_end(BlockDriverState *bs) + { + BDRVReplaceTestState *s = bs->opaque; + +@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = { + .bdrv_close = bdrv_replace_test_close, + .bdrv_co_preadv = bdrv_replace_test_co_preadv, + +- .bdrv_co_drain_begin = bdrv_replace_test_co_drain_begin, +- .bdrv_co_drain_end = bdrv_replace_test_co_drain_end, ++ .bdrv_drain_begin = bdrv_replace_test_drain_begin, ++ .bdrv_drain_end = bdrv_replace_test_drain_end, + + .bdrv_child_perm = bdrv_default_perms, + }; +-- +2.31.1 + diff --git a/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch new file mode 100644 index 0000000..19d52b5 --- /dev/null +++ b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch @@ -0,0 +1,70 @@ +From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001 +From: Vladimir Sementsov-Ogievskiy +Date: Mon, 7 Nov 2022 19:35:56 +0300 +Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s) + +Drop this simple wrapper used only in one place. We have too many graph +modifying functions even without it. + +Signed-off-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264) +Signed-off-by: Stefano Garzarella +--- + block.c | 15 +-------------- + 1 file changed, 1 insertion(+), 14 deletions(-) + +diff --git a/block.c b/block.c +index a18f052374..ec184150a2 100644 +--- a/block.c ++++ b/block.c +@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs, + static void bdrv_replace_child_noperm(BdrvChild *child, + BlockDriverState *new_bs); + static void bdrv_remove_child(BdrvChild *child, Transaction *tran); +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran); + + static int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, +@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran) + tran_add(tran, &bdrv_remove_child_drv, child); + } + +-/* +- * A function to remove backing-chain child of @bs if exists: cow child for +- * format nodes (always .backing) and filter child for filters (may be .file or +- * .backing) +- */ +-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs, +- Transaction *tran) +-{ +- bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran); +-} +- + static int bdrv_replace_node_noperm(BlockDriverState *from, + BlockDriverState *to, + bool auto_skip, Transaction *tran, +@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from, + } + + if (detach_subchain) { +- bdrv_remove_filter_or_cow_child(to_cow_parent, tran); ++ bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran); + } + + found = g_hash_table_new(NULL, NULL); +-- +2.31.1 + diff --git a/kvm-kvm-Atomic-memslot-updates.patch b/kvm-kvm-Atomic-memslot-updates.patch new file mode 100644 index 0000000..14e9e32 --- /dev/null +++ b/kvm-kvm-Atomic-memslot-updates.patch @@ 
-0,0 +1,286 @@
+From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001
+From: Emanuele Giuseppe Esposito
+Date: Mon, 16 Jan 2023 07:17:31 -0500
+Subject: [PATCH 31/31] kvm: Atomic memslot updates
+
+RH-Author: Emanuele Giuseppe Esposito
+RH-MergeRequest: 138: accel: introduce accelerator blocker API
+RH-Bugzilla: 1979276
+RH-Acked-by: Paolo Bonzini
+RH-Acked-by: David Hildenbrand
+RH-Acked-by: Miroslav Rezanina
+RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
+
+commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39
+Author: David Hildenbrand
+Date: Fri Nov 11 10:47:58 2022 -0500
+
+ kvm: Atomic memslot updates
+
+ If we update an existing memslot (e.g., resize, split), we temporarily
+ remove the memslot to re-add it immediately afterwards. These updates
+ are not atomic, especially not for KVM VCPU threads, such that we can
+ get spurious faults.
+
+ Let's inhibit most KVM ioctls while performing relevant updates, such
+ that we can perform the update just as if it happened atomically,
+ without additional kernel support.
+
+ We capture the add/del changes and apply them in the notifier commit
+ stage instead. There, we can check for overlaps and perform the ioctl
+ inhibiting only if really required (-> overlap).
+
+ To keep things simple we don't perform additional checks that wouldn't
+ actually result in an overlap -- such as !RAM memory regions in some
+ cases (see kvm_set_phys_mem()).
+
+ To minimize cache-line bouncing, use a separate indicator
+ (in_ioctl_lock) per CPU. Also, make sure to hold the kvm_slots_lock
+ while performing both actions (removing+re-adding).
+
+ We have to wait until all IOCTLs have exited and block new ones from
+ being executed.
+
+ This approach cannot result in a deadlock as long as the inhibitor does
+ not hold any locks that might hinder an IOCTL from getting finished and
+ exited - something fairly unusual. The inhibitor will always hold the BQL.
+
+ AFAIK, one possible candidate would be userfaultfd. If a page cannot be
+ placed (e.g., during postcopy), because we're waiting for a lock, or if the
+ userfaultfd thread cannot process a fault, because it is waiting for a
+ lock, there could be a deadlock. However, the BQL is not applicable here,
+ because any other guest memory access while holding the BQL would already
+ result in a deadlock.
+
+ Nothing else in the kernel should block forever and wait for userspace
+ intervention.
+
+ Note: pause_all_vcpus()/resume_all_vcpus() or
+ start_exclusive()/end_exclusive() cannot be used, as they either drop
+ the BQL or require being called without the BQL - something inhibitors
+ cannot handle. We need a low-level locking mechanism that is
+ deadlock-free even when not releasing the BQL.
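
In outline, the inhibit path in kvm_region_commit() (see the hunks below) has the following shape. This is a minimal sketch, not patch content: accel_ioctl_inhibit_begin()/accel_ioctl_inhibit_end() and kvm_slots_lock()/kvm_slots_unlock() are the functions the patch actually uses, while apply_memslot_updates() is a hypothetical stand-in for the remove-then-re-add loops.

    /* Sketch: make a memslot update look atomic to vCPU ioctls.
     * The caller holds the BQL, as the blocker API requires. */
    static void sketch_memslot_commit(KVMMemoryListener *kml, bool need_inhibit)
    {
        kvm_slots_lock();
        if (need_inhibit) {
            /* Wait for in-flight ioctls to finish and block new ones. */
            accel_ioctl_inhibit_begin();
        }
        apply_memslot_updates(kml);     /* hypothetical: remove, then re-add */
        if (need_inhibit) {
            accel_ioctl_inhibit_end();  /* let blocked ioctls run again */
        }
        kvm_slots_unlock();
    }

Inhibiting is skipped entirely when the del and add lists do not overlap, so the common case pays nothing.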
+ + Signed-off-by: David Hildenbrand + Signed-off-by: Emanuele Giuseppe Esposito + Tested-by: Emanuele Giuseppe Esposito + Message-Id: <20221111154758.1372674-4-eesposit@redhat.com> + Signed-off-by: Paolo Bonzini + +Signed-off-by: Emanuele Giuseppe Esposito +--- + accel/kvm/kvm-all.c | 101 ++++++++++++++++++++++++++++++++++----- + include/sysemu/kvm_int.h | 8 ++++ + 2 files changed, 98 insertions(+), 11 deletions(-) + +diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c +index ff660fd469..39ed30ab59 100644 +--- a/accel/kvm/kvm-all.c ++++ b/accel/kvm/kvm-all.c +@@ -31,6 +31,7 @@ + #include "sysemu/kvm_int.h" + #include "sysemu/runstate.h" + #include "sysemu/cpus.h" ++#include "sysemu/accel-blocker.h" + #include "qemu/bswap.h" + #include "exec/memory.h" + #include "exec/ram_addr.h" +@@ -46,6 +47,7 @@ + #include "sysemu/hw_accel.h" + #include "kvm-cpus.h" + #include "sysemu/dirtylimit.h" ++#include "qemu/range.h" + + #include "hw/boards.h" + #include "monitor/stats.h" +@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size) + kvm_max_slot_size = max_slot_size; + } + ++/* Called with KVMMemoryListener.slots_lock held */ + static void kvm_set_phys_mem(KVMMemoryListener *kml, + MemoryRegionSection *section, bool add) + { +@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram = memory_region_get_ram_ptr(mr) + mr_offset; + ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset; + +- kvm_slots_lock(); +- + if (!add) { + do { + slot_size = MIN(kvm_max_slot_size, size); + mem = kvm_lookup_matching_slot(kml, start_addr, slot_size); + if (!mem) { +- goto out; ++ return; + } + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* +@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + start_addr += slot_size; + size -= slot_size; + } while (size); +- goto out; ++ return; + } + + /* register the new slot */ +@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml, + ram += slot_size; + size -= slot_size; + } while (size); +- +-out: +- kvm_slots_unlock(); + } + + static void *kvm_dirty_ring_reaper_thread(void *data) +@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; + +- memory_region_ref(section->mr); +- kvm_set_phys_mem(kml, section, true); ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next); + } + + static void kvm_region_del(MemoryListener *listener, + MemoryRegionSection *section) + { + KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener); ++ KVMMemoryUpdate *update; ++ ++ update = g_new0(KVMMemoryUpdate, 1); ++ update->section = *section; ++ ++ QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next); ++} ++ ++static void kvm_region_commit(MemoryListener *listener) ++{ ++ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, ++ listener); ++ KVMMemoryUpdate *u1, *u2; ++ bool need_inhibit = false; ++ ++ if (QSIMPLEQ_EMPTY(&kml->transaction_add) && ++ QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ return; ++ } ++ ++ /* ++ * We have to be careful when regions to add overlap with ranges to remove. ++ * We have to simulate atomic KVM memslot updates by making sure no ioctl() ++ * is currently active. ++ * ++ * The lists are order by addresses, so it's easy to find overlaps. 
++ */ ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ u2 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ while (u1 && u2) { ++ Range r1, r2; ++ ++ range_init_nofail(&r1, u1->section.offset_within_address_space, ++ int128_get64(u1->section.size)); ++ range_init_nofail(&r2, u2->section.offset_within_address_space, ++ int128_get64(u2->section.size)); ++ ++ if (range_overlaps_range(&r1, &r2)) { ++ need_inhibit = true; ++ break; ++ } ++ if (range_lob(&r1) < range_lob(&r2)) { ++ u1 = QSIMPLEQ_NEXT(u1, next); ++ } else { ++ u2 = QSIMPLEQ_NEXT(u2, next); ++ } ++ } ++ ++ kvm_slots_lock(); ++ if (need_inhibit) { ++ accel_ioctl_inhibit_begin(); ++ } ++ ++ /* Remove all memslots before adding the new ones. */ ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_del); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next); + +- kvm_set_phys_mem(kml, section, false); +- memory_region_unref(section->mr); ++ kvm_set_phys_mem(kml, &u1->section, false); ++ memory_region_unref(u1->section.mr); ++ ++ g_free(u1); ++ } ++ while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) { ++ u1 = QSIMPLEQ_FIRST(&kml->transaction_add); ++ QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next); ++ ++ memory_region_ref(u1->section.mr); ++ kvm_set_phys_mem(kml, &u1->section, true); ++ ++ g_free(u1); ++ } ++ ++ if (need_inhibit) { ++ accel_ioctl_inhibit_end(); ++ } ++ kvm_slots_unlock(); + } + + static void kvm_log_sync(MemoryListener *listener, +@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml, + kml->slots[i].slot = i; + } + ++ QSIMPLEQ_INIT(&kml->transaction_add); ++ QSIMPLEQ_INIT(&kml->transaction_del); ++ + kml->listener.region_add = kvm_region_add; + kml->listener.region_del = kvm_region_del; ++ kml->listener.commit = kvm_region_commit; + kml->listener.log_start = kvm_log_start; + kml->listener.log_stop = kvm_log_stop; + kml->listener.priority = 10; +diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h +index 3b4adcdc10..60b520a13e 100644 +--- a/include/sysemu/kvm_int.h ++++ b/include/sysemu/kvm_int.h +@@ -12,6 +12,7 @@ + #include "exec/memory.h" + #include "qapi/qapi-types-common.h" + #include "qemu/accel.h" ++#include "qemu/queue.h" + #include "sysemu/kvm.h" + + typedef struct KVMSlot +@@ -31,10 +32,17 @@ typedef struct KVMSlot + ram_addr_t ram_start_offset; + } KVMSlot; + ++typedef struct KVMMemoryUpdate { ++ QSIMPLEQ_ENTRY(KVMMemoryUpdate) next; ++ MemoryRegionSection section; ++} KVMMemoryUpdate; ++ + typedef struct KVMMemoryListener { + MemoryListener listener; + KVMSlot *slots; + int as_id; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add; ++ QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del; + } KVMMemoryListener; + + #define KVM_MSI_HASHTAB_SIZE 256 +-- +2.31.1 + diff --git a/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch new file mode 100644 index 0000000..d2dacbc --- /dev/null +++ b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch @@ -0,0 +1,84 @@ +From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:56 +0100 +Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s) 
+ +We want to change .bdrv_co_drained_begin() back to be a non-coroutine +callback, so in preparation, avoid yielding in its implementation. + +Because we increase bs->in_flight and bdrv_drained_begin() polls, the +behaviour is unchanged. + +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-2-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23) +Signed-off-by: Stefano Garzarella +--- + block/qed.c | 20 +++++++++++++++++--- + 1 file changed, 17 insertions(+), 3 deletions(-) + +diff --git a/block/qed.c b/block/qed.c +index 2f36ad342c..013f826c44 100644 +--- a/block/qed.c ++++ b/block/qed.c +@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s) + qemu_co_mutex_unlock(&s->table_lock); + } + +-static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++static void coroutine_fn qed_need_check_timer(BDRVQEDState *s) + { +- BDRVQEDState *s = opaque; + int ret; + + trace_qed_need_check_timer_cb(s); +@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque) + (void) ret; + } + ++static void coroutine_fn qed_need_check_timer_entry(void *opaque) ++{ ++ BDRVQEDState *s = opaque; ++ ++ qed_need_check_timer(opaque); ++ bdrv_dec_in_flight(s->bs); ++} ++ + static void qed_need_check_timer_cb(void *opaque) + { ++ BDRVQEDState *s = opaque; + Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque); ++ ++ bdrv_inc_in_flight(s->bs); + qemu_coroutine_enter(co); + } + +@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs) + * header is flushed. + */ + if (s->need_check_timer && timer_pending(s->need_check_timer)) { ++ Coroutine *co; ++ + qed_cancel_need_check_timer(s); +- qed_need_check_timer_entry(s); ++ co = qemu_coroutine_create(qed_need_check_timer_entry, s); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +-- +2.31.1 + diff --git a/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch new file mode 100644 index 0000000..42114a1 --- /dev/null +++ b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch @@ -0,0 +1,70 @@ +From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Tue, 10 Jan 2023 14:25:34 +0100 +Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on + s390-ccw-virtio-rhel8.6.0 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0 +RH-Bugzilla: 2159408 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s) + +commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI enhancements +on older machines") activated zPCI enhancement features (interpretation +and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine +for RHEL8.8. It didn't seem to be a problem since migration is not +possible but it broke LEAPP upgrade to RHEL9 when the machine is +defined with a passthrough device. Activate the zPCI features also on +RHEL9.2 for the machines to be alike in both latest RHEL distros. 
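
The mechanism, shown concretely in the hunk below, is QEMU's machine-type compat machinery: a static GlobalProperty table pins device-property defaults for a versioned machine class. A minimal sketch of the pattern, using the same names as the hunk:

    /* Sketch: pin zPCI feature defaults for one versioned machine class */
    static void sketch_class_options(MachineClass *mc)
    {
        static GlobalProperty compat[] = {
            { TYPE_S390_PCI_DEVICE, "interpret", "on", },
            { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
        };

        compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
    }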
+
+Upstream Status: RHEL-only
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408
+
+Signed-off-by: Cédric Le Goater
+---
+ hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
+index aa142a1a4e..4cdd59c394 100644
+--- a/hw/s390x/s390-virtio-ccw.c
++++ b/hw/s390x/s390-virtio-ccw.c
+@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
+
+ static void ccw_machine_rhel860_class_options(MachineClass *mc)
+ {
++ static GlobalProperty compat[] = {
++ { TYPE_S390_PCI_DEVICE, "interpret", "on", },
++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
++ };
++
+ ccw_machine_rhel900_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+
+ /* All RHEL machines for prior major releases are deprecated */
+ mc->deprecation_reason = rhel_old_machine_deprecation;
+@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine)
+
+ static void ccw_machine_rhel850_class_options(MachineClass *mc)
+ {
++ static GlobalProperty compat[] = {
++ { TYPE_S390_PCI_DEVICE, "interpret", "off", },
++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
++ };
++
+ ccw_machine_rhel860_class_options(mc);
+ compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ mc->smp_props.prefer_sockets = true;
+ }
+ DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
+--
+2.31.1
+
diff --git a/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch
new file mode 100644
index 0000000..a8e3957
--- /dev/null
+++ b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch
@@ -0,0 +1,159 @@
+From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf
+Date: Fri, 18 Nov 2022 18:41:04 +0100
+Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain
+
+RH-Author: Stefano Garzarella
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito
+RH-Acked-by: Hanna Czenczek
+RH-Acked-by: Kevin Wolf
+RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s)
+
+The subtree drain was introduced in commit b1e1af394d9 as a way to avoid
+graph changes between finding the base node and changing the block graph
+as necessary on completion of the image streaming job.
+
+The block graph could change between these two points because
+bdrv_set_backing_hd() first drains the parent node, which involves
+polling and can do anything.
+
+Subtree draining was an imperfect way to make this less likely (because
+with it, fewer callbacks are called during this window). Everyone agreed
+that it's not really the right solution, and it was only committed as a
+stopgap.
+
+This replaces the subtree drain with a solution that simply drains the
+parent node before we try to find the base node, and then calls a version
+of bdrv_set_backing_hd() that doesn't drain, but just asserts that the
+parent node is already drained.
+
+This way, any graph changes caused by draining happen before we start
+looking at the graph and things stay consistent between finding the base
+node and changing the graph.
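
Condensed, the stream_prepare() flow that the hunks below implement looks like this. A sketch only, with error handling omitted; the types and function names are those used in the patch:

    /* Sketch: single-node drained section around the graph change */
    static void sketch_stream_prepare(StreamBlockJob *s,
                                      BlockDriverState *unfiltered_bs)
    {
        BlockDriverState *base;
        Error *local_err = NULL;

        bdrv_drained_begin(unfiltered_bs);  /* polls; graph may change here */
        base = bdrv_filter_or_cow_bs(s->above_base);  /* graph is stable now */
        bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
        /* Later steps may do I/O and change the graph again; the graph
         * change itself is already complete at this point, so that is safe. */
        bdrv_drained_end(unfiltered_bs);
    }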
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-10-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6) +Signed-off-by: Stefano Garzarella +--- + block.c | 17 ++++++++++++++--- + block/stream.c | 26 ++++++++++++++++---------- + include/block/block-global-state.h | 3 +++ + 3 files changed, 33 insertions(+), 13 deletions(-) + +diff --git a/block.c b/block.c +index b3449a312e..5330e89903 100644 +--- a/block.c ++++ b/block.c +@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs, + return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp); + } + +-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, +- Error **errp) ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp) + { + int ret; + Transaction *tran = tran_new(); + + GLOBAL_STATE_CODE(); +- bdrv_drained_begin(bs); ++ assert(bs->quiesce_counter > 0); + + ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp); + if (ret < 0) { +@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + ret = bdrv_refresh_perms(bs, errp); + out: + tran_finalize(tran, ret); ++ return ret; ++} + ++int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, ++ Error **errp) ++{ ++ int ret; ++ GLOBAL_STATE_CODE(); ++ ++ bdrv_drained_begin(bs); ++ ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp); + bdrv_drained_end(bs); + + return ret; +diff --git a/block/stream.c b/block/stream.c +index 694709bd25..8744ad103f 100644 +--- a/block/stream.c ++++ b/block/stream.c +@@ -64,13 +64,16 @@ static int stream_prepare(Job *job) + bdrv_cor_filter_drop(s->cor_filter_bs); + s->cor_filter_bs = NULL; + +- bdrv_subtree_drained_begin(s->above_base); ++ /* ++ * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain ++ * already here and use bdrv_set_backing_hd_drained() instead because ++ * the polling during drained_begin() might change the graph, and if we do ++ * this only later, we may end up working with the wrong base node (or it ++ * might even have gone away by the time we want to use it). ++ */ ++ bdrv_drained_begin(unfiltered_bs); + + base = bdrv_filter_or_cow_bs(s->above_base); +- if (base) { +- bdrv_ref(base); +- } +- + unfiltered_base = bdrv_skip_filters(base); + + if (bdrv_cow_child(unfiltered_bs)) { +@@ -82,7 +85,13 @@ static int stream_prepare(Job *job) + } + } + +- bdrv_set_backing_hd(unfiltered_bs, base, &local_err); ++ bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err); ++ ++ /* ++ * This call will do I/O, so the graph can change again from here on. ++ * We have already completed the graph change, so we are not in danger ++ * of operating on the wrong node any more if this happens. 
++ */ + ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false); + if (local_err) { + error_report_err(local_err); +@@ -92,10 +101,7 @@ static int stream_prepare(Job *job) + } + + out: +- if (base) { +- bdrv_unref(base); +- } +- bdrv_subtree_drained_end(s->above_base); ++ bdrv_drained_end(unfiltered_bs); + return ret; + } + +diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h +index c7bd4a2088..00e0cf8aea 100644 +--- a/include/block/block-global-state.h ++++ b/include/block/block-global-state.h +@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename, + BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp); + int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, + Error **errp); ++int bdrv_set_backing_hd_drained(BlockDriverState *bs, ++ BlockDriverState *backing_hd, ++ Error **errp); + int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options, + const char *bdref_key, Error **errp); + BlockDriverState *bdrv_open(const char *filename, const char *reference, +-- +2.31.1 + diff --git a/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch new file mode 100644 index 0000000..268c263 --- /dev/null +++ b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch @@ -0,0 +1,153 @@ +From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Fri, 18 Nov 2022 18:40:57 +0100 +Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in + .bdrv_co_drained_begin/end() + +RH-Author: Stefano Garzarella +RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot +RH-Bugzilla: 2155112 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Kevin Wolf +RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s) + +We want to change .bdrv_co_drained_begin/end() back to be non-coroutine +callbacks, so in preparation, avoid yielding in their implementation. + +This does almost the same as the existing logic in bdrv_drain_invoke(), +by creating and entering coroutines internally. However, since the test +case is by far the heaviest user of coroutine code in drain callbacks, +it is preferable to have the complexity in the test case rather than the +drain core, which is already complicated enough without this. + +The behaviour for bdrv_drain_begin() is unchanged because we increase +bs->in_flight and this is still polled. However, bdrv_drain_end() +doesn't wait for the spawned coroutine to complete any more. This is +fine, we don't rely on bdrv_drain_end() restarting all operations +immediately before the next aio_poll(). 
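
The shape of the transformation, as a sketch (the patch applies it per call site; the function names here are illustrative, not taken from the patch):

    /* Sketch: move the yielding work into its own coroutine and account
     * for it in bs->in_flight, so bdrv_drained_begin() still polls it. */
    static void coroutine_fn deferred_work_entry(void *opaque)
    {
        BlockDriverState *bs = opaque;

        /* ... work that previously ran inline and yielded ... */
        bdrv_dec_in_flight(bs);
    }

    static void drain_begin_cb(BlockDriverState *bs)  /* no longer coroutine_fn */
    {
        Coroutine *co = qemu_coroutine_create(deferred_work_entry, bs);

        bdrv_inc_in_flight(bs);
        aio_co_enter(bdrv_get_aio_context(bs), co);
    }

Because bs->in_flight is incremented before the coroutine is entered, drain_begin keeps polling until the deferred work decrements it; drain_end simply no longer waits.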
+ +Signed-off-by: Kevin Wolf +Reviewed-by: Vladimir Sementsov-Ogievskiy +Reviewed-by: Emanuele Giuseppe Esposito +Reviewed-by: Hanna Reitz +Message-Id: <20221118174110.55183-3-kwolf@redhat.com> +Signed-off-by: Kevin Wolf +(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75) +Signed-off-by: Stefano Garzarella +--- + tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++---------- + 1 file changed, 46 insertions(+), 18 deletions(-) + +diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c +index 09dc4a4891..24f34e24ad 100644 +--- a/tests/unit/test-bdrv-drain.c ++++ b/tests/unit/test-bdrv-drain.c +@@ -38,12 +38,22 @@ typedef struct BDRVTestState { + bool sleep_in_drain_begin; + } BDRVTestState; + ++static void coroutine_fn sleep_in_drain_begin(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ bdrv_dec_in_flight(bs); ++} ++ + static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs) + { + BDRVTestState *s = bs->opaque; + s->drain_count++; + if (s->sleep_in_drain_begin) { +- qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000); ++ Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + +@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs, + return 0; + } + ++static void coroutine_fn bdrv_replace_test_drain_co(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ BDRVReplaceTestState *s = bs->opaque; ++ ++ /* Keep waking io_co up until it is done */ ++ while (s->io_co) { ++ aio_co_wake(s->io_co); ++ s->io_co = NULL; ++ qemu_coroutine_yield(); ++ } ++ s->drain_co = NULL; ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * If .drain_count is 0, wake up .io_co if there is one; and set + * .was_drained. +@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs) + BDRVReplaceTestState *s = bs->opaque; + + if (!s->drain_count) { +- /* Keep waking io_co up until it is done */ +- s->drain_co = qemu_coroutine_self(); +- while (s->io_co) { +- aio_co_wake(s->io_co); +- s->io_co = NULL; +- qemu_coroutine_yield(); +- } +- s->drain_co = NULL; +- ++ s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), s->drain_co); + s->was_drained = true; + } + s->drain_count++; + } + ++static void coroutine_fn bdrv_replace_test_read_entry(void *opaque) ++{ ++ BlockDriverState *bs = opaque; ++ char data; ++ QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); ++ int ret; ++ ++ /* Queue a read request post-drain */ ++ ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); ++ g_assert(ret >= 0); ++ bdrv_dec_in_flight(bs); ++} ++ + /** + * Reduce .drain_count, set .was_undrained once it reaches 0. 
+ * If .drain_count reaches 0 and the node has a backing file, issue a +@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs) + + g_assert(s->drain_count > 0); + if (!--s->drain_count) { +- int ret; +- + s->was_undrained = true; + + if (bs->backing) { +- char data; +- QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1); +- +- /* Queue a read request post-drain */ +- ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0); +- g_assert(ret >= 0); ++ Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry, ++ bs); ++ bdrv_inc_in_flight(bs); ++ aio_co_enter(bdrv_get_aio_context(bs), co); + } + } + } +-- +2.31.1 + diff --git a/kvm-vhost-add-support-for-configure-interrupt.patch b/kvm-vhost-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..a7cfb2f --- /dev/null +++ b/kvm-vhost-add-support-for-configure-interrupt.patch @@ -0,0 +1,185 @@ +From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:48 +0800 +Subject: [PATCH 07/31] vhost: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure interrupt. +The configure interrupt process will start in vhost_dev_start +and stop in vhost_dev_stop. + +Also add the functions to support vhost_config_pending and +vhost_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-8-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96) +Signed-off-by: Cindy Lu +--- + hw/virtio/vhost.c | 78 ++++++++++++++++++++++++++++++++++++++- + include/hw/virtio/vhost.h | 4 ++ + 2 files changed, 81 insertions(+), 1 deletion(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 7fb008bc9e..84dbb39e07 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n); + r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file); + if (r < 0) { +- VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed"); ++ error_report("vhost_set_vring_call failed %d", -r); ++ } ++} ++ ++bool vhost_config_pending(struct vhost_dev *hdev) ++{ ++ assert(hdev->vhost_ops); ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return false; ++ } ++ ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ return event_notifier_test_and_clear(notifier); ++} ++ ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask) ++{ ++ int fd; ++ int r; ++ EventNotifier *notifier = ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier; ++ EventNotifier *config_notifier = &vdev->config_notifier; ++ assert(hdev->vhost_ops); ++ ++ if ((hdev->started == false) || ++ (hdev->vhost_ops->vhost_set_config_call == NULL)) { ++ return; ++ } ++ if (mask) { ++ assert(vdev->use_guest_notifier_mask); ++ fd = event_notifier_get_fd(notifier); ++ } else { ++ fd = event_notifier_get_fd(config_notifier); ++ } ++ r = hdev->vhost_ops->vhost_set_config_call(hdev, fd); ++ if (r < 0) { ++ error_report("vhost_set_config_call failed %d", -r); ++ } ++} ++ ++static void vhost_stop_config_intr(struct vhost_dev *dev) ++{ ++ int fd = -1; ++ assert(dev->vhost_ops); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ dev->vhost_ops->vhost_set_config_call(dev, fd); ++ } ++} ++ ++static void vhost_start_config_intr(struct vhost_dev *dev) ++{ ++ int r; ++ ++ assert(dev->vhost_ops); ++ int fd = event_notifier_get_fd(&dev->vdev->config_notifier); ++ if (dev->vhost_ops->vhost_set_config_call) { ++ r = dev->vhost_ops->vhost_set_config_call(dev, fd); ++ if (!r) { ++ event_notifier_set(&dev->vdev->config_notifier); ++ } + } + } + +@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + } + ++ r = event_notifier_init( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ if (!vdev->use_guest_notifier_mask) { ++ vhost_config_mask(hdev, vdev, true); ++ } + if (hdev->log_enabled) { + uint64_t log_base; + +@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + vhost_device_iotlb_miss(hdev, vq->used_phys, true); + } + } ++ vhost_start_config_intr(hdev); + return 0; + fail_start: + if (vrings) { +@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + + /* should only be called after backend is connected */ + assert(hdev->vhost_ops); ++ event_notifier_test_and_clear( ++ &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier); ++ event_notifier_test_and_clear(&vdev->config_notifier); + + trace_vhost_dev_stop(hdev, vdev->name, vrings); + +@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct 
vhost_dev *hdev, VirtIODevice *vdev, bool vrings) + } + memory_listener_unregister(&hdev->iommu_listener); + } ++ vhost_stop_config_intr(hdev); + vhost_log_put(hdev, true); + hdev->started = false; + vdev->vhost_started = false; +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 67a6807fac..05bedb2416 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -33,6 +33,7 @@ struct vhost_virtqueue { + unsigned used_size; + EventNotifier masked_notifier; + EventNotifier error_notifier; ++ EventNotifier masked_config_notifier; + struct vhost_dev *dev; + }; + +@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t; + #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t)) + #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS) + #define VHOST_INVALID_FEATURE_BIT (0xff) ++#define VHOST_QUEUE_NUM_CONFIG_INR 0 + + struct vhost_log { + unsigned long long size; +@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); + * Disable direct notifications to vhost device. + */ + void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev); ++bool vhost_config_pending(struct vhost_dev *hdev); ++void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask); + + /** + * vhost_dev_is_started() - report status of vhost device +-- +2.31.1 + diff --git a/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch new file mode 100644 index 0000000..ca93785 --- /dev/null +++ b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch @@ -0,0 +1,157 @@ +From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Fri, 16 Dec 2022 11:35:52 +0800 +Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is + enabled + +RH-Author: Eric Auger +RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled +RH-Bugzilla: 2124856 +RH-Acked-by: Peter Xu +RH-Acked-by: Jason Wang +RH-Acked-by: Laurent Vivier +RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm) + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856 +Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924 +Upstream: yes + +When vIOMMU is enabled, the vq->used_phys is actually the IOVA not +GPA. So we need to translate it to GPA before the syncing otherwise we +may hit the following crash since IOVA could be out of the scope of +the GPA log size. This could be noted when using virtio-IOMMU with +vhost using 1G memory. + +Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support") +Cc: qemu-stable@nongnu.org +Tested-by: Lei Yang +Reported-by: Yalan Zhang +Signed-off-by: Jason Wang +Message-Id: <20221216033552.77087-1-jasowang@redhat.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b) +Signed-off-by: Eric Auger +--- + hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++----------- + 1 file changed, 64 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 84dbb39e07..2c566dc539 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -20,6 +20,7 @@ + #include "qemu/range.h" + #include "qemu/error-report.h" + #include "qemu/memfd.h" ++#include "qemu/log.h" + #include "standard-headers/linux/vhost_types.h" + #include "hw/virtio/virtio-bus.h" + #include "hw/virtio/virtio-access.h" +@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, + } + } + ++static bool vhost_dev_has_iommu(struct vhost_dev *dev) ++{ ++ VirtIODevice *vdev = dev->vdev; ++ ++ /* ++ * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support ++ * incremental memory mapping API via IOTLB API. For platform that ++ * does not have IOMMU, there's no need to enable this feature ++ * which may cause unnecessary IOTLB miss/update transactions. ++ */ ++ if (vdev) { ++ return virtio_bus_device_iommu_enabled(vdev) && ++ virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ } else { ++ return false; ++ } ++} ++ + static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + MemoryRegionSection *section, + hwaddr first, +@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev, + continue; + } + +- vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys, +- range_get_last(vq->used_phys, vq->used_size)); ++ if (vhost_dev_has_iommu(dev)) { ++ IOMMUTLBEntry iotlb; ++ hwaddr used_phys = vq->used_phys, used_size = vq->used_size; ++ hwaddr phys, s, offset; ++ ++ while (used_size) { ++ rcu_read_lock(); ++ iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as, ++ used_phys, ++ true, ++ MEMTXATTRS_UNSPECIFIED); ++ rcu_read_unlock(); ++ ++ if (!iotlb.target_as) { ++ qemu_log_mask(LOG_GUEST_ERROR, "translation " ++ "failure for used_iova %"PRIx64"\n", ++ used_phys); ++ return -EINVAL; ++ } ++ ++ offset = used_phys & iotlb.addr_mask; ++ phys = iotlb.translated_addr + offset; ++ ++ /* ++ * Distance from start of used ring until last byte of ++ * IOMMU page. ++ */ ++ s = iotlb.addr_mask - offset; ++ /* ++ * Size of used ring, or of the part of it until end ++ * of IOMMU page. To avoid zero result, do the adding ++ * outside of MIN(). ++ */ ++ s = MIN(s, used_size - 1) + 1; ++ ++ vhost_dev_sync_region(dev, section, start_addr, end_addr, phys, ++ range_get_last(phys, s)); ++ used_size -= s; ++ used_phys += s; ++ } ++ } else { ++ vhost_dev_sync_region(dev, section, start_addr, ++ end_addr, vq->used_phys, ++ range_get_last(vq->used_phys, vq->used_size)); ++ } + } + return 0; + } +@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size) + dev->log_size = size; + } + +-static bool vhost_dev_has_iommu(struct vhost_dev *dev) +-{ +- VirtIODevice *vdev = dev->vdev; +- +- /* +- * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support +- * incremental memory mapping API via IOTLB API. For platform that +- * does not have IOMMU, there's no need to enable this feature +- * which may cause unnecessary IOTLB miss/update transactions. 
+- */ +- if (vdev) { +- return virtio_bus_device_iommu_enabled(vdev) && +- virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- } else { +- return false; +- } +-} +- + static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr, + hwaddr *plen, bool is_write) + { +-- +2.31.1 + diff --git a/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch new file mode 100644 index 0000000..1b48f5d --- /dev/null +++ b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch @@ -0,0 +1,56 @@ +From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:45 +0800 +Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/10] c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +This patch introduces new VhostOps vhost_set_config_call. +This function allows the qemu to set the config +event fd to kernel driver. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-5-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893) +Signed-off-by: Cindy Lu +--- + include/hw/virtio/vhost-backend.h | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index eab46d7f0b..c5ab49051e 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id); + + typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev); + ++typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev, ++ int fd); + typedef struct VhostOps { + VhostBackendType backend_type; + vhost_backend_init vhost_backend_init; +@@ -174,6 +176,7 @@ typedef struct VhostOps { + vhost_vq_get_addr_op vhost_vq_get_addr; + vhost_get_device_id_op vhost_get_device_id; + vhost_force_iommu_op vhost_force_iommu; ++ vhost_set_config_call_op vhost_set_config_call; + } VhostOps; + + int vhost_backend_update_device_iotlb(struct vhost_dev *dev, +-- +2.31.1 + diff --git a/kvm-vhost-vdpa-add-support-for-config-interrupt.patch b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch new file mode 100644 index 0000000..88d4df6 --- /dev/null +++ b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch @@ -0,0 +1,73 @@ +From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:46 +0800 +Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add new call back function in vhost-vdpa, The function +vhost_set_config_call can set the event fd to 
kernel.
+This function will be called in vhost_dev_start
+and vhost_dev_stop.
+
+Signed-off-by: Cindy Lu
+Message-Id: <20221222070451.936503-6-lulu@redhat.com>
+Acked-by: Jason Wang
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6)
+Signed-off-by: Cindy Lu
+---
+ hw/virtio/trace-events | 1 +
+ hw/virtio/vhost-vdpa.c | 8 ++++++++
+ 2 files changed, 9 insertions(+)
+
+diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
+index 14fc5b9bb2..46f2faf04e 100644
+--- a/hw/virtio/trace-events
++++ b/hw/virtio/trace-events
+@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
+ vhost_vdpa_set_owner(void *dev) "dev: %p"
+ vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
+ vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
++vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d"
+
+ # virtio.c
+ virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 7468e44b87..c5be2645b0 100644
+--- a/hw/virtio/vhost-vdpa.c
++++ b/hw/virtio/vhost-vdpa.c
+@@ -754,6 +754,13 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+ return 0;
+ }
+
++static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
++ int fd)
++{
++ trace_vhost_vdpa_set_config_call(dev, fd);
++ return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
++}
++
+ static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
+ uint32_t config_len)
+ {
+@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = {
+ .vhost_get_device_id = vhost_vdpa_get_device_id,
+ .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
+ .vhost_force_iommu = vhost_vdpa_force_iommu,
++ .vhost_set_config_call = vhost_vdpa_set_config_call,
+ };
+--
+2.31.1
+
diff --git a/kvm-virtio-add-support-for-configure-interrupt.patch b/kvm-virtio-add-support-for-configure-interrupt.patch
new file mode 100644
index 0000000..02f4666
--- /dev/null
+++ b/kvm-virtio-add-support-for-configure-interrupt.patch
@@ -0,0 +1,115 @@
+From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001
+From: Cindy Lu
+Date: Thu, 22 Dec 2022 15:04:47 +0800
+Subject: [PATCH 06/31] virtio: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier
+RH-Acked-by: Eugenio Pérez
+RH-Acked-by: Miroslav Rezanina
+RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add the functions to support the configure interrupt in virtio.
+The function virtio_config_guest_notifier_read will notify the
+guest if there is a configure interrupt.
+The function virtio_config_set_guest_notifier_fd_handler sets
+the fd handler for the notifier.
+
+Signed-off-by: Cindy Lu
+Message-Id: <20221222070451.936503-7-lulu@redhat.com>
+Acked-by: Jason Wang
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S.
Tsirkin +(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio.c | 29 +++++++++++++++++++++++++++++ + include/hw/virtio/virtio.h | 4 ++++ + 2 files changed, 33 insertions(+) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index eb6347ab5d..34e9c5d141 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n) + virtio_irq(vq); + } + } ++static void virtio_config_guest_notifier_read(EventNotifier *n) ++{ ++ VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier); + ++ if (event_notifier_test_and_clear(n)) { ++ virtio_notify_config(vdev); ++ } ++} + void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + bool with_irqfd) + { +@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign, + } + } + ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd) ++{ ++ EventNotifier *n; ++ n = &vdev->config_notifier; ++ if (assign && !with_irqfd) { ++ event_notifier_set_handler(n, virtio_config_guest_notifier_read); ++ } else { ++ event_notifier_set_handler(n, NULL); ++ } ++ if (!assign) { ++ /* Test and clear notifier before closing it,*/ ++ /* in case poll callback didn't have time to run. */ ++ virtio_config_guest_notifier_read(n); ++ } ++} ++ + EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq) + { + return &vq->guest_notifier; +@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq) + return &vq->host_notifier; + } + ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev) ++{ ++ return &vdev->config_notifier; ++} ++ + void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled) + { + vq->host_notifier_enabled = enabled; +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 1f4a41b958..9c3a4642f2 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -138,6 +138,7 @@ struct VirtIODevice + AddressSpace *dma_as; + QLIST_HEAD(, VirtQueue) *vector_queues; + QTAILQ_ENTRY(VirtIODevice) next; ++ EventNotifier config_notifier; + }; + + struct VirtioDeviceClass { +@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct + void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx); + VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector); + VirtQueue *virtio_vector_next_queue(VirtQueue *vq); ++EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev); ++void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev, ++ bool assign, bool with_irqfd); + + static inline void virtio_add_feature(uint64_t *features, unsigned int fbit) + { +-- +2.31.1 + diff --git a/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch new file mode 100644 index 0000000..ea2589a --- /dev/null +++ b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch @@ -0,0 +1,262 @@ +From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:42 +0800 +Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier 
+RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To support configure interrupt for vhost-vdpa +Introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index, +Then we can reuse the functions guest_notifier_mask and guest_notifier_pending. +Add the check of queue index in these drivers, if the driver does not support +configure interrupt, the function will just return + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-2-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b) +Signed-off-by: Cindy Lu +--- + hw/display/vhost-user-gpu.c | 18 ++++++++++++++++++ + hw/net/virtio-net.c | 22 ++++++++++++++++++++-- + hw/virtio/vhost-user-fs.c | 18 ++++++++++++++++++ + hw/virtio/vhost-user-gpio.c | 10 ++++++++++ + hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++ + hw/virtio/virtio-crypto.c | 18 ++++++++++++++++++ + include/hw/virtio/virtio.h | 3 +++ + 7 files changed, 105 insertions(+), 2 deletions(-) + +diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c +index 19c0e20103..4380a5e672 100644 +--- a/hw/display/vhost-user-gpu.c ++++ b/hw/display/vhost-user-gpu.c +@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&g->vhost->dev, idx); + } + +@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VhostUserGPU *g = VHOST_USER_GPU(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask); + } + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index aba12759d5..bee35d6f9f 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return false ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } + +@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + } else { + nc = qemu_get_subqueue(n->nic, vq2q(idx)); + } +- vhost_net_virtqueue_mask(get_vhost_net(nc->peer), +- vdev, idx, mask); ++ /* ++ *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ ++ vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + + static void 
virtio_net_set_config_size(VirtIONet *n, uint64_t host_features) +diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c +index d97b179e6f..f5049735ac 100644 +--- a/hw/virtio/vhost-user-fs.c ++++ b/hw/virtio/vhost-user-fs.c +@@ -159,6 +159,15 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask); + } + +@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) + { + VHostUserFS *fs = VHOST_USER_FS(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&fs->vhost_dev, idx); + } + +diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c +index b7b82a1099..fe3da32c74 100644 +--- a/hw/virtio/vhost-user-gpio.c ++++ b/hw/virtio/vhost-user-gpio.c +@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) + { + VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } ++ + vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask); + } + +diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c +index d21c72b401..d2b5519d5a 100644 +--- a/hw/virtio/vhost-vsock-common.c ++++ b/hw/virtio/vhost-vsock-common.c +@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask); + } + +@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, + { + VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return vhost_virtqueue_pending(&vvc->vhost_dev, idx); + } + +diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c +index 97da74e719..516425e26a 100644 +--- a/hw/virtio/virtio-crypto.c ++++ b/hw/virtio/virtio-crypto.c +@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return; ++ } + cryptodev_vhost_virtqueue_mask(vdev, queue, 
idx, mask); + } + +@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) + + assert(vcrypto->vhost_started); + ++ /* ++ * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 ++ * as the Marco of configure interrupt's IDX, If this driver does not ++ * support, the function will return ++ */ ++ ++ if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ return false; ++ } + return cryptodev_vhost_virtqueue_pending(vdev, queue, idx); + } + +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index acfd4df125..1f4a41b958 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -79,6 +79,9 @@ typedef struct VirtQueueElement + + #define VIRTIO_NO_VECTOR 0xffff + ++/* special index value used internally for config irqs */ ++#define VIRTIO_CONFIG_IRQ_IDX -1 ++ + #define TYPE_VIRTIO_DEVICE "virtio-device" + OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE) + +-- +2.31.1 + diff --git a/kvm-virtio-mmio-add-support-for-configure-interrupt.patch b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..275b197 --- /dev/null +++ b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch @@ -0,0 +1,80 @@ +From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:50 +0800 +Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add configure interrupt support in virtio-mmio bus. +add function to set configure guest notifier. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-10-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++ + 1 file changed, 27 insertions(+) + +diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c +index d240efef97..103260ec15 100644 +--- a/hw/virtio/virtio-mmio.c ++++ b/hw/virtio/virtio-mmio.c +@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign, + + return 0; + } ++static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign, ++ bool with_irqfd) ++{ ++ VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d); ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); ++ EventNotifier *notifier = virtio_config_get_guest_notifier(vdev); ++ int r = 0; + ++ if (assign) { ++ r = event_notifier_init(notifier, 0); ++ if (r < 0) { ++ return r; ++ } ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else { ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ event_notifier_cleanup(notifier); ++ } ++ if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) { ++ vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign); ++ } ++ return r; ++} + static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + bool assign) + { +@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs, + goto assign_error; + } + } ++ r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd); ++ if (r < 0) { ++ goto assign_error; ++ } + + return 0; + +-- +2.31.1 + diff --git a/kvm-virtio-net-add-support-for-configure-interrupt.patch b/kvm-virtio-net-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..74b956a --- /dev/null +++ b/kvm-virtio-net-add-support-for-configure-interrupt.patch @@ -0,0 +1,115 @@ +From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:49 +0800 +Subject: [PATCH 08/31] virtio-net: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add functions to support configure interrupt in virtio_net +Add the functions to support vhost_net_config_pending +and vhost_net_config_mask. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-9-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c) +Signed-off-by: Cindy Lu +--- + hw/net/vhost_net-stub.c | 9 +++++++++ + hw/net/vhost_net.c | 9 +++++++++ + hw/net/virtio-net.c | 4 ++-- + include/net/vhost_net.h | 2 ++ + 4 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c +index 9f7daae99c..c36f258201 100644 +--- a/hw/net/vhost_net-stub.c ++++ b/hw/net/vhost_net-stub.c +@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + { + } + ++bool vhost_net_config_pending(VHostNetState *net) ++{ ++ return false; ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++} ++ + int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr) + { + return -1; +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 043058ff43..6a55f5a473 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + vhost_virtqueue_mask(&net->dev, dev, idx, mask); + } + ++bool vhost_net_config_pending(VHostNetState *net) ++{ ++ return vhost_config_pending(&net->dev); ++} ++ ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask) ++{ ++ vhost_config_mask(&net->dev, dev, mask); ++} + VHostNetState *get_vhost_net(NetClientState *nc) + { + VHostNetState *vhost_net = 0; +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index bee35d6f9f..ec974f7a76 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) + */ + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { +- return false; ++ return vhost_net_config_pending(get_vhost_net(nc->peer)); + } + return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); + } +@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, + */ + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { ++ vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask); + return; + } +- + vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask); + } + +diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h +index 40b9a40074..dbbd0dc04e 100644 +--- a/include/net/vhost_net.h ++++ b/include/net/vhost_net.h +@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + bool vhost_net_virtqueue_pending(VHostNetState *net, int n); + void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev, + int idx, bool mask); ++bool vhost_net_config_pending(VHostNetState *net); ++void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask); + int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr); + VHostNetState *get_vhost_net(NetClientState *nc); + +-- +2.31.1 + diff --git a/kvm-virtio-pci-add-support-for-configure-interrupt.patch b/kvm-virtio-pci-add-support-for-configure-interrupt.patch new file mode 100644 index 0000000..14070a4 --- /dev/null +++ b/kvm-virtio-pci-add-support-for-configure-interrupt.patch @@ -0,0 +1,274 @@ +From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:51 +0800 +Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 
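Patches 01 and 08 above converge on one dispatch shape for the guest_notifier_pending/guest_notifier_mask callbacks: VIRTIO_CONFIG_IRQ_IDX (-1) selects the config-interrupt path, and any other idx is a virtqueue index; a driver without config-interrupt support simply bails out at the idx check instead. Below is a compilable sketch of the patch-08 shape — not code from the patches — with stub functions (config_pending, virtqueue_mask, ...) standing in for the real vhost calls:

/*
 * Illustrative sketch: the idx dispatch used by the
 * guest_notifier_pending/mask callbacks once a device
 * supports the config interrupt. All stubs are invented.
 */
#include <stdbool.h>
#include <stdio.h>

#define VIRTIO_CONFIG_IRQ_IDX (-1)

struct dev { bool config_is_pending; };

/* Stand-ins for e.g. vhost_net_config_pending()/vhost_net_config_mask()
 * and their per-virtqueue counterparts. */
static bool config_pending(struct dev *d) { return d->config_is_pending; }
static void config_mask(struct dev *d, bool mask)
{
    (void)d;
    printf("config irq %smasked\n", mask ? "" : "un");
}
static bool virtqueue_pending(struct dev *d, int idx)
{
    (void)d; (void)idx;
    return false;
}
static void virtqueue_mask(struct dev *d, int idx, bool mask)
{
    (void)d;
    printf("vq %d %smasked\n", idx, mask ? "" : "un");
}

static bool guest_notifier_pending(struct dev *d, int idx)
{
    /* idx == -1 is the config interrupt; everything else is a queue. */
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        return config_pending(d);
    }
    return virtqueue_pending(d, idx);
}

static void guest_notifier_mask(struct dev *d, int idx, bool mask)
{
    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
        config_mask(d, mask);
        return;
    }
    virtqueue_mask(d, idx, mask);
}

int main(void)
{
    struct dev d = { .config_is_pending = true };
    guest_notifier_mask(&d, VIRTIO_CONFIG_IRQ_IDX, true);
    printf("config pending: %d\n",
           guest_notifier_pending(&d, VIRTIO_CONFIG_IRQ_IDX));
    guest_notifier_mask(&d, 0, false);
    return 0;
}
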
+RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +Add process to handle the configure interrupt, The function's +logic is the same with vq interrupt.Add extra process to check +the configure interrupt + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-11-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 118 +++++++++++++++++++++++++++------ + include/hw/virtio/virtio-pci.h | 4 +- + 2 files changed, 102 insertions(+), 20 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index ec816ea367..3f00e91718 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + VirtQueue *vq; + + if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { +- return -1; ++ *n = virtio_config_get_guest_notifier(vdev); ++ *vector = vdev->config_vector; + } else { + if (!virtio_queue_get_num(vdev, queue_no)) { + return -1; +@@ -811,7 +812,7 @@ undo: + } + return ret; + } +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + int ret = 0; +@@ -826,6 +827,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + return ret; + } + ++static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy) ++{ ++ return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX); ++} + + static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + int queue_no) +@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs) + { + int queue_no; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + } + } + ++static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy) ++{ ++ kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX); ++} ++ + static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + } + vq = virtio_vector_next_queue(vq); + } +- ++ /* unmask config intr */ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, ++ msg, n); ++ if (ret < 0) { ++ goto undo_config; ++ } ++ } + return 0; +- ++undo_config: ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); + undo: + vq = virtio_vector_first_queue(vdev, vector); + while (vq && unmasked >= 0) { +@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + } + vq = virtio_vector_next_queue(vq); + } ++ ++ if (vector == vdev->config_vector) { ++ n = virtio_config_get_guest_notifier(vdev); ++ virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n); ++ 
} + } + + static void virtio_pci_vector_poll(PCIDevice *dev, +@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + msix_set_pending(dev, vector); + } + } ++ /* poll the config intr */ ++ ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, ¬ifier, ++ &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector < vector_start || vector >= vector_end || ++ !msix_is_masked(dev, vector)) { ++ return; ++ } ++ if (k->guest_notifier_pending) { ++ if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) { ++ msix_set_pending(dev, vector); ++ } ++ } else if (event_notifier_test_and_clear(notifier)) { ++ msix_set_pending(dev, vector); ++ } ++} ++ ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd) ++{ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd); ++ } else { ++ virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd); ++ } + } + + static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, +@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign, + VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, n); +- EventNotifier *notifier = virtio_queue_get_guest_notifier(vq); ++ VirtQueue *vq = NULL; ++ EventNotifier *notifier = NULL; ++ ++ if (n == VIRTIO_CONFIG_IRQ_IDX) { ++ notifier = virtio_config_get_guest_notifier(vdev); ++ } else { ++ vq = virtio_get_queue(vdev, n); ++ notifier = virtio_queue_get_guest_notifier(vq); ++ } + + if (assign) { + int r = event_notifier_init(notifier, 0); + if (r < 0) { + return r; + } +- virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd); + } else { +- virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd); ++ virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false, ++ with_irqfd); + event_notifier_cleanup(notifier); + } + +@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + proxy->nvqs_with_notifiers = nvqs; + + /* Must unset vector notifier while guest notifier is still assigned */ +- if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) { ++ if ((proxy->vector_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ !assign) { + msix_unset_vector_notifiers(&proxy->pci_dev); + if (proxy->vector_irqfd) { +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); ++ kvm_virtio_pci_vector_config_release(proxy); + g_free(proxy->vector_irqfd); + proxy->vector_irqfd = NULL; + } +@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + goto assign_error; + } + } +- ++ r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign, ++ with_irqfd); ++ if (r < 0) { ++ goto config_assign_error; ++ } + /* Must set vector notifier after guest notifier has been assigned */ +- if ((with_irqfd || k->guest_notifier_mask) && assign) { ++ if ((with_irqfd || ++ (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) && ++ assign) { + if (with_irqfd) { + proxy->vector_irqfd = + g_malloc0(sizeof(*proxy->vector_irqfd) * + msix_nr_vectors_allocated(&proxy->pci_dev)); +- r = kvm_virtio_pci_vector_use(proxy, nvqs); ++ r = 
kvm_virtio_pci_vector_vq_use(proxy, nvqs); ++ if (r < 0) { ++ goto config_assign_error; ++ } ++ r = kvm_virtio_pci_vector_config_use(proxy); + if (r < 0) { +- goto assign_error; ++ goto config_error; + } + } +- r = msix_set_vector_notifiers(&proxy->pci_dev, +- virtio_pci_vector_unmask, ++ ++ r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask, + virtio_pci_vector_mask, + virtio_pci_vector_poll); + if (r < 0) { +@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign) + notifiers_error: + if (with_irqfd) { + assert(assign); +- kvm_virtio_pci_vector_release(proxy, nvqs); ++ kvm_virtio_pci_vector_vq_release(proxy, nvqs); + } +- ++config_error: ++ if (with_irqfd) { ++ kvm_virtio_pci_vector_config_release(proxy); ++ } ++config_assign_error: ++ virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign, ++ with_irqfd); + assign_error: + /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */ + assert(assign); +diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h +index 938799e8f6..c02e278f46 100644 +--- a/include/hw/virtio/virtio-pci.h ++++ b/include/hw/virtio/virtio-pci.h +@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t); + * @fixed_queues. + */ + unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues); +- ++void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq, ++ int n, bool assign, ++ bool with_irqfd); + #endif +-- +2.31.1 + diff --git a/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch new file mode 100644 index 0000000..a8c32a2 --- /dev/null +++ b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch @@ -0,0 +1,272 @@ +From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:43 +0800 +Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 +To reuse the notifier process. We add the virtio_pci_get_notifier +to get the notifier and vector. The INPUT for this function is IDX, +The OUTPUT is the notifier and the vector + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-3-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++--------------- + 1 file changed, 57 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index a1c9dfa7bb..52c7692fff 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy, + } + + static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq); + } + + static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy, +- unsigned int queue_no, ++ EventNotifier *n , + unsigned int vector) + { +- VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; + int ret; + + ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq); + assert(ret == 0); + } ++static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, ++ EventNotifier **n, unsigned int *vector) ++{ ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ VirtQueue *vq; ++ ++ if (queue_no == VIRTIO_CONFIG_IRQ_IDX) { ++ return -1; ++ } else { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; ++ } ++ *vector = virtio_queue_vector(vdev, queue_no); ++ vq = virtio_get_queue(vdev, queue_no); ++ *n = virtio_queue_get_guest_notifier(vq); ++ } ++ return 0; ++} + + static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + { +@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + unsigned int vector; + int ret, queue_no; +- ++ EventNotifier *n; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, delay until unmasked in the frontend. 
+ */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { + kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; +@@ -792,7 +807,11 @@ undo: + continue; + } + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } +@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + unsigned int vector; + int queue_no; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- ++ EventNotifier *n; ++ int ret ; + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ break; ++ } + if (vector >= msix_nr_vectors_allocated(dev)) { + continue; + } +@@ -819,21 +842,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) + * Otherwise, it was cleaned when masked in the frontend. + */ + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + kvm_virtio_pci_vq_vector_release(proxy, vector); + } + } + +-static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, ++static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy, + unsigned int queue_no, + unsigned int vector, +- MSIMessage msg) ++ MSIMessage msg, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- VirtQueue *vq = virtio_get_queue(vdev, queue_no); +- EventNotifier *n = virtio_queue_get_guest_notifier(vq); + VirtIOIRQFD *irqfd; + int ret = 0; + +@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy, + event_notifier_set(n); + } + } else { +- ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector); ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + } + return ret; + } + +-static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, ++static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy, + unsigned int queue_no, +- unsigned int vector) ++ unsigned int vector, ++ EventNotifier *n) + { + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy, + if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { + k->guest_notifier_mask(vdev, queue_no, true); + } else { +- kvm_virtio_pci_irqfd_release(proxy, queue_no, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); + } + } + +@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ EventNotifier *n; + int ret, index, unmasked = 0; + + while (vq) { +@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector, + break; + } + if (index < proxy->nvqs_with_notifiers) { +- ret = 
virtio_pci_vq_vector_unmask(proxy, index, vector, msg); ++ n = virtio_queue_get_guest_notifier(vq); ++ ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n); + if (ret < 0) { + goto undo; + } +@@ -912,7 +937,8 @@ undo: + while (vq && unmasked >= 0) { + index = virtio_get_queue_index(vq); + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ n = virtio_queue_get_guest_notifier(vq); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + --unmasked; + } + vq = virtio_vector_next_queue(vq); +@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector) + VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtQueue *vq = virtio_vector_first_queue(vdev, vector); ++ EventNotifier *n; + int index; + + while (vq) { + index = virtio_get_queue_index(vq); ++ n = virtio_queue_get_guest_notifier(vq); + if (!virtio_queue_get_num(vdev, index)) { + break; + } + if (index < proxy->nvqs_with_notifiers) { +- virtio_pci_vq_vector_mask(proxy, index, vector); ++ virtio_pci_one_vector_mask(proxy, index, vector, n); + } + vq = virtio_vector_next_queue(vq); + } +@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev, + int queue_no; + unsigned int vector; + EventNotifier *notifier; +- VirtQueue *vq; ++ int ret; + + for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, ¬ifier, &vector); ++ if (ret < 0) { + break; + } +- vector = virtio_queue_vector(vdev, queue_no); + if (vector < vector_start || vector >= vector_end || + !msix_is_masked(dev, vector)) { + continue; + } +- vq = virtio_get_queue(vdev, queue_no); +- notifier = virtio_queue_get_guest_notifier(vq); + if (k->guest_notifier_pending) { + if (k->guest_notifier_pending(vdev, queue_no)) { + msix_set_pending(dev, vector); +-- +2.31.1 + diff --git a/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch new file mode 100644 index 0000000..be9b3c7 --- /dev/null +++ b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch @@ -0,0 +1,212 @@ +From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001 +From: Cindy Lu +Date: Thu, 22 Dec 2022 15:04:44 +0800 +Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the + interrupt process +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cindy Lu +RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa +RH-Bugzilla: 1905805 +RH-Acked-by: Laurent Vivier +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3) + +https://bugzilla.redhat.com/show_bug.cgi?id=1905805 + +To reuse the interrupt process in configure interrupt +Need to decouple the single vector from the interrupt process. +We add new function kvm_virtio_pci_vector_use_one and _release_one. +These functions are used for the single vector, the whole process will +finish in the loop with vq number. + +Signed-off-by: Cindy Lu +Message-Id: <20221222070451.936503-4-lulu@redhat.com> +Acked-by: Jason Wang +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942) +Signed-off-by: Cindy Lu +--- + hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------ + 1 file changed, 73 insertions(+), 58 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 52c7692fff..ec816ea367 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev, + } + + static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy, +- unsigned int queue_no, + unsigned int vector) + { + VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector]; +@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no, + return 0; + } + +-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no) + { ++ unsigned int vector; ++ int ret; ++ EventNotifier *n; + PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- unsigned int vector; +- int ret, queue_no; +- EventNotifier *n; +- for (queue_no = 0; queue_no < nvqs; queue_no++) { +- if (!virtio_queue_get_num(vdev, queue_no)) { +- break; +- } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector); ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return 0; ++ } ++ ret = kvm_virtio_pci_vq_vector_use(proxy, vector); ++ if (ret < 0) { ++ goto undo; ++ } ++ /* ++ * If guest supports masking, set up irqfd now. ++ * Otherwise, delay until unmasked in the frontend. ++ */ ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); + if (ret < 0) { ++ kvm_virtio_pci_vq_vector_release(proxy, vector); + goto undo; + } +- /* If guest supports masking, set up irqfd now. +- * Otherwise, delay until unmasked in the frontend. 
+- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = kvm_virtio_pci_irqfd_use(proxy, n, vector); +- if (ret < 0) { +- kvm_virtio_pci_vq_vector_release(proxy, vector); +- goto undo; +- } +- } + } +- return 0; + ++ return 0; + undo: +- while (--queue_no >= 0) { +- vector = virtio_queue_vector(vdev, queue_no); +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; ++ ++ vector = virtio_queue_vector(vdev, queue_no); ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return ret; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return ret; + } +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ return ret; ++} ++static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ int ret = 0; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ ++ for (queue_no = 0; queue_no < nvqs; queue_no++) { ++ if (!virtio_queue_get_num(vdev, queue_no)) { ++ return -1; + } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ ret = kvm_virtio_pci_vector_use_one(proxy, queue_no); + } + return ret; + } + +-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++ ++static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy, ++ int queue_no) + { +- PCIDevice *dev = &proxy->pci_dev; + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + unsigned int vector; +- int queue_no; +- VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + EventNotifier *n; +- int ret ; ++ int ret; ++ VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); ++ PCIDevice *dev = &proxy->pci_dev; ++ ++ ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); ++ if (ret < 0) { ++ return; ++ } ++ if (vector >= msix_nr_vectors_allocated(dev)) { ++ return; ++ } ++ if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { ++ kvm_virtio_pci_irqfd_release(proxy, n, vector); ++ } ++ kvm_virtio_pci_vq_vector_release(proxy, vector); ++} ++ ++static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs) ++{ ++ int queue_no; ++ VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); ++ + for (queue_no = 0; queue_no < nvqs; queue_no++) { + if (!virtio_queue_get_num(vdev, queue_no)) { + break; + } +- ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector); +- if (ret < 0) { +- break; +- } +- if (vector >= msix_nr_vectors_allocated(dev)) { +- continue; +- } +- /* If guest supports masking, clean up irqfd now. +- * Otherwise, it was cleaned when masked in the frontend. +- */ +- if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) { +- kvm_virtio_pci_irqfd_release(proxy, n, vector); +- } +- kvm_virtio_pci_vq_vector_release(proxy, vector); ++ kvm_virtio_pci_vector_release_one(proxy, queue_no); + } + } + +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index e143966..b01376f 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -210,6 +210,68 @@ Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch # For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch38: kvm-virtio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch39: kvm-vhost-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch +# For bz#1905805 - support config interrupt in vhost-vdpa qemu +Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch +# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8 +Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch +# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate +Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch49: kvm-block-Remove-drained_end_counter.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch50: kvm-block-Inline-bdrv_drain_invoke.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread 
enabled) +Patch52: kvm-block-Drain-individual-nodes-during-reopen.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch55: kvm-block-Remove-subtree-drains.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch56: kvm-block-Call-drain-callbacks-only-once.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch +# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled) +Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch61: kvm-accel-introduce-accelerator-blocker-API.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch62: kvm-KVM-keep-track-of-running-ioctls.patch +# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on +Patch63: kvm-kvm-Atomic-memslot-updates.patch %if %{have_clang} BuildRequires: clang @@ -1238,6 +1300,49 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Tue Jan 17 2023 Miroslav Rezanina - 7.2.0-5 +- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] +- kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] +- kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] +- kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] +- kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] +- kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] +- kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] +- kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] +- kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] +- kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] +- kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] +- kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] +- kvm-block-Remove-drained_end_counter.patch [bz#2155112] +- kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] +- 
kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] +- kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] +- kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] +- kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] +- kvm-block-Remove-subtree-drains.patch [bz#2155112] +- kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] +- kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] +- kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] +- kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] +- kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] +- kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] +- kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] +- kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] +- Resolves: bz#1905805 + (support config interrupt in vhost-vdpa qemu) +- Resolves: bz#2159408 + ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) +- Resolves: bz#2124856 + (VM with virtio interface and iommu=on will crash when try to migrate) +- Resolves: bz#2155112 + (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) +- Resolves: bz#1979276 + (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on) + * Thu Jan 12 2023 Miroslav Rezanina - 7.2.0-4 - kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749] - kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515]
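For reference, the spec changes above follow a fixed recipe for landing a backport series: bump Release, register each patch file with a comment naming the bug it serves, and prepend a changelog entry listing the patches and the bugs they resolve. A hypothetical sketch of the same recipe for an invented follow-up fix — every name, date, and bug number below is made up for illustration:

# Hypothetical example only; bz number, patch name, and maintainer are invented.
# 1) Bump Release:
Release: 6%{?rcrel}%{?dist}%{?cc_suffix}
# 2) Register the patch with a comment pointing at its bug:
# For bz#9999999 - example summary of the fix
Patch64: kvm-example-fix-something.patch
# 3) Prepend a matching entry at the top of %changelog:
* Mon Jan 23 2023 Jane Maintainer - 7.2.0-6
- kvm-example-fix-something.patch [bz#9999999]
- Resolves: bz#9999999
  (example summary of the fix)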