* Tue Jan 17 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-5

- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805] - kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805] - kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805] - kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805] - kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805] - kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805] - kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408] - kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856] - kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112] - kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112] - kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112] - kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112] - kvm-block-Remove-drained_end_counter.patch [bz#2155112] - kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112] - kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112] - kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112] - kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112] - kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112] - kvm-block-Remove-subtree-drains.patch [bz#2155112] - kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112] - kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112] - kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112] - kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112] - kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112] 
- kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276] - kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276] - kvm-kvm-Atomic-memslot-updates.patch [bz#1979276] - Resolves: bz#1905805 (support config interrupt in vhost-vdpa qemu) - Resolves: bz#2159408 ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8) - Resolves: bz#2124856 (VM with virtio interface and iommu=on will crash when try to migrate) - Resolves: bz#2155112 (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)) - Resolves: bz#1979276 (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on)
2023-01-17 07:06:28 -05:00 · 2023-01-17 07:06:28 -05:00 · 2fe1fc7b2d
commit 2fe1fc7b2d
parent 408bed44fe
32 changed files with 6173 additions and 1 deletions
--- a/kvm-KVM-keep-track-of-running-ioctls.patch
+++ b/kvm-KVM-keep-track-of-running-ioctls.patch
@ -0,0 +1,82 @@
+From 6aebc271d95f9c59cb63c923b6ce35f16fce10e4 Mon Sep 17 00:00:00 2001
+From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Date: Mon, 16 Jan 2023 07:17:23 -0500
+Subject: [PATCH 30/31] KVM: keep track of running ioctls
+
+RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-MergeRequest: 138: accel: introduce accelerator blocker API
+RH-Bugzilla: 1979276
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [2/3] f566f81dda26ae733008f32261ecd1253ec1796d (eesposit/qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
+
+commit a27dd2de68f37ba96fe164a42121daa5f0750afc
+Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Date:   Fri Nov 11 10:47:57 2022 -0500
+
+    KVM: keep track of running ioctls
+
+    Using the new accel-blocker API, mark where ioctls are being called
+    in KVM. Next, we will implement the critical section that will take
+    care of performing memslots modifications atomically, therefore
+    preventing any new ioctl from running and allowing the running ones
+    to finish.
+
+    Signed-off-by: David Hildenbrand <david@redhat.com>
+    Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+    Message-Id: <20221111154758.1372674-3-eesposit@redhat.com>
+    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+---
+ accel/kvm/kvm-all.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index f99b0becd8..ff660fd469 100644
+--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
+@@ -2310,6 +2310,7 @@ static int kvm_init(MachineState *ms)
+     assert(TARGET_PAGE_SIZE <= qemu_real_host_page_size());
+ 
+     s->sigmask_len = 8;
+    accel_blocker_init();
+ 
+ #ifdef KVM_CAP_SET_GUEST_DEBUG
+     QTAILQ_INIT(&s->kvm_sw_breakpoints);
+@@ -3014,7 +3015,9 @@ int kvm_vm_ioctl(KVMState *s, int type, ...)
+     va_end(ap);
+ 
+     trace_kvm_vm_ioctl(type, arg);
+    accel_ioctl_begin();
+     ret = ioctl(s->vmfd, type, arg);
+    accel_ioctl_end();
+     if (ret == -1) {
+         ret = -errno;
+     }
+@@ -3032,7 +3035,9 @@ int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
+     va_end(ap);
+ 
+     trace_kvm_vcpu_ioctl(cpu->cpu_index, type, arg);
+    accel_cpu_ioctl_begin(cpu);
+     ret = ioctl(cpu->kvm_fd, type, arg);
+    accel_cpu_ioctl_end(cpu);
+     if (ret == -1) {
+         ret = -errno;
+     }
+@@ -3050,7 +3055,9 @@ int kvm_device_ioctl(int fd, int type, ...)
+     va_end(ap);
+ 
+     trace_kvm_device_ioctl(fd, type, arg);
+    accel_ioctl_begin();
+     ret = ioctl(fd, type, arg);
+    accel_ioctl_end();
+     if (ret == -1) {
+         ret = -errno;
+     }
+-- 
+2.31.1
+
--- a/kvm-accel-introduce-accelerator-blocker-API.patch
+++ b/kvm-accel-introduce-accelerator-blocker-API.patch
@ -0,0 +1,348 @@
+From ae2077fd5d351a68c313c64f07fb225dff694a8f Mon Sep 17 00:00:00 2001
+From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Date: Mon, 16 Jan 2023 07:16:41 -0500
+Subject: [PATCH 29/31] accel: introduce accelerator blocker API
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-MergeRequest: 138: accel: introduce accelerator blocker API
+RH-Bugzilla: 1979276
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [1/3] 56b07cd7db516c5066e6d66b4695064fdf73abbf (eesposit/qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
+
+commit bd688fc93120fb3e28aa70e3dfdf567ccc1e0bc1
+Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Date:   Fri Nov 11 10:47:56 2022 -0500
+
+    accel: introduce accelerator blocker API
+
+    This API allows the accelerators to prevent vcpus from issuing
+    new ioctls while executing a critical section marked with the
+    accel_ioctl_inhibit_begin/end functions.
+
+    Note that all functions submitting ioctls must mark where the
+    ioctl is being called with accel_{cpu_}ioctl_begin/end().
+
+    This API requires the caller to always hold the BQL.
+    API documentation is in sysemu/accel-blocker.h
+
+    Internally, it uses a QemuLockCnt together with a per-CPU QemuLockCnt
+    (to minimize cache line bouncing) to prevent new ioctls from
+    running once the critical section starts, and a QemuEvent to wait
+    until all running ioctls finish.
+
+    Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+    Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+    Message-Id: <20221111154758.1372674-2-eesposit@redhat.com>
+    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Conflicts:
+	util/meson.build: "interval-tree.c" does not exist
+
+Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+---
+ accel/accel-blocker.c          | 154 +++++++++++++++++++++++++++++++++
+ accel/meson.build              |   2 +-
+ hw/core/cpu-common.c           |   2 +
+ include/hw/core/cpu.h          |   3 +
+ include/sysemu/accel-blocker.h |  56 ++++++++++++
+ util/meson.build               |   2 +-
+ 6 files changed, 217 insertions(+), 2 deletions(-)
+ create mode 100644 accel/accel-blocker.c
+ create mode 100644 include/sysemu/accel-blocker.h
+
+diff --git a/accel/accel-blocker.c b/accel/accel-blocker.c
+new file mode 100644
+index 0000000000..1e7f423462
+--- /dev/null
+++ b/accel/accel-blocker.c
+@@ -0,0 +1,154 @@
+/*
+ * Lock to inhibit accelerator ioctls
+ *
+ * Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito       <eesposit@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+#include "hw/core/cpu.h"
+#include "sysemu/accel-blocker.h"
+
+static QemuLockCnt accel_in_ioctl_lock;
+static QemuEvent accel_in_ioctl_event;
+
+void accel_blocker_init(void)
+{
+    qemu_lockcnt_init(&accel_in_ioctl_lock);
+    qemu_event_init(&accel_in_ioctl_event, false);
+}
+
+void accel_ioctl_begin(void)
+{
+    if (likely(qemu_mutex_iothread_locked())) {
+        return;
+    }
+
+    /* block if lock is taken in accel_ioctl_inhibit_begin() */
+    qemu_lockcnt_inc(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_end(void)
+{
+    if (likely(qemu_mutex_iothread_locked())) {
+        return;
+    }
+
+    qemu_lockcnt_dec(&accel_in_ioctl_lock);
+    /* change event to SET. If event was BUSY, wake up all waiters */
+    qemu_event_set(&accel_in_ioctl_event);
+}
+
+void accel_cpu_ioctl_begin(CPUState *cpu)
+{
+    if (unlikely(qemu_mutex_iothread_locked())) {
+        return;
+    }
+
+    /* block if lock is taken in accel_ioctl_inhibit_begin() */
+    qemu_lockcnt_inc(&cpu->in_ioctl_lock);
+}
+
+void accel_cpu_ioctl_end(CPUState *cpu)
+{
+    if (unlikely(qemu_mutex_iothread_locked())) {
+        return;
+    }
+
+    qemu_lockcnt_dec(&cpu->in_ioctl_lock);
+    /* change event to SET. If event was BUSY, wake up all waiters */
+    qemu_event_set(&accel_in_ioctl_event);
+}
+
+static bool accel_has_to_wait(void)
+{
+    CPUState *cpu;
+    bool needs_to_wait = false;
+
+    CPU_FOREACH(cpu) {
+        if (qemu_lockcnt_count(&cpu->in_ioctl_lock)) {
+            /* exit the ioctl, if vcpu is running it */
+            qemu_cpu_kick(cpu);
+            needs_to_wait = true;
+        }
+    }
+
+    return needs_to_wait || qemu_lockcnt_count(&accel_in_ioctl_lock);
+}
+
+void accel_ioctl_inhibit_begin(void)
+{
+    CPUState *cpu;
+
+    /*
+     * Inhibiting is only allowed while holding the BQL, so that we can easily
+     * identify when an inhibitor itself wants to issue an ioctl.
+     */
+    g_assert(qemu_mutex_iothread_locked());
+
+    /* Block further invocations of the ioctls outside the BQL.  */
+    CPU_FOREACH(cpu) {
+        qemu_lockcnt_lock(&cpu->in_ioctl_lock);
+    }
+    qemu_lockcnt_lock(&accel_in_ioctl_lock);
+
+    /* Keep waiting as long as there are running ioctls */
+    while (true) {
+
+        /* Reset event to FREE. */
+        qemu_event_reset(&accel_in_ioctl_event);
+
+        if (accel_has_to_wait()) {
+            /*
+             * If event is still FREE, and there are ioctls still in progress,
+             * wait.
+             *
+             * If an ioctl finishes before qemu_event_wait(), it will change
+             * the event state to SET. This will prevent qemu_event_wait() from
+             * blocking, but it's not a problem because if other ioctls are
+             * still running the loop will iterate once more and reset the event
+             * status to FREE so that it can wait properly.
+             *
+             * If an ioctl finishes while qemu_event_wait() is blocking, then
+             * the waiter is woken up, but also here the while loop makes sure
+             * to re-enter the wait if there are other running ioctls.
+             */
+            qemu_event_wait(&accel_in_ioctl_event);
+        } else {
+            /* No ioctl is running */
+            return;
+        }
+    }
+}
+
+void accel_ioctl_inhibit_end(void)
+{
+    CPUState *cpu;
+
+    qemu_lockcnt_unlock(&accel_in_ioctl_lock);
+    CPU_FOREACH(cpu) {
+        qemu_lockcnt_unlock(&cpu->in_ioctl_lock);
+    }
+}
+
+diff --git a/accel/meson.build b/accel/meson.build
+index 259c35c4c8..061332610f 100644
+--- a/accel/meson.build
+++ b/accel/meson.build
+@@ -1,4 +1,4 @@
+-specific_ss.add(files('accel-common.c'))
+specific_ss.add(files('accel-common.c', 'accel-blocker.c'))
+ softmmu_ss.add(files('accel-softmmu.c'))
+ user_ss.add(files('accel-user.c'))
+ 
+diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
+index f9fdd46b9d..8d6a4b1b65 100644
+--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
+@@ -237,6 +237,7 @@ static void cpu_common_initfn(Object *obj)
+     cpu->nr_threads = 1;
+ 
+     qemu_mutex_init(&cpu->work_mutex);
+    qemu_lockcnt_init(&cpu->in_ioctl_lock);
+     QSIMPLEQ_INIT(&cpu->work_list);
+     QTAILQ_INIT(&cpu->breakpoints);
+     QTAILQ_INIT(&cpu->watchpoints);
+@@ -248,6 +249,7 @@ static void cpu_common_finalize(Object *obj)
+ {
+     CPUState *cpu = CPU(obj);
+ 
+    qemu_lockcnt_destroy(&cpu->in_ioctl_lock);
+     qemu_mutex_destroy(&cpu->work_mutex);
+ }
+ 
+diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
+index 8830546121..2417597236 100644
+--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
+@@ -398,6 +398,9 @@ struct CPUState {
+     uint32_t kvm_fetch_index;
+     uint64_t dirty_pages;
+ 
+    /* Used by accel-blocker: CPU is executing an ioctl() */
+    QemuLockCnt in_ioctl_lock;
+
+     /* Used for events with 'vcpu' and *without* the 'disabled' properties */
+     DECLARE_BITMAP(trace_dstate_delayed, CPU_TRACE_DSTATE_MAX_EVENTS);
+     DECLARE_BITMAP(trace_dstate, CPU_TRACE_DSTATE_MAX_EVENTS);
+diff --git a/include/sysemu/accel-blocker.h b/include/sysemu/accel-blocker.h
+new file mode 100644
+index 0000000000..72020529ef
+--- /dev/null
+++ b/include/sysemu/accel-blocker.h
+@@ -0,0 +1,56 @@
+/*
+ * Accelerator blocking API, to prevent new ioctls from starting and wait for
+ * the running ones to finish.
+ * This mechanism differs from pause/resume_all_vcpus() in that it does not
+ * release the BQL.
+ *
+ *  Copyright (c) 2022 Red Hat Inc.
+ *
+ * Author: Emanuele Giuseppe Esposito       <eesposit@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+#ifndef ACCEL_BLOCKER_H
+#define ACCEL_BLOCKER_H
+
+#include "qemu/osdep.h"
+#include "sysemu/cpus.h"
+
+extern void accel_blocker_init(void);
+
+/*
+ * accel_{cpu_}ioctl_begin/end:
+ * Mark when ioctl is about to run or just finished.
+ *
+ * accel_{cpu_}ioctl_begin will block after accel_ioctl_inhibit_begin() is
+ * called, preventing new ioctls from running. They will continue only after
+ * accel_ioctl_inhibit_end().
+ */
+extern void accel_ioctl_begin(void);
+extern void accel_ioctl_end(void);
+extern void accel_cpu_ioctl_begin(CPUState *cpu);
+extern void accel_cpu_ioctl_end(CPUState *cpu);
+
+/*
+ * accel_ioctl_inhibit_begin: start critical section
+ *
+ * This function makes sure that:
+ * 1) incoming accel_{cpu_}ioctl_begin() calls block
+ * 2) all ioctls that were already running have reached
+ *    accel_{cpu_}ioctl_end(), kicking vcpus if necessary.
+ *
+ * This allows the caller to access shared data or perform operations without
+ * worrying about concurrent vcpu accesses.
+ */
+extern void accel_ioctl_inhibit_begin(void);
+
+/*
+ * accel_ioctl_inhibit_end: end critical section started by
+ * accel_ioctl_inhibit_begin()
+ *
+ * This function allows blocked accel_{cpu_}ioctl_begin() to continue.
+ */
+extern void accel_ioctl_inhibit_end(void);
+
+#endif /* ACCEL_BLOCKER_H */
+diff --git a/util/meson.build b/util/meson.build
+index 25b9b61f98..85a5504c4d 100644
+--- a/util/meson.build
+++ b/util/meson.build
+@@ -57,6 +57,7 @@ util_ss.add(files('guest-random.c'))
+ util_ss.add(files('yank.c'))
+ util_ss.add(files('int128.c'))
+ util_ss.add(files('memalign.c'))
+util_ss.add(files('lockcnt.c'))
+ 
+ if have_user
+   util_ss.add(files('selfmap.c'))
+@@ -71,7 +72,6 @@ endif
+ if have_block or have_ga
+   util_ss.add(files('aiocb.c', 'async.c'))
+   util_ss.add(files('base64.c'))
+-  util_ss.add(files('lockcnt.c'))
+   util_ss.add(files('main-loop.c'))
+   util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
+   util_ss.add(files('coroutine-@0@.c'.format(config_host['CONFIG_COROUTINE_BACKEND'])))
+-- 
+2.31.1
+
--- a/kvm-block-Call-drain-callbacks-only-once.patch
+++ b/kvm-block-Call-drain-callbacks-only-once.patch
@ -0,0 +1,250 @@
+From 9bb9cafd736057fd2a8ebfa6f5769668f125fbe6 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:06 +0100
+Subject: [PATCH 24/31] block: Call drain callbacks only once
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [12/16] ea9a433dc01d1b8539a2d4ea12887f2a3ce830ea (sgarzarella/qemu-kvm-c-9-s)
+
+We only need to call both the BlockDriver's callback and the parent
+callbacks when going from undrained to drained or vice versa. A second
+drain section doesn't make a difference for the driver or the parent,
+they weren't supposed to send new requests before and after the second
+drain.
+
+One thing that gets in the way is the 'ignore_bds_parents' parameter in
+bdrv_do_drained_begin_quiesce() and bdrv_do_drained_end(): It means that
+bdrv_drain_all_begin() increases bs->quiesce_counter, but does not
+quiesce the parent through BdrvChildClass callbacks. If an additional
+drain section is started now, bs->quiesce_counter will be non-zero, but
+we would still need to quiesce the parent through BdrvChildClass in
+order to keep things consistent (and unquiesce it on the matching
+bdrv_drained_end(), even though the counter would not reach 0 yet as
+long as the bdrv_drain_all() section is still active).
+
+Instead of keeping track of this, let's just get rid of the parameter.
+It was introduced in commit 6cd5c9d7b2d as an optimisation so that
+during bdrv_drain_all(), we wouldn't recursively drain all parents up to
+the root for each node, resulting in quadratic complexity. As it happens,
+calling the callbacks only once solves the same problem, so as of this
+patch, we'll still have O(n) complexity and ignore_bds_parents is not
+needed any more.
+
+This patch only ignores the 'ignore_bds_parents' parameter. It will be
+removed in a separate patch.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-12-kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 57e05be343f33f4e5899a8d8946a8596d68424a1)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                          | 25 +++++++------------------
+ block/io.c                       | 30 ++++++++++++++++++------------
+ include/block/block_int-common.h |  8 ++++----
+ tests/unit/test-bdrv-drain.c     | 16 ++++++++++------
+ 4 files changed, 39 insertions(+), 40 deletions(-)
+
+diff --git a/block.c b/block.c
+index e0e3b21790..5a583e260d 100644
+--- a/block.c
+++ b/block.c
+@@ -2824,7 +2824,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+ {
+     BlockDriverState *old_bs = child->bs;
+     int new_bs_quiesce_counter;
+-    int drain_saldo;
+ 
+     assert(!child->frozen);
+     assert(old_bs != new_bs);
+@@ -2834,16 +2833,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+     }
+ 
+-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+-    drain_saldo = new_bs_quiesce_counter - child->parent_quiesce_counter;
+-
+     /*
+      * If the new child node is drained but the old one was not, flush
+      * all outstanding requests to the old child node.
+      */
+-    while (drain_saldo > 0 && child->klass->drained_begin) {
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+    if (new_bs_quiesce_counter && !child->quiesced_parent) {
+         bdrv_parent_drained_begin_single(child, true);
+-        drain_saldo--;
+     }
+ 
+     if (old_bs) {
+@@ -2859,16 +2855,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     if (new_bs) {
+         assert_bdrv_graph_writable(new_bs);
+         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+-
+-        /*
+-         * Polling in bdrv_parent_drained_begin_single() may have led to the new
+-         * node's quiesce_counter having been decreased.  Not a problem, we just
+-         * need to recognize this here and then invoke drained_end appropriately
+-         * more often.
+-         */
+-        assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
+-        drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
+-
+         if (child->klass->attach) {
+             child->klass->attach(child);
+         }
+@@ -2877,10 +2863,13 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     /*
+      * If the old child node was drained but the new one is not, allow
+      * requests to come in only after the new node has been attached.
+     *
+     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+     * polls, which could have changed the value.
+      */
+-    while (drain_saldo < 0 && child->klass->drained_end) {
+    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+    if (!new_bs_quiesce_counter && child->quiesced_parent) {
+         bdrv_parent_drained_end_single(child);
+-        drain_saldo++;
+     }
+ }
+ 
+diff --git a/block/io.c b/block/io.c
+index 75224480d0..87d6f22ec4 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -62,8 +62,9 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
+ {
+     IO_OR_GS_CODE();
+ 
+-    assert(c->parent_quiesce_counter > 0);
+-    c->parent_quiesce_counter--;
+    assert(c->quiesced_parent);
+    c->quiesced_parent = false;
+
+     if (c->klass->drained_end) {
+         c->klass->drained_end(c);
+     }
+@@ -110,7 +111,10 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+ {
+     AioContext *ctx = bdrv_child_get_parent_aio_context(c);
+     IO_OR_GS_CODE();
+-    c->parent_quiesce_counter++;
+
+    assert(!c->quiesced_parent);
+    c->quiesced_parent = true;
+
+     if (c->klass->drained_begin) {
+         c->klass->drained_begin(c);
+     }
+@@ -358,11 +362,12 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+     /* Stop things in parent-to-child order */
+     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+         aio_disable_external(bdrv_get_aio_context(bs));
+-    }
+ 
+-    bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
+-    if (bs->drv && bs->drv->bdrv_drain_begin) {
+-        bs->drv->bdrv_drain_begin(bs);
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
+        bdrv_parent_drained_begin(bs, parent, false);
+        if (bs->drv && bs->drv->bdrv_drain_begin) {
+            bs->drv->bdrv_drain_begin(bs);
+        }
+     }
+ }
+ 
+@@ -413,13 +418,14 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+     assert(bs->quiesce_counter > 0);
+ 
+     /* Re-enable things in child-to-parent order */
+-    if (bs->drv && bs->drv->bdrv_drain_end) {
+-        bs->drv->bdrv_drain_end(bs);
+-    }
+-    bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
+-
+     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
+     if (old_quiesce_counter == 1) {
+        if (bs->drv && bs->drv->bdrv_drain_end) {
+            bs->drv->bdrv_drain_end(bs);
+        }
+        /* TODO Remove ignore_bds_parents, we don't consider it any more */
+        bdrv_parent_drained_end(bs, parent, false);
+
+         aio_enable_external(bdrv_get_aio_context(bs));
+     }
+ }
+diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
+index 791dddfd7d..a6bc6b7fe9 100644
+--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
+@@ -980,13 +980,13 @@ struct BdrvChild {
+     bool frozen;
+ 
+     /*
+-     * How many times the parent of this child has been drained
+     * True if the parent of this child has been drained by this BdrvChild
+      * (through klass->drained_*).
+-     * Usually, this is equal to bs->quiesce_counter (potentially
+-     * reduced by bdrv_drain_all_count).  It may differ while the
+     *
+     * It is generally true if bs->quiesce_counter > 0. It may differ while the
+      * child is entering or leaving a drained section.
+      */
+-    int parent_quiesce_counter;
+    bool quiesced_parent;
+ 
+     QLIST_ENTRY(BdrvChild) next;
+     QLIST_ENTRY(BdrvChild) next_parent;
+diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
+index dda08de8db..172bc6debc 100644
+--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
+@@ -296,7 +296,11 @@ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+ 
+     do_drain_begin(drain_type, bs);
+ 
+-    g_assert_cmpint(bs->quiesce_counter, ==, 1);
+    if (drain_type == BDRV_DRAIN_ALL) {
+        g_assert_cmpint(bs->quiesce_counter, ==, 2);
+    } else {
+        g_assert_cmpint(bs->quiesce_counter, ==, 1);
+    }
+     g_assert_cmpint(backing->quiesce_counter, ==, !!recursive);
+ 
+     do_drain_end(drain_type, bs);
+@@ -348,8 +352,8 @@ static void test_nested(void)
+ 
+     for (outer = 0; outer < DRAIN_TYPE_MAX; outer++) {
+         for (inner = 0; inner < DRAIN_TYPE_MAX; inner++) {
+-            int backing_quiesce = (outer != BDRV_DRAIN) +
+-                                  (inner != BDRV_DRAIN);
+            int backing_quiesce = (outer == BDRV_DRAIN_ALL) +
+                                  (inner == BDRV_DRAIN_ALL);
+ 
+             g_assert_cmpint(bs->quiesce_counter, ==, 0);
+             g_assert_cmpint(backing->quiesce_counter, ==, 0);
+@@ -359,10 +363,10 @@ static void test_nested(void)
+             do_drain_begin(outer, bs);
+             do_drain_begin(inner, bs);
+ 
+-            g_assert_cmpint(bs->quiesce_counter, ==, 2);
+            g_assert_cmpint(bs->quiesce_counter, ==, 2 + !!backing_quiesce);
+             g_assert_cmpint(backing->quiesce_counter, ==, backing_quiesce);
+-            g_assert_cmpint(s->drain_count, ==, 2);
+-            g_assert_cmpint(backing_s->drain_count, ==, backing_quiesce);
+            g_assert_cmpint(s->drain_count, ==, 1);
+            g_assert_cmpint(backing_s->drain_count, ==, !!backing_quiesce);
+ 
+             do_drain_end(inner, bs);
+             do_drain_end(outer, bs);
+-- 
+2.31.1
+
--- a/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch
+++ b/kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch
@ -0,0 +1,298 @@
+From 150ef3356cc6732fede7ca059168fc0565ed0b76 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:09 +0100
+Subject: [PATCH 27/31] block: Don't poll in bdrv_replace_child_noperm()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [15/16] 5fc7d6b703a2d6c1118d875056f0afbd6ba5cca9 (sgarzarella/qemu-kvm-c-9-s)
+
+In order to make sure that bdrv_replace_child_noperm() doesn't have to
+poll any more, get rid of the bdrv_parent_drained_begin_single() call.
+
+This is possible now because we can require that the parent is already
+drained through the child in question when the function is called and we
+don't call the parent drain callbacks more than once.
+
+The additional drain calls needed in callers cause the test case to run
+its code in the drain handler too early (bdrv_attach_child() drains
+now), so modify it to only enable the code after the test setup has
+completed.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-15-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 23987471285a26397e3152a9244b652445fd36c4)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                      | 103 ++++++++++++++++++++++++++++++-----
+ block/io.c                   |   2 +-
+ include/block/block-io.h     |   8 +++
+ tests/unit/test-bdrv-drain.c |  10 ++++
+ 4 files changed, 108 insertions(+), 15 deletions(-)
+
+diff --git a/block.c b/block.c
+index af31a94863..65588d313a 100644
+--- a/block.c
+++ b/block.c
+@@ -2407,6 +2407,20 @@ static void bdrv_replace_child_abort(void *opaque)
+ 
+     GLOBAL_STATE_CODE();
+     /* old_bs reference is transparently moved from @s to @s->child */
+    if (!s->child->bs) {
+        /*
+         * The parents were undrained when removing old_bs from the child. New
+         * requests can't have been made, though, because the child was empty.
+         *
+         * TODO Make bdrv_replace_child_noperm() transactionable to avoid
+         * undraining the parent in the first place. Once this is done, having
+         * new_bs drained when calling bdrv_replace_child_tran() is not a
+         * requirement any more.
+         */
+        bdrv_parent_drained_begin_single(s->child, false);
+        assert(!bdrv_parent_drained_poll_single(s->child));
+    }
+    assert(s->child->quiesced_parent);
+     bdrv_replace_child_noperm(s->child, s->old_bs);
+     bdrv_unref(new_bs);
+ }
+@@ -2422,12 +2436,19 @@ static TransactionActionDrv bdrv_replace_child_drv = {
+  *
+  * Note: real unref of old_bs is done only on commit.
+  *
+ * Both @child->bs and @new_bs (if non-NULL) must be drained. @new_bs must be
+ * kept drained until the transaction is completed.
+ *
+  * The function doesn't update permissions, caller is responsible for this.
+  */
+ static void bdrv_replace_child_tran(BdrvChild *child, BlockDriverState *new_bs,
+                                     Transaction *tran)
+ {
+     BdrvReplaceChildState *s = g_new(BdrvReplaceChildState, 1);
+
+    assert(child->quiesced_parent);
+    assert(!new_bs || new_bs->quiesce_counter);
+
+     *s = (BdrvReplaceChildState) {
+         .child = child,
+         .old_bs = child->bs,
+@@ -2819,6 +2840,14 @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
+     return permissions[qapi_perm];
+ }
+ 
+/*
+ * Replaces the node that a BdrvChild points to without updating permissions.
+ *
+ * If @new_bs is non-NULL, the parent of @child must already be drained through
+ * @child.
+ *
+ * This function does not poll.
+ */
+ static void bdrv_replace_child_noperm(BdrvChild *child,
+                                       BlockDriverState *new_bs)
+ {
+@@ -2826,6 +2855,28 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     int new_bs_quiesce_counter;
+ 
+     assert(!child->frozen);
+
+    /*
+     * If we want to change the BdrvChild to point to a drained node as its new
+     * child->bs, we need to make sure that its new parent is drained, too. In
+     * other words, either child->quiesced_parent must already be true or we
+     * must be able to set it and keep the parent's quiesce_counter consistent
+     * with that, but without polling or starting new requests (this function
+     * guarantees that it doesn't poll, and starting new requests would be
+     * against the invariants of drain sections).
+     *
+     * To keep things simple, we pick the first option (child->quiesced_parent
+     * must already be true). We also generalise the rule a bit to make it
+     * easier to verify in callers and more likely to be covered in test cases:
+     * The parent must be quiesced through this child even if new_bs isn't
+     * currently drained.
+     *
+     * The only exception is for callers that always pass new_bs == NULL. In
+     * this case, we obviously never need to consider the case of a drained
+     * new_bs, so we can keep the callers simpler by allowing them not to drain
+     * the parent.
+     */
+    assert(!new_bs || child->quiesced_parent);
+     assert(old_bs != new_bs);
+     GLOBAL_STATE_CODE();
+ 
+@@ -2833,15 +2884,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+         assert(bdrv_get_aio_context(old_bs) == bdrv_get_aio_context(new_bs));
+     }
+ 
+-    /*
+-     * If the new child node is drained but the old one was not, flush
+-     * all outstanding requests to the old child node.
+-     */
+-    new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+-    if (new_bs_quiesce_counter && !child->quiesced_parent) {
+-        bdrv_parent_drained_begin_single(child, true);
+-    }
+-
+     if (old_bs) {
+         if (child->klass->detach) {
+             child->klass->detach(child);
+@@ -2861,11 +2903,9 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     }
+ 
+     /*
+-     * If the old child node was drained but the new one is not, allow
+-     * requests to come in only after the new node has been attached.
+-     *
+-     * Update new_bs_quiesce_counter because bdrv_parent_drained_begin_single()
+-     * polls, which could have changed the value.
+     * If the parent was drained through this BdrvChild previously, but new_bs
+     * is not drained, allow requests to come in only after the new node has
+     * been attached.
+      */
+     new_bs_quiesce_counter = (new_bs ? new_bs->quiesce_counter : 0);
+     if (!new_bs_quiesce_counter && child->quiesced_parent) {
+@@ -3002,6 +3042,24 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
+     }
+ 
+     bdrv_ref(child_bs);
+    /*
+     * Let every new BdrvChild start with a drained parent. Inserting the child
+     * in the graph with bdrv_replace_child_noperm() will undrain it if
+     * @child_bs is not drained.
+     *
+     * The child was only just created and is not yet visible in global state
+     * until bdrv_replace_child_noperm() inserts it into the graph, so nobody
+     * could have sent requests and polling is not necessary.
+     *
+     * Note that this means that the parent isn't fully drained yet, we only
+     * stop new requests from coming in. This is fine, we don't care about the
+     * old requests here, they are not for this child. If another place enters a
+     * drain section for the same parent, but wants it to be fully quiesced, it
+     * will not run most of the code in .drained_begin() again (which is not
+     * a problem, we already did this), but it will still poll until the parent
+     * is fully quiesced, so it will not be negatively affected either.
+     */
+    bdrv_parent_drained_begin_single(new_child, false);
+     bdrv_replace_child_noperm(new_child, child_bs);
+ 
+     BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
+@@ -5059,12 +5117,24 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
+     }
+ 
+     if (child->bs) {
+        BlockDriverState *bs = child->bs;
+        bdrv_drained_begin(bs);
+         bdrv_replace_child_tran(child, NULL, tran);
+        bdrv_drained_end(bs);
+     }
+ 
+     tran_add(tran, &bdrv_remove_child_drv, child);
+ }
+ 
+static void undrain_on_clean_cb(void *opaque)
+{
+    bdrv_drained_end(opaque);
+}
+
+static TransactionActionDrv undrain_on_clean = {
+    .clean = undrain_on_clean_cb,
+};
+
+ static int bdrv_replace_node_noperm(BlockDriverState *from,
+                                     BlockDriverState *to,
+                                     bool auto_skip, Transaction *tran,
+@@ -5074,6 +5144,11 @@ static int bdrv_replace_node_noperm(BlockDriverState *from,
+ 
+     GLOBAL_STATE_CODE();
+ 
+    bdrv_drained_begin(from);
+    bdrv_drained_begin(to);
+    tran_add(tran, &undrain_on_clean, from);
+    tran_add(tran, &undrain_on_clean, to);
+
+     QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+         assert(c->bs == from);
+         if (!should_update_child(c, to)) {
+diff --git a/block/io.c b/block/io.c
+index 5e9150d92c..ae64830eac 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -81,7 +81,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+     }
+ }
+ 
+-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
+ {
+     if (c->klass->drained_poll) {
+         return c->klass->drained_poll(c);
+diff --git a/include/block/block-io.h b/include/block/block-io.h
+index 8f5e75756a..65e6d2569b 100644
+--- a/include/block/block-io.h
+++ b/include/block/block-io.h
+@@ -292,6 +292,14 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+  */
+ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+ 
+/**
+ * bdrv_parent_drained_poll_single:
+ *
+ * Returns true if there is any pending activity to cease before @c can be
+ * called quiesced, false otherwise.
+ */
+bool bdrv_parent_drained_poll_single(BdrvChild *c);
+
+ /**
+  * bdrv_parent_drained_end_single:
+  *
+diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
+index 172bc6debc..2686a8acee 100644
+--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
+@@ -1654,6 +1654,7 @@ static void test_drop_intermediate_poll(void)
+ 
+ 
+ typedef struct BDRVReplaceTestState {
+    bool setup_completed;
+     bool was_drained;
+     bool was_undrained;
+     bool has_read;
+@@ -1738,6 +1739,10 @@ static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
+ {
+     BDRVReplaceTestState *s = bs->opaque;
+ 
+    if (!s->setup_completed) {
+        return;
+    }
+
+     if (!s->drain_count) {
+         s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
+         bdrv_inc_in_flight(bs);
+@@ -1769,6 +1774,10 @@ static void bdrv_replace_test_drain_end(BlockDriverState *bs)
+ {
+     BDRVReplaceTestState *s = bs->opaque;
+ 
+    if (!s->setup_completed) {
+        return;
+    }
+
+     g_assert(s->drain_count > 0);
+     if (!--s->drain_count) {
+         s->was_undrained = true;
+@@ -1867,6 +1876,7 @@ static void do_test_replace_child_mid_drain(int old_drain_count,
+     bdrv_ref(old_child_bs);
+     bdrv_attach_child(parent_bs, old_child_bs, "child", &child_of_bds,
+                       BDRV_CHILD_COW, &error_abort);
+    parent_s->setup_completed = true;
+ 
+     for (i = 0; i < old_drain_count; i++) {
+         bdrv_drained_begin(old_child_bs);
+-- 
+2.31.1
+
--- a/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch
+++ b/kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch
@ -0,0 +1,54 @@
+From 6af6de77dace29aa8548b3649dc9c6163740ac86 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:03 +0100
+Subject: [PATCH 21/31] block: Don't use subtree drains in
+ bdrv_drop_intermediate()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [9/16] 3c06fa826f262558f57d38b0155500c2e8e23a53 (sgarzarella/qemu-kvm-c-9-s)
+
+Instead of using a subtree drain from the top node (which also drains
+child nodes of base that we're not even interested in), use a normal
+drain for base, which automatically drains all of the parents, too.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-9-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 631086deefc32690ee56efed1c5b891dec31ae37)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/block.c b/block.c
+index cb5e96b1cf..b3449a312e 100644
+--- a/block.c
+++ b/block.c
+@@ -5586,7 +5586,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+     GLOBAL_STATE_CODE();
+ 
+     bdrv_ref(top);
+-    bdrv_subtree_drained_begin(top);
+    bdrv_drained_begin(base);
+ 
+     if (!top->drv || !base->drv) {
+         goto exit;
+@@ -5659,7 +5659,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
+ 
+     ret = 0;
+ exit:
+-    bdrv_subtree_drained_end(top);
+    bdrv_drained_end(base);
+     bdrv_unref(top);
+     return ret;
+ }
+-- 
+2.31.1
+
--- a/kvm-block-Drain-individual-nodes-during-reopen.patch
+++ b/kvm-block-Drain-individual-nodes-during-reopen.patch
@ -0,0 +1,157 @@
+From ad52cb621daad45d3c2a0e2e670d6ca2e16690bd Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:02 +0100
+Subject: [PATCH 20/31] block: Drain individual nodes during reopen
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [8/16] 5441b6f0ae9102ef40d1093e1db3084eea81e3b0 (sgarzarella/qemu-kvm-c-9-s)
+
+bdrv_reopen() and friends use subtree drains as a lazy way of covering
+all the nodes they touch. Turns out that this lazy way is a lot more
+complicated than just draining the nodes individually, even not
+accounting for the additional complexity in the drain mechanism itself.
+
+Simplify the code by switching to draining the individual nodes that are
+already managed in the BlockReopenQueue anyway.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-8-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit d22933acd2f470eeef779e4d444e848f76dcfaf8)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c             | 16 +++++++++-------
+ block/replication.c |  6 ------
+ blockdev.c          | 13 -------------
+ 3 files changed, 9 insertions(+), 26 deletions(-)
+
+diff --git a/block.c b/block.c
+index 46df410b07..cb5e96b1cf 100644
+--- a/block.c
+++ b/block.c
+@@ -4150,7 +4150,7 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
+  * returns a pointer to bs_queue, which is either the newly allocated
+  * bs_queue, or the existing bs_queue being used.
+  *
+- * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ * bs is drained here and undrained by bdrv_reopen_queue_free().
+  *
+  * To be called with bs->aio_context locked.
+  */
+@@ -4172,12 +4172,10 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
+     int flags;
+     QemuOpts *opts;
+ 
+-    /* Make sure that the caller remembered to use a drained section. This is
+-     * important to avoid graph changes between the recursive queuing here and
+-     * bdrv_reopen_multiple(). */
+-    assert(bs->quiesce_counter > 0);
+     GLOBAL_STATE_CODE();
+ 
+    bdrv_drained_begin(bs);
+
+     if (bs_queue == NULL) {
+         bs_queue = g_new0(BlockReopenQueue, 1);
+         QTAILQ_INIT(bs_queue);
+@@ -4328,6 +4326,12 @@ void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue)
+     if (bs_queue) {
+         BlockReopenQueueEntry *bs_entry, *next;
+         QTAILQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+            AioContext *ctx = bdrv_get_aio_context(bs_entry->state.bs);
+
+            aio_context_acquire(ctx);
+            bdrv_drained_end(bs_entry->state.bs);
+            aio_context_release(ctx);
+
+             qobject_unref(bs_entry->state.explicit_options);
+             qobject_unref(bs_entry->state.options);
+             g_free(bs_entry);
+@@ -4475,7 +4479,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+ 
+     GLOBAL_STATE_CODE();
+ 
+-    bdrv_subtree_drained_begin(bs);
+     queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+ 
+     if (ctx != qemu_get_aio_context()) {
+@@ -4486,7 +4489,6 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+     if (ctx != qemu_get_aio_context()) {
+         aio_context_acquire(ctx);
+     }
+-    bdrv_subtree_drained_end(bs);
+ 
+     return ret;
+ }
+diff --git a/block/replication.c b/block/replication.c
+index f1eed25e43..c62f48a874 100644
+--- a/block/replication.c
+++ b/block/replication.c
+@@ -374,9 +374,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
+         s->orig_secondary_read_only = bdrv_is_read_only(secondary_disk->bs);
+     }
+ 
+-    bdrv_subtree_drained_begin(hidden_disk->bs);
+-    bdrv_subtree_drained_begin(secondary_disk->bs);
+-
+     if (s->orig_hidden_read_only) {
+         QDict *opts = qdict_new();
+         qdict_put_bool(opts, BDRV_OPT_READ_ONLY, !writable);
+@@ -401,9 +398,6 @@ static void reopen_backing_file(BlockDriverState *bs, bool writable,
+             aio_context_acquire(ctx);
+         }
+     }
+-
+-    bdrv_subtree_drained_end(hidden_disk->bs);
+-    bdrv_subtree_drained_end(secondary_disk->bs);
+ }
+ 
+ static void backup_job_cleanup(BlockDriverState *bs)
+diff --git a/blockdev.c b/blockdev.c
+index 3f1dec6242..8ffb3d9537 100644
+--- a/blockdev.c
+++ b/blockdev.c
+@@ -3547,8 +3547,6 @@ fail:
+ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
+ {
+     BlockReopenQueue *queue = NULL;
+-    GSList *drained = NULL;
+-    GSList *p;
+ 
+     /* Add each one of the BDS that we want to reopen to the queue */
+     for (; reopen_list != NULL; reopen_list = reopen_list->next) {
+@@ -3585,9 +3583,7 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
+         ctx = bdrv_get_aio_context(bs);
+         aio_context_acquire(ctx);
+ 
+-        bdrv_subtree_drained_begin(bs);
+         queue = bdrv_reopen_queue(queue, bs, qdict, false);
+-        drained = g_slist_prepend(drained, bs);
+ 
+         aio_context_release(ctx);
+     }
+@@ -3598,15 +3594,6 @@ void qmp_blockdev_reopen(BlockdevOptionsList *reopen_list, Error **errp)
+ 
+ fail:
+     bdrv_reopen_queue_free(queue);
+-    for (p = drained; p; p = p->next) {
+-        BlockDriverState *bs = p->data;
+-        AioContext *ctx = bdrv_get_aio_context(bs);
+-
+-        aio_context_acquire(ctx);
+-        bdrv_subtree_drained_end(bs);
+-        aio_context_release(ctx);
+-    }
+-    g_slist_free(drained);
+ }
+ 
+ void qmp_blockdev_del(const char *node_name, Error **errp)
+-- 
+2.31.1
+
--- a/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch
+++ b/kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch
@ -0,0 +1,96 @@
+From 9a789d104a4a69031ad95d7fad6380ab21e82503 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:08 +0100
+Subject: [PATCH 26/31] block: Drop out of coroutine in
+ bdrv_do_drained_begin_quiesce()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [14/16] c9266663b822f703e55b6a07de98ceb56e69e924 (sgarzarella/qemu-kvm-c-9-s)
+
+The next patch adds a parent drain to bdrv_attach_child_common(), which
+shouldn't be, but is currently called from coroutines in some cases (e.g.
+.bdrv_co_create implementations generally open new nodes). Therefore,
+the assertion that we're not in a coroutine doesn't hold true any more.
+
+We could just remove the assertion because there is nothing in the
+function that should be in conflict with running in a coroutine, but
+just to be on the safe side, we can reverse the caller relationship
+between bdrv_do_drained_begin() and bdrv_do_drained_begin_quiesce() so
+that the latter also just drops out of coroutine context and we can
+still be certain in the future that any drain code doesn't run in
+coroutines.
+
+As a nice side effect, the structure of bdrv_do_drained_begin() is now
+symmetrical with bdrv_do_drained_end().
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-14-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 05c272ff0cf1b16cc3606f746182dd99b774f553)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block/io.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+diff --git a/block/io.c b/block/io.c
+index 2e9503df6a..5e9150d92c 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -346,10 +346,15 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+     }
+ }
+ 
+-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+                                  bool poll)
+ {
+     IO_OR_GS_CODE();
+-    assert(!qemu_in_coroutine());
+
+    if (qemu_in_coroutine()) {
+        bdrv_co_yield_to_drain(bs, true, parent, poll);
+        return;
+    }
+ 
+     /* Stop things in parent-to-child order */
+     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+@@ -359,17 +364,6 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+             bs->drv->bdrv_drain_begin(bs);
+         }
+     }
+-}
+-
+-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+-                                  bool poll)
+-{
+-    if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(bs, true, parent, poll);
+-        return;
+-    }
+-
+-    bdrv_do_drained_begin_quiesce(bs, parent);
+ 
+     /*
+      * Wait for drained requests to finish.
+@@ -385,6 +379,11 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+     }
+ }
+ 
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+{
+    bdrv_do_drained_begin(bs, parent, false);
+}
+
+ void bdrv_drained_begin(BlockDriverState *bs)
+ {
+     IO_OR_GS_CODE();
+-- 
+2.31.1
+
--- a/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch
+++ b/kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch
@ -0,0 +1,67 @@
+From e790b4c20a5124239fe93e91fbc87745e5f2cea6 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:01 +0100
+Subject: [PATCH 19/31] block: Fix locking for bdrv_reopen_queue_child()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [7/16] 46bb54506c4400b9a1bf66b6bd7987ff67260003 (sgarzarella/qemu-kvm-c-9-s)
+
+Callers don't agree whether bdrv_reopen_queue_child() should be called
+with the AioContext lock held or not. Standardise on holding the lock
+(as done by QMP blockdev-reopen and the replication block driver) and
+fix bdrv_reopen() to do the same.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-7-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 2e117866d7c96cc17e84cd2946fee1bf3292d814)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/block.c b/block.c
+index 7999fd08c5..46df410b07 100644
+--- a/block.c
+++ b/block.c
+@@ -4151,6 +4151,8 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
+  * bs_queue, or the existing bs_queue being used.
+  *
+  * bs must be drained between bdrv_reopen_queue() and bdrv_reopen_multiple().
+ *
+ * To be called with bs->aio_context locked.
+  */
+ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
+                                                  BlockDriverState *bs,
+@@ -4309,6 +4311,7 @@ static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
+     return bs_queue;
+ }
+ 
+/* To be called with bs->aio_context locked */
+ BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                     BlockDriverState *bs,
+                                     QDict *options, bool keep_old_opts)
+@@ -4473,11 +4476,11 @@ int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
+     GLOBAL_STATE_CODE();
+ 
+     bdrv_subtree_drained_begin(bs);
+    queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+
+     if (ctx != qemu_get_aio_context()) {
+         aio_context_release(ctx);
+     }
+-
+-    queue = bdrv_reopen_queue(NULL, bs, opts, keep_old_opts);
+     ret = bdrv_reopen_multiple(queue, errp);
+ 
+     if (ctx != qemu_get_aio_context()) {
+-- 
+2.31.1
+
--- a/kvm-block-Inline-bdrv_drain_invoke.patch
+++ b/kvm-block-Inline-bdrv_drain_invoke.patch
@ -0,0 +1,81 @@
+From 1808e560396872173f787f8e338e9837a4c3d626 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:00 +0100
+Subject: [PATCH 18/31] block: Inline bdrv_drain_invoke()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [6/16] 2c7473a36360eb43d94b967deb12308cb5ea0d3b (sgarzarella/qemu-kvm-c-9-s)
+
+bdrv_drain_invoke() now has two entirely separate cases that share no
+code any more and are selected depending on a bool parameter. Each case
+has only one caller. Just inline the function.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-6-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit c7bc05f78ab31fb02fc9635f60b9bd22efc8d121)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block/io.c | 23 ++++++-----------------
+ 1 file changed, 6 insertions(+), 17 deletions(-)
+
+diff --git a/block/io.c b/block/io.c
+index f4ca62b034..a25103be6f 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -242,21 +242,6 @@ typedef struct {
+     bool ignore_bds_parents;
+ } BdrvCoDrainData;
+ 
+-/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
+-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
+-{
+-    if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+-            (!begin && !bs->drv->bdrv_drain_end)) {
+-        return;
+-    }
+-
+-    if (begin) {
+-        bs->drv->bdrv_drain_begin(bs);
+-    } else {
+-        bs->drv->bdrv_drain_end(bs);
+-    }
+-}
+-
+ /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
+ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+                      BdrvChild *ignore_parent, bool ignore_bds_parents)
+@@ -390,7 +375,9 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+     }
+ 
+     bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
+-    bdrv_drain_invoke(bs, true);
+    if (bs->drv && bs->drv->bdrv_drain_begin) {
+        bs->drv->bdrv_drain_begin(bs);
+    }
+ }
+ 
+ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+@@ -461,7 +448,9 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+     assert(bs->quiesce_counter > 0);
+ 
+     /* Re-enable things in child-to-parent order */
+-    bdrv_drain_invoke(bs, false);
+    if (bs->drv && bs->drv->bdrv_drain_end) {
+        bs->drv->bdrv_drain_end(bs);
+    }
+     bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
+ 
+     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
+-- 
+2.31.1
+
--- a/kvm-block-Remove-drained_end_counter.patch
+++ b/kvm-block-Remove-drained_end_counter.patch
@@ -0,0 +1,433 @@
+From 3009e49f242ab371ffad35bb29c2c26ddfac75d4 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:40:59 +0100
+Subject: [PATCH 17/31] block: Remove drained_end_counter
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [5/16] 5589e3f05dece5394a05641f7f42096e8dc62bdb (sgarzarella/qemu-kvm-c-9-s)
+
+drained_end_counter is unused now, nobody changes its value any more. It
+can be removed.
+
+In cases where we had two almost identical functions that only differed
+in whether the caller passes drained_end_counter, or whether they would
+poll for a local drained_end_counter to reach 0, these become a single
+function.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Message-Id: <20221118174110.55183-5-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 2f65df6e16dea2d6e7212fa675f4779d9281e26f)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                          |  5 +-
+ block/block-backend.c            |  4 +-
+ block/io.c                       | 98 ++++++++------------------------
+ blockjob.c                       |  2 +-
+ include/block/block-io.h         | 24 --------
+ include/block/block_int-common.h |  6 +-
+ 6 files changed, 30 insertions(+), 109 deletions(-)
+
+diff --git a/block.c b/block.c
+index 16a62a329c..7999fd08c5 100644
+--- a/block.c
++++ b/block.c
+@@ -1235,11 +1235,10 @@ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+     return bdrv_drain_poll(bs, false, NULL, false);
+ }
+ 
+-static void bdrv_child_cb_drained_end(BdrvChild *child,
+-                                      int *drained_end_counter)
++static void bdrv_child_cb_drained_end(BdrvChild *child)
+ {
+     BlockDriverState *bs = child->opaque;
+-    bdrv_drained_end_no_poll(bs, drained_end_counter);
++    bdrv_drained_end(bs);
+ }
+ 
+ static int bdrv_child_cb_inactivate(BdrvChild *child)
+diff --git a/block/block-backend.c b/block/block-backend.c
+index d98a96ff37..feaf2181fa 100644
+--- a/block/block-backend.c
++++ b/block/block-backend.c
+@@ -129,7 +129,7 @@ static void blk_root_inherit_options(BdrvChildRole role, bool parent_is_format,
+ }
+ static void blk_root_drained_begin(BdrvChild *child);
+ static bool blk_root_drained_poll(BdrvChild *child);
+-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter);
++static void blk_root_drained_end(BdrvChild *child);
+ 
+ static void blk_root_change_media(BdrvChild *child, bool load);
+ static void blk_root_resize(BdrvChild *child);
+@@ -2556,7 +2556,7 @@ static bool blk_root_drained_poll(BdrvChild *child)
+     return busy || !!blk->in_flight;
+ }
+ 
+-static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
++static void blk_root_drained_end(BdrvChild *child)
+ {
+     BlockBackend *blk = child->opaque;
+     assert(blk->quiesce_counter);
+diff --git a/block/io.c b/block/io.c
+index c2ed4b2af9..f4ca62b034 100644
+--- a/block/io.c
++++ b/block/io.c
+@@ -58,28 +58,19 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+     }
+ }
+ 
+-static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
+-                                                   int *drained_end_counter)
++void bdrv_parent_drained_end_single(BdrvChild *c)
+ {
++    IO_OR_GS_CODE();
++
+     assert(c->parent_quiesce_counter > 0);
+     c->parent_quiesce_counter--;
+     if (c->klass->drained_end) {
+-        c->klass->drained_end(c, drained_end_counter);
++        c->klass->drained_end(c);
+     }
+ }
+ 
+-void bdrv_parent_drained_end_single(BdrvChild *c)
+-{
+-    int drained_end_counter = 0;
+-    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
+-    IO_OR_GS_CODE();
+-    bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
+-    AIO_WAIT_WHILE(ctx, qatomic_read(&drained_end_counter) > 0);
+-}
+-
+ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+-                                    bool ignore_bds_parents,
+-                                    int *drained_end_counter)
++                                    bool ignore_bds_parents)
+ {
+     BdrvChild *c;
+ 
+@@ -87,7 +78,7 @@ static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+         if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+             continue;
+         }
+-        bdrv_parent_drained_end_single_no_poll(c, drained_end_counter);
++        bdrv_parent_drained_end_single(c);
+     }
+ }
+ 
+@@ -249,12 +240,10 @@ typedef struct {
+     bool poll;
+     BdrvChild *parent;
+     bool ignore_bds_parents;
+-    int *drained_end_counter;
+ } BdrvCoDrainData;
+ 
+ /* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
+-static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
+-                              int *drained_end_counter)
++static void bdrv_drain_invoke(BlockDriverState *bs, bool begin)
+ {
+     if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+             (!begin && !bs->drv->bdrv_drain_end)) {
+@@ -305,8 +294,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+                                   BdrvChild *parent, bool ignore_bds_parents,
+                                   bool poll);
+ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+-                                BdrvChild *parent, bool ignore_bds_parents,
+-                                int *drained_end_counter);
++                                BdrvChild *parent, bool ignore_bds_parents);
+ 
+ static void bdrv_co_drain_bh_cb(void *opaque)
+ {
+@@ -319,14 +307,12 @@ static void bdrv_co_drain_bh_cb(void *opaque)
+         aio_context_acquire(ctx);
+         bdrv_dec_in_flight(bs);
+         if (data->begin) {
+-            assert(!data->drained_end_counter);
+             bdrv_do_drained_begin(bs, data->recursive, data->parent,
+                                   data->ignore_bds_parents, data->poll);
+         } else {
+             assert(!data->poll);
+             bdrv_do_drained_end(bs, data->recursive, data->parent,
+-                                data->ignore_bds_parents,
+-                                data->drained_end_counter);
++                                data->ignore_bds_parents);
+         }
+         aio_context_release(ctx);
+     } else {
+@@ -342,8 +328,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+                                                 bool begin, bool recursive,
+                                                 BdrvChild *parent,
+                                                 bool ignore_bds_parents,
+-                                                bool poll,
+-                                                int *drained_end_counter)
++                                                bool poll)
+ {
+     BdrvCoDrainData data;
+     Coroutine *self = qemu_coroutine_self();
+@@ -363,7 +348,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+         .parent = parent,
+         .ignore_bds_parents = ignore_bds_parents,
+         .poll = poll,
+-        .drained_end_counter = drained_end_counter,
+     };
+ 
+     if (bs) {
+@@ -406,7 +390,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+     }
+ 
+     bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
+-    bdrv_drain_invoke(bs, true, NULL);
++    bdrv_drain_invoke(bs, true);
+ }
+ 
+ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+@@ -417,7 +401,7 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+ 
+     if (qemu_in_coroutine()) {
+         bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
+-                               poll, NULL);
++                               poll);
+         return;
+     }
+ 
+@@ -461,38 +445,24 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs)
+ 
+ /**
+  * This function does not poll, nor must any of its recursively called
+- * functions.  The *drained_end_counter pointee will be incremented
+- * once for every background operation scheduled, and decremented once
+- * the operation settles.  Therefore, the pointer must remain valid
+- * until the pointee reaches 0.  That implies that whoever sets up the
+- * pointee has to poll until it is 0.
+- *
+- * We use atomic operations to access *drained_end_counter, because
+- * (1) when called from bdrv_set_aio_context_ignore(), the subgraph of
+- *     @bs may contain nodes in different AioContexts,
+- * (2) bdrv_drain_all_end() uses the same counter for all nodes,
+- *     regardless of which AioContext they are in.
++ * functions.
+  */
+ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+-                                BdrvChild *parent, bool ignore_bds_parents,
+-                                int *drained_end_counter)
++                                BdrvChild *parent, bool ignore_bds_parents)
+ {
+     BdrvChild *child;
+     int old_quiesce_counter;
+ 
+-    assert(drained_end_counter != NULL);
+-
+     if (qemu_in_coroutine()) {
+         bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
+-                               false, drained_end_counter);
++                               false);
+         return;
+     }
+     assert(bs->quiesce_counter > 0);
+ 
+     /* Re-enable things in child-to-parent order */
+-    bdrv_drain_invoke(bs, false, drained_end_counter);
+-    bdrv_parent_drained_end(bs, parent, ignore_bds_parents,
+-                            drained_end_counter);
++    bdrv_drain_invoke(bs, false);
++    bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
+ 
+     old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
+     if (old_quiesce_counter == 1) {
+@@ -503,32 +473,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+         assert(!ignore_bds_parents);
+         bs->recursive_quiesce_counter--;
+         QLIST_FOREACH(child, &bs->children, next) {
+-            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents,
+-                                drained_end_counter);
++            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
+         }
+     }
+ }
+ 
+ void bdrv_drained_end(BlockDriverState *bs)
+ {
+-    int drained_end_counter = 0;
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
+-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+-}
+-
+-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
+-{
+-    IO_CODE();
+-    bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
++    bdrv_do_drained_end(bs, false, NULL, false);
+ }
+ 
+ void bdrv_subtree_drained_end(BlockDriverState *bs)
+ {
+-    int drained_end_counter = 0;
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
+-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
++    bdrv_do_drained_end(bs, true, NULL, false);
+ }
+ 
+ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
+@@ -543,16 +502,12 @@ void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
+ 
+ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
+ {
+-    int drained_end_counter = 0;
+     int i;
+     IO_OR_GS_CODE();
+ 
+     for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
+-        bdrv_do_drained_end(child->bs, true, child, false,
+-                            &drained_end_counter);
++        bdrv_do_drained_end(child->bs, true, child, false);
+     }
+-
+-    BDRV_POLL_WHILE(child->bs, qatomic_read(&drained_end_counter) > 0);
+ }
+ 
+ void bdrv_drain(BlockDriverState *bs)
+@@ -610,7 +565,7 @@ void bdrv_drain_all_begin(void)
+     GLOBAL_STATE_CODE();
+ 
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
++        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
+         return;
+     }
+ 
+@@ -649,22 +604,19 @@ void bdrv_drain_all_begin(void)
+ 
+ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
+ {
+-    int drained_end_counter = 0;
+     GLOBAL_STATE_CODE();
+ 
+     g_assert(bs->quiesce_counter > 0);
+     g_assert(!bs->refcnt);
+ 
+     while (bs->quiesce_counter) {
+-        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
++        bdrv_do_drained_end(bs, false, NULL, true);
+     }
+-    BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
+ }
+ 
+ void bdrv_drain_all_end(void)
+ {
+     BlockDriverState *bs = NULL;
+-    int drained_end_counter = 0;
+     GLOBAL_STATE_CODE();
+ 
+     /*
+@@ -680,13 +632,11 @@ void bdrv_drain_all_end(void)
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+ 
+         aio_context_acquire(aio_context);
+-        bdrv_do_drained_end(bs, false, NULL, true, &drained_end_counter);
++        bdrv_do_drained_end(bs, false, NULL, true);
+         aio_context_release(aio_context);
+     }
+ 
+     assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+-    AIO_WAIT_WHILE(NULL, qatomic_read(&drained_end_counter) > 0);
+-
+     assert(bdrv_drain_all_count > 0);
+     bdrv_drain_all_count--;
+ }
+diff --git a/blockjob.c b/blockjob.c
+index f51d4e18f3..0ab721e139 100644
+--- a/blockjob.c
++++ b/blockjob.c
+@@ -120,7 +120,7 @@ static bool child_job_drained_poll(BdrvChild *c)
+     }
+ }
+ 
+-static void child_job_drained_end(BdrvChild *c, int *drained_end_counter)
++static void child_job_drained_end(BdrvChild *c)
+ {
+     BlockJob *job = c->opaque;
+     job_resume(&job->job);
+diff --git a/include/block/block-io.h b/include/block/block-io.h
+index b099d7db45..054e964c9b 100644
+--- a/include/block/block-io.h
++++ b/include/block/block-io.h
+@@ -237,21 +237,6 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+                                     int64_t bytes, BdrvRequestFlags read_flags,
+                                     BdrvRequestFlags write_flags);
+ 
+-/**
+- * bdrv_drained_end_no_poll:
+- *
+- * Same as bdrv_drained_end(), but do not poll for the subgraph to
+- * actually become unquiesced.  Therefore, no graph changes will occur
+- * with this function.
+- *
+- * *drained_end_counter is incremented for every background operation
+- * that is scheduled, and will be decremented for every operation once
+- * it settles.  The caller must poll until it reaches 0.  The counter
+- * should be accessed using atomic operations only.
+- */
+-void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
+-
+-
+ /*
+  * "I/O or GS" API functions. These functions can run without
+  * the BQL, but only in one specific iothread/main loop.
+@@ -311,9 +296,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
+  * bdrv_parent_drained_end_single:
+  *
+  * End a quiesced section for the parent of @c.
+- *
+- * This polls @bs's AioContext until all scheduled sub-drained_ends
+- * have settled, which may result in graph changes.
+  */
+ void bdrv_parent_drained_end_single(BdrvChild *c);
+ 
+@@ -361,12 +343,6 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
+  * bdrv_drained_end:
+  *
+  * End a quiescent section started by bdrv_drained_begin().
+- *
+- * This polls @bs's AioContext until all scheduled sub-drained_ends
+- * have settled.  On one hand, that may result in graph changes.  On
+- * the other, this requires that the caller either runs in the main
+- * loop; or that all involved nodes (@bs and all of its parents) are
+- * in the caller's AioContext.
+  */
+ void bdrv_drained_end(BlockDriverState *bs);
+ 
+diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
+index 40d646d1ed..2b97576f6d 100644
+--- a/include/block/block_int-common.h
++++ b/include/block/block_int-common.h
+@@ -939,15 +939,11 @@ struct BdrvChildClass {
+      * These functions must not change the graph (and therefore also must not
+      * call aio_poll(), which could change the graph indirectly).
+      *
+-     * If drained_end() schedules background operations, it must atomically
+-     * increment *drained_end_counter for each such operation and atomically
+-     * decrement it once the operation has settled.
+-     *
+      * Note that this can be nested. If drained_begin() was called twice, new
+      * I/O is allowed only after drained_end() was called twice, too.
+      */
+     void (*drained_begin)(BdrvChild *child);
+-    void (*drained_end)(BdrvChild *child, int *drained_end_counter);
++    void (*drained_end)(BdrvChild *child);
+ 
+     /*
+      * Returns whether the parent has pending requests for the child. This
+-- 
+2.31.1
+
--- a/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch
+++ b/kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch
@@ -0,0 +1,274 @@
+From 0dc7990533cef41e58579ee96315aca1fdc44ea1 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:07 +0100
+Subject: [PATCH 25/31] block: Remove ignore_bds_parents parameter from
+ drain_begin/end.
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [13/16] 1ed88d975a9569bffeb33ad847874417780ce408 (sgarzarella/qemu-kvm-c-9-s)
+
+ignore_bds_parents is now ignored during drain_begin and drain_end, so
+we can just remove it there. It is still a valid optimisation for
+drain_all in bdrv_drained_poll(), so leave it around there.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-13-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit a82a3bd135078d14f1bb4b5e50f51e77d3748270)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                  |  2 +-
+ block/io.c               | 58 +++++++++++++++-------------------------
+ include/block/block-io.h |  3 +--
+ 3 files changed, 24 insertions(+), 39 deletions(-)
+
+diff --git a/block.c b/block.c
+index 5a583e260d..af31a94863 100644
+--- a/block.c
++++ b/block.c
+@@ -1226,7 +1226,7 @@ static char *bdrv_child_get_parent_desc(BdrvChild *c)
+ static void bdrv_child_cb_drained_begin(BdrvChild *child)
+ {
+     BlockDriverState *bs = child->opaque;
+-    bdrv_do_drained_begin_quiesce(bs, NULL, false);
++    bdrv_do_drained_begin_quiesce(bs, NULL);
+ }
+ 
+ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+diff --git a/block/io.c b/block/io.c
+index 87d6f22ec4..2e9503df6a 100644
+--- a/block/io.c
++++ b/block/io.c
+@@ -45,13 +45,12 @@ static void bdrv_parent_cb_resize(BlockDriverState *bs);
+ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+     int64_t offset, int64_t bytes, BdrvRequestFlags flags);
+ 
+-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+-                                      bool ignore_bds_parents)
++static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+ {
+     BdrvChild *c, *next;
+ 
+     QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
+-        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
++        if (c == ignore) {
+             continue;
+         }
+         bdrv_parent_drained_begin_single(c, false);
+@@ -70,13 +69,12 @@ void bdrv_parent_drained_end_single(BdrvChild *c)
+     }
+ }
+ 
+-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+-                                    bool ignore_bds_parents)
++static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
+ {
+     BdrvChild *c;
+ 
+     QLIST_FOREACH(c, &bs->parents, next_parent) {
+-        if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
++        if (c == ignore) {
+             continue;
+         }
+         bdrv_parent_drained_end_single(c);
+@@ -242,7 +240,6 @@ typedef struct {
+     bool begin;
+     bool poll;
+     BdrvChild *parent;
+-    bool ignore_bds_parents;
+ } BdrvCoDrainData;
+ 
+ /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
+@@ -269,9 +266,8 @@ static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+ }
+ 
+ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+-                                  bool ignore_bds_parents, bool poll);
+-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+-                                bool ignore_bds_parents);
++                                  bool poll);
++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
+ 
+ static void bdrv_co_drain_bh_cb(void *opaque)
+ {
+@@ -284,11 +280,10 @@ static void bdrv_co_drain_bh_cb(void *opaque)
+         aio_context_acquire(ctx);
+         bdrv_dec_in_flight(bs);
+         if (data->begin) {
+-            bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
+-                                  data->poll);
++            bdrv_do_drained_begin(bs, data->parent, data->poll);
+         } else {
+             assert(!data->poll);
+-            bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
++            bdrv_do_drained_end(bs, data->parent);
+         }
+         aio_context_release(ctx);
+     } else {
+@@ -303,7 +298,6 @@ static void bdrv_co_drain_bh_cb(void *opaque)
+ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+                                                 bool begin,
+                                                 BdrvChild *parent,
+-                                                bool ignore_bds_parents,
+                                                 bool poll)
+ {
+     BdrvCoDrainData data;
+@@ -321,7 +315,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+         .done = false,
+         .begin = begin,
+         .parent = parent,
+-        .ignore_bds_parents = ignore_bds_parents,
+         .poll = poll,
+     };
+ 
+@@ -353,8 +346,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+     }
+ }
+ 
+-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+-                                   BdrvChild *parent, bool ignore_bds_parents)
++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+ {
+     IO_OR_GS_CODE();
+     assert(!qemu_in_coroutine());
+@@ -362,9 +354,7 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+     /* Stop things in parent-to-child order */
+     if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+         aio_disable_external(bdrv_get_aio_context(bs));
+-
+-        /* TODO Remove ignore_bds_parents, we don't consider it any more */
+-        bdrv_parent_drained_begin(bs, parent, false);
++        bdrv_parent_drained_begin(bs, parent);
+         if (bs->drv && bs->drv->bdrv_drain_begin) {
+             bs->drv->bdrv_drain_begin(bs);
+         }
+@@ -372,14 +362,14 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ }
+ 
+ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+-                                  bool ignore_bds_parents, bool poll)
++                                  bool poll)
+ {
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
++        bdrv_co_yield_to_drain(bs, true, parent, poll);
+         return;
+     }
+ 
+-    bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
++    bdrv_do_drained_begin_quiesce(bs, parent);
+ 
+     /*
+      * Wait for drained requests to finish.
+@@ -391,7 +381,6 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+      * nodes.
+      */
+     if (poll) {
+-        assert(!ignore_bds_parents);
+         BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
+     }
+ }
+@@ -399,20 +388,19 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ void bdrv_drained_begin(BlockDriverState *bs)
+ {
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_begin(bs, NULL, false, true);
++    bdrv_do_drained_begin(bs, NULL, true);
+ }
+ 
+ /**
+  * This function does not poll, nor must any of its recursively called
+  * functions.
+  */
+-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+-                                bool ignore_bds_parents)
++static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
+ {
+     int old_quiesce_counter;
+ 
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
++        bdrv_co_yield_to_drain(bs, false, parent, false);
+         return;
+     }
+     assert(bs->quiesce_counter > 0);
+@@ -423,9 +411,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+         if (bs->drv && bs->drv->bdrv_drain_end) {
+             bs->drv->bdrv_drain_end(bs);
+         }
+-        /* TODO Remove ignore_bds_parents, we don't consider it any more */
+-        bdrv_parent_drained_end(bs, parent, false);
+-
++        bdrv_parent_drained_end(bs, parent);
+         aio_enable_external(bdrv_get_aio_context(bs));
+     }
+ }
+@@ -433,7 +419,7 @@ static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+ void bdrv_drained_end(BlockDriverState *bs)
+ {
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_end(bs, NULL, false);
++    bdrv_do_drained_end(bs, NULL);
+ }
+ 
+ void bdrv_drain(BlockDriverState *bs)
+@@ -491,7 +477,7 @@ void bdrv_drain_all_begin(void)
+     GLOBAL_STATE_CODE();
+ 
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
++        bdrv_co_yield_to_drain(NULL, true, NULL, true);
+         return;
+     }
+ 
+@@ -516,7 +502,7 @@ void bdrv_drain_all_begin(void)
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+ 
+         aio_context_acquire(aio_context);
+-        bdrv_do_drained_begin(bs, NULL, true, false);
++        bdrv_do_drained_begin(bs, NULL, false);
+         aio_context_release(aio_context);
+     }
+ 
+@@ -536,7 +522,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
+     g_assert(!bs->refcnt);
+ 
+     while (bs->quiesce_counter) {
+-        bdrv_do_drained_end(bs, NULL, true);
++        bdrv_do_drained_end(bs, NULL);
+     }
+ }
+ 
+@@ -558,7 +544,7 @@ void bdrv_drain_all_end(void)
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+ 
+         aio_context_acquire(aio_context);
+-        bdrv_do_drained_end(bs, NULL, true);
++        bdrv_do_drained_end(bs, NULL);
+         aio_context_release(aio_context);
+     }
+ 
+diff --git a/include/block/block-io.h b/include/block/block-io.h
+index 9c36a16a1f..8f5e75756a 100644
+--- a/include/block/block-io.h
++++ b/include/block/block-io.h
+@@ -329,8 +329,7 @@ void bdrv_drained_begin(BlockDriverState *bs);
+  * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+  * running requests to complete.
+  */
+-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+-                                   BdrvChild *parent, bool ignore_bds_parents);
++void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent);
+ 
+ /**
+  * bdrv_drained_end:
+-- 
+2.31.1
+
--- a/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch
+++ b/kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch
@@ -0,0 +1,106 @@
+From 60b66881fb972e1cdff1cd7b4c865e5e21c141b0 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:10 +0100
+Subject: [PATCH 28/31] block: Remove poll parameter from
+ bdrv_parent_drained_begin_single()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [16/16] fd526cc9e5bebeb256cfa56d23ec596f26caa37a (sgarzarella/qemu-kvm-c-9-s)
+
+All callers of bdrv_parent_drained_begin_single() pass poll=false now,
+so we don't need the parameter any more.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20221118174110.55183-16-kwolf@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 606ed756c1d69cba4822be8923248d2fd714f069)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                  | 4 ++--
+ block/io.c               | 8 ++------
+ include/block/block-io.h | 5 ++---
+ 3 files changed, 6 insertions(+), 11 deletions(-)
+
+diff --git a/block.c b/block.c
+index 65588d313a..0d78711416 100644
+--- a/block.c
++++ b/block.c
+@@ -2417,7 +2417,7 @@ static void bdrv_replace_child_abort(void *opaque)
+          * new_bs drained when calling bdrv_replace_child_tran() is not a
+          * requirement any more.
+          */
+-        bdrv_parent_drained_begin_single(s->child, false);
++        bdrv_parent_drained_begin_single(s->child);
+         assert(!bdrv_parent_drained_poll_single(s->child));
+     }
+     assert(s->child->quiesced_parent);
+@@ -3059,7 +3059,7 @@ static BdrvChild *bdrv_attach_child_common(BlockDriverState *child_bs,
+      * a problem, we already did this), but it will still poll until the parent
+      * is fully quiesced, so it will not be negatively affected either.
+      */
+-    bdrv_parent_drained_begin_single(new_child, false);
++    bdrv_parent_drained_begin_single(new_child);
+     bdrv_replace_child_noperm(new_child, child_bs);
+ 
+     BdrvAttachChildCommonState *s = g_new(BdrvAttachChildCommonState, 1);
+diff --git a/block/io.c b/block/io.c
+index ae64830eac..38e57d1f67 100644
+--- a/block/io.c
++++ b/block/io.c
+@@ -53,7 +53,7 @@ static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
+         if (c == ignore) {
+             continue;
+         }
+-        bdrv_parent_drained_begin_single(c, false);
++        bdrv_parent_drained_begin_single(c);
+     }
+ }
+ 
+@@ -105,9 +105,8 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
+     return busy;
+ }
+ 
+-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
++void bdrv_parent_drained_begin_single(BdrvChild *c)
+ {
+-    AioContext *ctx = bdrv_child_get_parent_aio_context(c);
+     IO_OR_GS_CODE();
+ 
+     assert(!c->quiesced_parent);
+@@ -116,9 +115,6 @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+     if (c->klass->drained_begin) {
+         c->klass->drained_begin(c);
+     }
+-    if (poll) {
+-        AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
+-    }
+ }
+ 
+ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
+diff --git a/include/block/block-io.h b/include/block/block-io.h
+index 65e6d2569b..92aaa7c1e9 100644
+--- a/include/block/block-io.h
++++ b/include/block/block-io.h
+@@ -287,10 +287,9 @@ bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+ /**
+  * bdrv_parent_drained_begin_single:
+  *
+- * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+- * any pending activity to cease.
++ * Begin a quiesced section for the parent of @c.
+  */
+-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
++void bdrv_parent_drained_begin_single(BdrvChild *c);
+ 
+ /**
+  * bdrv_parent_drained_poll_single:
+-- 
+2.31.1
+
--- a/kvm-block-Remove-subtree-drains.patch
+++ b/kvm-block-Remove-subtree-drains.patch
@@ -0,0 +1,896 @@
+From 79063522861cb2baf921b204bcdf4c3bfb5697f4 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:05 +0100
+Subject: [PATCH 23/31] block: Remove subtree drains
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [11/16] d92f5041cceeeec49a65441b22d20f692c0f1c77 (sgarzarella/qemu-kvm-c-9-s)
+
+Subtree drains are not used any more. Remove them.
+
+After this, BdrvChildClass.attach/detach() don't poll any more.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-11-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 299403aedaeb7f08d8e98aa8614b29d4e5546066)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                          |  20 +--
+ block/io.c                       | 121 +++-----------
+ include/block/block-io.h         |  18 +--
+ include/block/block_int-common.h |   1 -
+ include/block/block_int-io.h     |  12 --
+ tests/unit/test-bdrv-drain.c     | 261 ++-----------------------------
+ 6 files changed, 44 insertions(+), 389 deletions(-)
+
+diff --git a/block.c b/block.c
+index 5330e89903..e0e3b21790 100644
+--- a/block.c
+++ b/block.c
+@@ -1232,7 +1232,7 @@ static void bdrv_child_cb_drained_begin(BdrvChild *child)
+ static bool bdrv_child_cb_drained_poll(BdrvChild *child)
+ {
+     BlockDriverState *bs = child->opaque;
+-    return bdrv_drain_poll(bs, false, NULL, false);
+    return bdrv_drain_poll(bs, NULL, false);
+ }
+ 
+ static void bdrv_child_cb_drained_end(BdrvChild *child)
+@@ -1482,8 +1482,6 @@ static void bdrv_child_cb_attach(BdrvChild *child)
+         assert(!bs->file);
+         bs->file = child;
+     }
+-
+-    bdrv_apply_subtree_drain(child, bs);
+ }
+ 
+ static void bdrv_child_cb_detach(BdrvChild *child)
+@@ -1494,8 +1492,6 @@ static void bdrv_child_cb_detach(BdrvChild *child)
+         bdrv_backing_detach(child);
+     }
+ 
+-    bdrv_unapply_subtree_drain(child, bs);
+-
+     assert_bdrv_graph_writable(bs);
+     QLIST_REMOVE(child, next);
+     if (child == bs->backing) {
+@@ -2851,9 +2847,6 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+     }
+ 
+     if (old_bs) {
+-        /* Detach first so that the recursive drain sections coming from @child
+-         * are already gone and we only end the drain sections that came from
+-         * elsewhere. */
+         if (child->klass->detach) {
+             child->klass->detach(child);
+         }
+@@ -2868,17 +2861,14 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
+         QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+ 
+         /*
+-         * Detaching the old node may have led to the new node's
+-         * quiesce_counter having been decreased.  Not a problem, we
+-         * just need to recognize this here and then invoke
+-         * drained_end appropriately more often.
+         * Polling in bdrv_parent_drained_begin_single() may have led to the new
+         * node's quiesce_counter having been decreased.  Not a problem, we just
+         * need to recognize this here and then invoke drained_end appropriately
+         * more often.
+          */
+         assert(new_bs->quiesce_counter <= new_bs_quiesce_counter);
+         drain_saldo += new_bs->quiesce_counter - new_bs_quiesce_counter;
+ 
+-        /* Attach only after starting new drained sections, so that recursive
+-         * drain sections coming from @child don't get an extra .drained_begin
+-         * callback. */
+         if (child->klass->attach) {
+             child->klass->attach(child);
+         }
+diff --git a/block/io.c b/block/io.c
+index a25103be6f..75224480d0 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -236,17 +236,15 @@ typedef struct {
+     BlockDriverState *bs;
+     bool done;
+     bool begin;
+-    bool recursive;
+     bool poll;
+     BdrvChild *parent;
+     bool ignore_bds_parents;
+ } BdrvCoDrainData;
+ 
+ /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
+-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+-                     BdrvChild *ignore_parent, bool ignore_bds_parents)
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+                     bool ignore_bds_parents)
+ {
+-    BdrvChild *child, *next;
+     IO_OR_GS_CODE();
+ 
+     if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
+@@ -257,29 +255,19 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+         return true;
+     }
+ 
+-    if (recursive) {
+-        assert(!ignore_bds_parents);
+-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+-            if (bdrv_drain_poll(child->bs, recursive, child, false)) {
+-                return true;
+-            }
+-        }
+-    }
+-
+     return false;
+ }
+ 
+-static bool bdrv_drain_poll_top_level(BlockDriverState *bs, bool recursive,
+static bool bdrv_drain_poll_top_level(BlockDriverState *bs,
+                                       BdrvChild *ignore_parent)
+ {
+-    return bdrv_drain_poll(bs, recursive, ignore_parent, false);
+    return bdrv_drain_poll(bs, ignore_parent, false);
+ }
+ 
+-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+-                                  BdrvChild *parent, bool ignore_bds_parents,
+-                                  bool poll);
+-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+-                                BdrvChild *parent, bool ignore_bds_parents);
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+                                  bool ignore_bds_parents, bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+                                bool ignore_bds_parents);
+ 
+ static void bdrv_co_drain_bh_cb(void *opaque)
+ {
+@@ -292,12 +280,11 @@ static void bdrv_co_drain_bh_cb(void *opaque)
+         aio_context_acquire(ctx);
+         bdrv_dec_in_flight(bs);
+         if (data->begin) {
+-            bdrv_do_drained_begin(bs, data->recursive, data->parent,
+-                                  data->ignore_bds_parents, data->poll);
+            bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
+                                  data->poll);
+         } else {
+             assert(!data->poll);
+-            bdrv_do_drained_end(bs, data->recursive, data->parent,
+-                                data->ignore_bds_parents);
+            bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
+         }
+         aio_context_release(ctx);
+     } else {
+@@ -310,7 +297,7 @@ static void bdrv_co_drain_bh_cb(void *opaque)
+ }
+ 
+ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+-                                                bool begin, bool recursive,
+                                                bool begin,
+                                                 BdrvChild *parent,
+                                                 bool ignore_bds_parents,
+                                                 bool poll)
+@@ -329,7 +316,6 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
+         .bs = bs,
+         .done = false,
+         .begin = begin,
+-        .recursive = recursive,
+         .parent = parent,
+         .ignore_bds_parents = ignore_bds_parents,
+         .poll = poll,
+@@ -380,29 +366,16 @@ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+     }
+ }
+ 
+-static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+-                                  BdrvChild *parent, bool ignore_bds_parents,
+-                                  bool poll)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+                                  bool ignore_bds_parents, bool poll)
+ {
+-    BdrvChild *child, *next;
+-
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(bs, true, recursive, parent, ignore_bds_parents,
+-                               poll);
+        bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
+         return;
+     }
+ 
+     bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
+ 
+-    if (recursive) {
+-        assert(!ignore_bds_parents);
+-        bs->recursive_quiesce_counter++;
+-        QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+-            bdrv_do_drained_begin(child->bs, true, child, ignore_bds_parents,
+-                                  false);
+-        }
+-    }
+-
+     /*
+      * Wait for drained requests to finish.
+      *
+@@ -414,35 +387,27 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
+      */
+     if (poll) {
+         assert(!ignore_bds_parents);
+-        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, recursive, parent));
+        BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
+     }
+ }
+ 
+ void bdrv_drained_begin(BlockDriverState *bs)
+ {
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_begin(bs, false, NULL, false, true);
+-}
+-
+-void bdrv_subtree_drained_begin(BlockDriverState *bs)
+-{
+-    IO_OR_GS_CODE();
+-    bdrv_do_drained_begin(bs, true, NULL, false, true);
+    bdrv_do_drained_begin(bs, NULL, false, true);
+ }
+ 
+ /**
+  * This function does not poll, nor must any of its recursively called
+  * functions.
+  */
+-static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+-                                BdrvChild *parent, bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
+                                bool ignore_bds_parents)
+ {
+-    BdrvChild *child;
+     int old_quiesce_counter;
+ 
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(bs, false, recursive, parent, ignore_bds_parents,
+-                               false);
+        bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
+         return;
+     }
+     assert(bs->quiesce_counter > 0);
+@@ -457,46 +422,12 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
+     if (old_quiesce_counter == 1) {
+         aio_enable_external(bdrv_get_aio_context(bs));
+     }
+-
+-    if (recursive) {
+-        assert(!ignore_bds_parents);
+-        bs->recursive_quiesce_counter--;
+-        QLIST_FOREACH(child, &bs->children, next) {
+-            bdrv_do_drained_end(child->bs, true, child, ignore_bds_parents);
+-        }
+-    }
+ }
+ 
+ void bdrv_drained_end(BlockDriverState *bs)
+ {
+     IO_OR_GS_CODE();
+-    bdrv_do_drained_end(bs, false, NULL, false);
+-}
+-
+-void bdrv_subtree_drained_end(BlockDriverState *bs)
+-{
+-    IO_OR_GS_CODE();
+-    bdrv_do_drained_end(bs, true, NULL, false);
+-}
+-
+-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
+-{
+-    int i;
+-    IO_OR_GS_CODE();
+-
+-    for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
+-        bdrv_do_drained_begin(child->bs, true, child, false, true);
+-    }
+-}
+-
+-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
+-{
+-    int i;
+-    IO_OR_GS_CODE();
+-
+-    for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
+-        bdrv_do_drained_end(child->bs, true, child, false);
+-    }
+    bdrv_do_drained_end(bs, NULL, false);
+ }
+ 
+ void bdrv_drain(BlockDriverState *bs)
+@@ -529,7 +460,7 @@ static bool bdrv_drain_all_poll(void)
+     while ((bs = bdrv_next_all_states(bs))) {
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+         aio_context_acquire(aio_context);
+-        result |= bdrv_drain_poll(bs, false, NULL, true);
+        result |= bdrv_drain_poll(bs, NULL, true);
+         aio_context_release(aio_context);
+     }
+ 
+@@ -554,7 +485,7 @@ void bdrv_drain_all_begin(void)
+     GLOBAL_STATE_CODE();
+ 
+     if (qemu_in_coroutine()) {
+-        bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true);
+        bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
+         return;
+     }
+ 
+@@ -579,7 +510,7 @@ void bdrv_drain_all_begin(void)
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+ 
+         aio_context_acquire(aio_context);
+-        bdrv_do_drained_begin(bs, false, NULL, true, false);
+        bdrv_do_drained_begin(bs, NULL, true, false);
+         aio_context_release(aio_context);
+     }
+ 
+@@ -599,7 +530,7 @@ void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
+     g_assert(!bs->refcnt);
+ 
+     while (bs->quiesce_counter) {
+-        bdrv_do_drained_end(bs, false, NULL, true);
+        bdrv_do_drained_end(bs, NULL, true);
+     }
+ }
+ 
+@@ -621,7 +552,7 @@ void bdrv_drain_all_end(void)
+         AioContext *aio_context = bdrv_get_aio_context(bs);
+ 
+         aio_context_acquire(aio_context);
+-        bdrv_do_drained_end(bs, false, NULL, true);
+        bdrv_do_drained_end(bs, NULL, true);
+         aio_context_release(aio_context);
+     }
+ 
+diff --git a/include/block/block-io.h b/include/block/block-io.h
+index 054e964c9b..9c36a16a1f 100644
+--- a/include/block/block-io.h
+++ b/include/block/block-io.h
+@@ -302,8 +302,7 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
+ /**
+  * bdrv_drain_poll:
+  *
+- * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+- * and if @recursive is true its children as well (used for subtree drain).
+ * Poll for pending requests in @bs and its parents (except for @ignore_parent).
+  *
+  * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+  * ignore the drain request because they will be drained separately (used for
+@@ -311,8 +310,8 @@ void bdrv_parent_drained_end_single(BdrvChild *c);
+  *
+  * This is part of bdrv_drained_begin.
+  */
+-bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+-                     BdrvChild *ignore_parent, bool ignore_bds_parents);
+bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
+                     bool ignore_bds_parents);
+ 
+ /**
+  * bdrv_drained_begin:
+@@ -333,12 +332,6 @@ void bdrv_drained_begin(BlockDriverState *bs);
+ void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+                                    BdrvChild *parent, bool ignore_bds_parents);
+ 
+-/**
+- * Like bdrv_drained_begin, but recursively begins a quiesced section for
+- * exclusive access to all child nodes as well.
+- */
+-void bdrv_subtree_drained_begin(BlockDriverState *bs);
+-
+ /**
+  * bdrv_drained_end:
+  *
+@@ -346,9 +339,4 @@ void bdrv_subtree_drained_begin(BlockDriverState *bs);
+  */
+ void bdrv_drained_end(BlockDriverState *bs);
+ 
+-/**
+- * End a quiescent section started by bdrv_subtree_drained_begin().
+- */
+-void bdrv_subtree_drained_end(BlockDriverState *bs);
+-
+ #endif /* BLOCK_IO_H */
+diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
+index 2b97576f6d..791dddfd7d 100644
+--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
+@@ -1184,7 +1184,6 @@ struct BlockDriverState {
+ 
+     /* Accessed with atomic ops.  */
+     int quiesce_counter;
+-    int recursive_quiesce_counter;
+ 
+     unsigned int write_gen;               /* Current data generation */
+ 
+diff --git a/include/block/block_int-io.h b/include/block/block_int-io.h
+index 4b0b3e17ef..8bc061ebb8 100644
+--- a/include/block/block_int-io.h
+++ b/include/block/block_int-io.h
+@@ -179,16 +179,4 @@ void bdrv_bsc_invalidate_range(BlockDriverState *bs,
+  */
+ void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
+ 
+-
+-/*
+- * "I/O or GS" API functions. These functions can run without
+- * the BQL, but only in one specific iothread/main loop.
+- *
+- * See include/block/block-io.h for more information about
+- * the "I/O or GS" API.
+- */
+-
+-void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
+-void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
+-
+ #endif /* BLOCK_INT_IO_H */
+diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
+index 695519ee02..dda08de8db 100644
+--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
+@@ -156,7 +156,6 @@ static void call_in_coroutine(void (*entry)(void))
+ enum drain_type {
+     BDRV_DRAIN_ALL,
+     BDRV_DRAIN,
+-    BDRV_SUBTREE_DRAIN,
+     DRAIN_TYPE_MAX,
+ };
+ 
+@@ -165,7 +164,6 @@ static void do_drain_begin(enum drain_type drain_type, BlockDriverState *bs)
+     switch (drain_type) {
+     case BDRV_DRAIN_ALL:        bdrv_drain_all_begin(); break;
+     case BDRV_DRAIN:            bdrv_drained_begin(bs); break;
+-    case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_begin(bs); break;
+     default:                    g_assert_not_reached();
+     }
+ }
+@@ -175,7 +173,6 @@ static void do_drain_end(enum drain_type drain_type, BlockDriverState *bs)
+     switch (drain_type) {
+     case BDRV_DRAIN_ALL:        bdrv_drain_all_end(); break;
+     case BDRV_DRAIN:            bdrv_drained_end(bs); break;
+-    case BDRV_SUBTREE_DRAIN:    bdrv_subtree_drained_end(bs); break;
+     default:                    g_assert_not_reached();
+     }
+ }
+@@ -271,11 +268,6 @@ static void test_drv_cb_drain(void)
+     test_drv_cb_common(BDRV_DRAIN, false);
+ }
+ 
+-static void test_drv_cb_drain_subtree(void)
+-{
+-    test_drv_cb_common(BDRV_SUBTREE_DRAIN, true);
+-}
+-
+ static void test_drv_cb_co_drain_all(void)
+ {
+     call_in_coroutine(test_drv_cb_drain_all);
+@@ -286,11 +278,6 @@ static void test_drv_cb_co_drain(void)
+     call_in_coroutine(test_drv_cb_drain);
+ }
+ 
+-static void test_drv_cb_co_drain_subtree(void)
+-{
+-    call_in_coroutine(test_drv_cb_drain_subtree);
+-}
+-
+ static void test_quiesce_common(enum drain_type drain_type, bool recursive)
+ {
+     BlockBackend *blk;
+@@ -332,11 +319,6 @@ static void test_quiesce_drain(void)
+     test_quiesce_common(BDRV_DRAIN, false);
+ }
+ 
+-static void test_quiesce_drain_subtree(void)
+-{
+-    test_quiesce_common(BDRV_SUBTREE_DRAIN, true);
+-}
+-
+ static void test_quiesce_co_drain_all(void)
+ {
+     call_in_coroutine(test_quiesce_drain_all);
+@@ -347,11 +329,6 @@ static void test_quiesce_co_drain(void)
+     call_in_coroutine(test_quiesce_drain);
+ }
+ 
+-static void test_quiesce_co_drain_subtree(void)
+-{
+-    call_in_coroutine(test_quiesce_drain_subtree);
+-}
+-
+ static void test_nested(void)
+ {
+     BlockBackend *blk;
+@@ -402,158 +379,6 @@ static void test_nested(void)
+     blk_unref(blk);
+ }
+ 
+-static void test_multiparent(void)
+-{
+-    BlockBackend *blk_a, *blk_b;
+-    BlockDriverState *bs_a, *bs_b, *backing;
+-    BDRVTestState *a_s, *b_s, *backing_s;
+-
+-    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+-    bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+-                                &error_abort);
+-    a_s = bs_a->opaque;
+-    blk_insert_bs(blk_a, bs_a, &error_abort);
+-
+-    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+-    bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+-                                &error_abort);
+-    b_s = bs_b->opaque;
+-    blk_insert_bs(blk_b, bs_b, &error_abort);
+-
+-    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+-    backing_s = backing->opaque;
+-    bdrv_set_backing_hd(bs_a, backing, &error_abort);
+-    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+-    g_assert_cmpint(a_s->drain_count, ==, 0);
+-    g_assert_cmpint(b_s->drain_count, ==, 0);
+-    g_assert_cmpint(backing_s->drain_count, ==, 0);
+-
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 1);
+-    g_assert_cmpint(a_s->drain_count, ==, 1);
+-    g_assert_cmpint(b_s->drain_count, ==, 1);
+-    g_assert_cmpint(backing_s->drain_count, ==, 1);
+-
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 2);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 2);
+-    g_assert_cmpint(a_s->drain_count, ==, 2);
+-    g_assert_cmpint(b_s->drain_count, ==, 2);
+-    g_assert_cmpint(backing_s->drain_count, ==, 2);
+-
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 1);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 1);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 1);
+-    g_assert_cmpint(a_s->drain_count, ==, 1);
+-    g_assert_cmpint(b_s->drain_count, ==, 1);
+-    g_assert_cmpint(backing_s->drain_count, ==, 1);
+-
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+-    g_assert_cmpint(a_s->drain_count, ==, 0);
+-    g_assert_cmpint(b_s->drain_count, ==, 0);
+-    g_assert_cmpint(backing_s->drain_count, ==, 0);
+-
+-    bdrv_unref(backing);
+-    bdrv_unref(bs_a);
+-    bdrv_unref(bs_b);
+-    blk_unref(blk_a);
+-    blk_unref(blk_b);
+-}
+-
+-static void test_graph_change_drain_subtree(void)
+-{
+-    BlockBackend *blk_a, *blk_b;
+-    BlockDriverState *bs_a, *bs_b, *backing;
+-    BDRVTestState *a_s, *b_s, *backing_s;
+-
+-    blk_a = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+-    bs_a = bdrv_new_open_driver(&bdrv_test, "test-node-a", BDRV_O_RDWR,
+-                                &error_abort);
+-    a_s = bs_a->opaque;
+-    blk_insert_bs(blk_a, bs_a, &error_abort);
+-
+-    blk_b = blk_new(qemu_get_aio_context(), BLK_PERM_ALL, BLK_PERM_ALL);
+-    bs_b = bdrv_new_open_driver(&bdrv_test, "test-node-b", BDRV_O_RDWR,
+-                                &error_abort);
+-    b_s = bs_b->opaque;
+-    blk_insert_bs(blk_b, bs_b, &error_abort);
+-
+-    backing = bdrv_new_open_driver(&bdrv_test, "backing", 0, &error_abort);
+-    backing_s = backing->opaque;
+-    bdrv_set_backing_hd(bs_a, backing, &error_abort);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+-    g_assert_cmpint(a_s->drain_count, ==, 0);
+-    g_assert_cmpint(b_s->drain_count, ==, 0);
+-    g_assert_cmpint(backing_s->drain_count, ==, 0);
+-
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_a);
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+-    do_drain_begin(BDRV_SUBTREE_DRAIN, bs_b);
+-
+-    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 5);
+-    g_assert_cmpint(a_s->drain_count, ==, 5);
+-    g_assert_cmpint(b_s->drain_count, ==, 5);
+-    g_assert_cmpint(backing_s->drain_count, ==, 5);
+-
+-    bdrv_set_backing_hd(bs_b, NULL, &error_abort);
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 3);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 2);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 3);
+-    g_assert_cmpint(a_s->drain_count, ==, 3);
+-    g_assert_cmpint(b_s->drain_count, ==, 2);
+-    g_assert_cmpint(backing_s->drain_count, ==, 3);
+-
+-    bdrv_set_backing_hd(bs_b, backing, &error_abort);
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 5);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 5);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 5);
+-    g_assert_cmpint(a_s->drain_count, ==, 5);
+-    g_assert_cmpint(b_s->drain_count, ==, 5);
+-    g_assert_cmpint(backing_s->drain_count, ==, 5);
+-
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_b);
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+-    do_drain_end(BDRV_SUBTREE_DRAIN, bs_a);
+-
+-    g_assert_cmpint(bs_a->quiesce_counter, ==, 0);
+-    g_assert_cmpint(bs_b->quiesce_counter, ==, 0);
+-    g_assert_cmpint(backing->quiesce_counter, ==, 0);
+-    g_assert_cmpint(a_s->drain_count, ==, 0);
+-    g_assert_cmpint(b_s->drain_count, ==, 0);
+-    g_assert_cmpint(backing_s->drain_count, ==, 0);
+-
+-    bdrv_unref(backing);
+-    bdrv_unref(bs_a);
+-    bdrv_unref(bs_b);
+-    blk_unref(blk_a);
+-    blk_unref(blk_b);
+-}
+-
+ static void test_graph_change_drain_all(void)
+ {
+     BlockBackend *blk_a, *blk_b;
+@@ -773,12 +598,6 @@ static void test_iothread_drain(void)
+     test_iothread_common(BDRV_DRAIN, 1);
+ }
+ 
+-static void test_iothread_drain_subtree(void)
+-{
+-    test_iothread_common(BDRV_SUBTREE_DRAIN, 0);
+-    test_iothread_common(BDRV_SUBTREE_DRAIN, 1);
+-}
+-
+ 
+ typedef struct TestBlockJob {
+     BlockJob common;
+@@ -863,7 +682,6 @@ enum test_job_result {
+ enum test_job_drain_node {
+     TEST_JOB_DRAIN_SRC,
+     TEST_JOB_DRAIN_SRC_CHILD,
+-    TEST_JOB_DRAIN_SRC_PARENT,
+ };
+ 
+ static void test_blockjob_common_drain_node(enum drain_type drain_type,
+@@ -901,9 +719,6 @@ static void test_blockjob_common_drain_node(enum drain_type drain_type,
+     case TEST_JOB_DRAIN_SRC_CHILD:
+         drain_bs = src_backing;
+         break;
+-    case TEST_JOB_DRAIN_SRC_PARENT:
+-        drain_bs = src_overlay;
+-        break;
+     default:
+         g_assert_not_reached();
+     }
+@@ -1055,10 +870,6 @@ static void test_blockjob_common(enum drain_type drain_type, bool use_iothread,
+                                     TEST_JOB_DRAIN_SRC);
+     test_blockjob_common_drain_node(drain_type, use_iothread, result,
+                                     TEST_JOB_DRAIN_SRC_CHILD);
+-    if (drain_type == BDRV_SUBTREE_DRAIN) {
+-        test_blockjob_common_drain_node(drain_type, use_iothread, result,
+-                                        TEST_JOB_DRAIN_SRC_PARENT);
+-    }
+ }
+ 
+ static void test_blockjob_drain_all(void)
+@@ -1071,11 +882,6 @@ static void test_blockjob_drain(void)
+     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_SUCCESS);
+ }
+ 
+-static void test_blockjob_drain_subtree(void)
+-{
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_SUCCESS);
+-}
+-
+ static void test_blockjob_error_drain_all(void)
+ {
+     test_blockjob_common(BDRV_DRAIN_ALL, false, TEST_JOB_FAIL_RUN);
+@@ -1088,12 +894,6 @@ static void test_blockjob_error_drain(void)
+     test_blockjob_common(BDRV_DRAIN, false, TEST_JOB_FAIL_PREPARE);
+ }
+ 
+-static void test_blockjob_error_drain_subtree(void)
+-{
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_RUN);
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, false, TEST_JOB_FAIL_PREPARE);
+-}
+-
+ static void test_blockjob_iothread_drain_all(void)
+ {
+     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_SUCCESS);
+@@ -1104,11 +904,6 @@ static void test_blockjob_iothread_drain(void)
+     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_SUCCESS);
+ }
+ 
+-static void test_blockjob_iothread_drain_subtree(void)
+-{
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_SUCCESS);
+-}
+-
+ static void test_blockjob_iothread_error_drain_all(void)
+ {
+     test_blockjob_common(BDRV_DRAIN_ALL, true, TEST_JOB_FAIL_RUN);
+@@ -1121,12 +916,6 @@ static void test_blockjob_iothread_error_drain(void)
+     test_blockjob_common(BDRV_DRAIN, true, TEST_JOB_FAIL_PREPARE);
+ }
+ 
+-static void test_blockjob_iothread_error_drain_subtree(void)
+-{
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_RUN);
+-    test_blockjob_common(BDRV_SUBTREE_DRAIN, true, TEST_JOB_FAIL_PREPARE);
+-}
+-
+ 
+ typedef struct BDRVTestTopState {
+     BdrvChild *wait_child;
+@@ -1273,14 +1062,6 @@ static void do_test_delete_by_drain(bool detach_instead_of_delete,
+         bdrv_drain(child_bs);
+         bdrv_unref(child_bs);
+         break;
+-    case BDRV_SUBTREE_DRAIN:
+-        /* Would have to ref/unref bs here for !detach_instead_of_delete, but
+-         * then the whole test becomes pointless because the graph changes
+-         * don't occur during the drain any more. */
+-        assert(detach_instead_of_delete);
+-        bdrv_subtree_drained_begin(bs);
+-        bdrv_subtree_drained_end(bs);
+-        break;
+     case BDRV_DRAIN_ALL:
+         bdrv_drain_all_begin();
+         bdrv_drain_all_end();
+@@ -1315,11 +1096,6 @@ static void test_detach_by_drain(void)
+     do_test_delete_by_drain(true, BDRV_DRAIN);
+ }
+ 
+-static void test_detach_by_drain_subtree(void)
+-{
+-    do_test_delete_by_drain(true, BDRV_SUBTREE_DRAIN);
+-}
+-
+ 
+ struct detach_by_parent_data {
+     BlockDriverState *parent_b;
+@@ -1452,7 +1228,10 @@ static void test_detach_indirect(bool by_parent_cb)
+     g_assert(acb != NULL);
+ 
+     /* Drain and check the expected result */
+-    bdrv_subtree_drained_begin(parent_b);
+    bdrv_drained_begin(parent_b);
+    bdrv_drained_begin(a);
+    bdrv_drained_begin(b);
+    bdrv_drained_begin(c);
+ 
+     g_assert(detach_by_parent_data.child_c != NULL);
+ 
+@@ -1467,12 +1246,15 @@ static void test_detach_indirect(bool by_parent_cb)
+     g_assert(QLIST_NEXT(child_a, next) == NULL);
+ 
+     g_assert_cmpint(parent_a->quiesce_counter, ==, 1);
+-    g_assert_cmpint(parent_b->quiesce_counter, ==, 1);
+    g_assert_cmpint(parent_b->quiesce_counter, ==, 3);
+     g_assert_cmpint(a->quiesce_counter, ==, 1);
+-    g_assert_cmpint(b->quiesce_counter, ==, 0);
+    g_assert_cmpint(b->quiesce_counter, ==, 1);
+     g_assert_cmpint(c->quiesce_counter, ==, 1);
+ 
+-    bdrv_subtree_drained_end(parent_b);
+    bdrv_drained_end(parent_b);
+    bdrv_drained_end(a);
+    bdrv_drained_end(b);
+    bdrv_drained_end(c);
+ 
+     bdrv_unref(parent_b);
+     blk_unref(blk);
+@@ -2202,70 +1984,47 @@ int main(int argc, char **argv)
+ 
+     g_test_add_func("/bdrv-drain/driver-cb/drain_all", test_drv_cb_drain_all);
+     g_test_add_func("/bdrv-drain/driver-cb/drain", test_drv_cb_drain);
+-    g_test_add_func("/bdrv-drain/driver-cb/drain_subtree",
+-                    test_drv_cb_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/driver-cb/co/drain_all",
+                     test_drv_cb_co_drain_all);
+     g_test_add_func("/bdrv-drain/driver-cb/co/drain", test_drv_cb_co_drain);
+-    g_test_add_func("/bdrv-drain/driver-cb/co/drain_subtree",
+-                    test_drv_cb_co_drain_subtree);
+-
+ 
+     g_test_add_func("/bdrv-drain/quiesce/drain_all", test_quiesce_drain_all);
+     g_test_add_func("/bdrv-drain/quiesce/drain", test_quiesce_drain);
+-    g_test_add_func("/bdrv-drain/quiesce/drain_subtree",
+-                    test_quiesce_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/quiesce/co/drain_all",
+                     test_quiesce_co_drain_all);
+     g_test_add_func("/bdrv-drain/quiesce/co/drain", test_quiesce_co_drain);
+-    g_test_add_func("/bdrv-drain/quiesce/co/drain_subtree",
+-                    test_quiesce_co_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/nested", test_nested);
+-    g_test_add_func("/bdrv-drain/multiparent", test_multiparent);
+ 
+-    g_test_add_func("/bdrv-drain/graph-change/drain_subtree",
+-                    test_graph_change_drain_subtree);
+     g_test_add_func("/bdrv-drain/graph-change/drain_all",
+                     test_graph_change_drain_all);
+ 
+     g_test_add_func("/bdrv-drain/iothread/drain_all", test_iothread_drain_all);
+     g_test_add_func("/bdrv-drain/iothread/drain", test_iothread_drain);
+-    g_test_add_func("/bdrv-drain/iothread/drain_subtree",
+-                    test_iothread_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/blockjob/drain_all", test_blockjob_drain_all);
+     g_test_add_func("/bdrv-drain/blockjob/drain", test_blockjob_drain);
+-    g_test_add_func("/bdrv-drain/blockjob/drain_subtree",
+-                    test_blockjob_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/blockjob/error/drain_all",
+                     test_blockjob_error_drain_all);
+     g_test_add_func("/bdrv-drain/blockjob/error/drain",
+                     test_blockjob_error_drain);
+-    g_test_add_func("/bdrv-drain/blockjob/error/drain_subtree",
+-                    test_blockjob_error_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/blockjob/iothread/drain_all",
+                     test_blockjob_iothread_drain_all);
+     g_test_add_func("/bdrv-drain/blockjob/iothread/drain",
+                     test_blockjob_iothread_drain);
+-    g_test_add_func("/bdrv-drain/blockjob/iothread/drain_subtree",
+-                    test_blockjob_iothread_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_all",
+                     test_blockjob_iothread_error_drain_all);
+     g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain",
+                     test_blockjob_iothread_error_drain);
+-    g_test_add_func("/bdrv-drain/blockjob/iothread/error/drain_subtree",
+-                    test_blockjob_iothread_error_drain_subtree);
+ 
+     g_test_add_func("/bdrv-drain/deletion/drain", test_delete_by_drain);
+     g_test_add_func("/bdrv-drain/detach/drain_all", test_detach_by_drain_all);
+     g_test_add_func("/bdrv-drain/detach/drain", test_detach_by_drain);
+-    g_test_add_func("/bdrv-drain/detach/drain_subtree", test_detach_by_drain_subtree);
+     g_test_add_func("/bdrv-drain/detach/parent_cb", test_detach_by_parent_cb);
+     g_test_add_func("/bdrv-drain/detach/driver_cb", test_detach_by_driver_cb);
+ 
+-- 
+2.31.1
+
--- a/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch
+++ b/kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch
@ -0,0 +1,302 @@
+From 0e894c93cae97bb792dc483be8e295d097ebd7a1 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:40:58 +0100
+Subject: [PATCH 16/31] block: Revert .bdrv_drained_begin/end to
+ non-coroutine_fn
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [4/16] 86d6049e40a99604e414c2572b67f74b85868832 (sgarzarella/qemu-kvm-c-9-s)
+
+Polling during bdrv_drained_end() can be problematic (and in the future,
+we may get cases for bdrv_drained_begin() where polling is forbidden,
+and we don't care about already in-flight requests, but just want to
+prevent new requests from arriving).
+
+The .bdrv_drained_begin/end callbacks running in a coroutine is the only
+reason why we have to do this polling, so make them non-coroutine
+callbacks again. None of the callers actually yield any more.
+
+This means that bdrv_drained_end() effectively doesn't poll any more,
+even if AIO_WAIT_WHILE() loops are still there (their condition is false
+from the beginning). This is generally not a problem, but in
+test-bdrv-drain, some additional explicit aio_poll() calls need to be
+added because the test case wants to verify the final state after BHs
+have executed.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-4-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 5e8ac21717373cbe96ef7a91e216bf5788815d63)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                          |  4 +--
+ block/io.c                       | 49 +++++---------------------------
+ block/qed.c                      |  6 ++--
+ block/throttle.c                 |  8 +++---
+ include/block/block_int-common.h | 10 ++++---
+ tests/unit/test-bdrv-drain.c     | 18 ++++++------
+ 6 files changed, 32 insertions(+), 63 deletions(-)
+
+diff --git a/block.c b/block.c
+index ec184150a2..16a62a329c 100644
+--- a/block.c
+++ b/block.c
+@@ -1713,8 +1713,8 @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
+     assert(is_power_of_2(bs->bl.request_alignment));
+ 
+     for (i = 0; i < bs->quiesce_counter; i++) {
+-        if (drv->bdrv_co_drain_begin) {
+-            drv->bdrv_co_drain_begin(bs);
+        if (drv->bdrv_drain_begin) {
+            drv->bdrv_drain_begin(bs);
+         }
+     }
+ 
+diff --git a/block/io.c b/block/io.c
+index b9424024f9..c2ed4b2af9 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -252,55 +252,20 @@ typedef struct {
+     int *drained_end_counter;
+ } BdrvCoDrainData;
+ 
+-static void coroutine_fn bdrv_drain_invoke_entry(void *opaque)
+-{
+-    BdrvCoDrainData *data = opaque;
+-    BlockDriverState *bs = data->bs;
+-
+-    if (data->begin) {
+-        bs->drv->bdrv_co_drain_begin(bs);
+-    } else {
+-        bs->drv->bdrv_co_drain_end(bs);
+-    }
+-
+-    /* Set data->done and decrement drained_end_counter before bdrv_wakeup() */
+-    qatomic_mb_set(&data->done, true);
+-    if (!data->begin) {
+-        qatomic_dec(data->drained_end_counter);
+-    }
+-    bdrv_dec_in_flight(bs);
+-
+-    g_free(data);
+-}
+-
+-/* Recursively call BlockDriver.bdrv_co_drain_begin/end callbacks */
+/* Recursively call BlockDriver.bdrv_drain_begin/end callbacks */
+ static void bdrv_drain_invoke(BlockDriverState *bs, bool begin,
+                               int *drained_end_counter)
+ {
+-    BdrvCoDrainData *data;
+-
+-    if (!bs->drv || (begin && !bs->drv->bdrv_co_drain_begin) ||
+-            (!begin && !bs->drv->bdrv_co_drain_end)) {
+    if (!bs->drv || (begin && !bs->drv->bdrv_drain_begin) ||
+            (!begin && !bs->drv->bdrv_drain_end)) {
+         return;
+     }
+ 
+-    data = g_new(BdrvCoDrainData, 1);
+-    *data = (BdrvCoDrainData) {
+-        .bs = bs,
+-        .done = false,
+-        .begin = begin,
+-        .drained_end_counter = drained_end_counter,
+-    };
+-
+-    if (!begin) {
+-        qatomic_inc(drained_end_counter);
+    if (begin) {
+        bs->drv->bdrv_drain_begin(bs);
+    } else {
+        bs->drv->bdrv_drain_end(bs);
+     }
+-
+-    /* Make sure the driver callback completes during the polling phase for
+-     * drain_begin. */
+-    bdrv_inc_in_flight(bs);
+-    data->co = qemu_coroutine_create(bdrv_drain_invoke_entry, data);
+-    aio_co_schedule(bdrv_get_aio_context(bs), data->co);
+ }
+ 
+ /* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
+diff --git a/block/qed.c b/block/qed.c
+index 013f826c44..c2691a85b1 100644
+--- a/block/qed.c
+++ b/block/qed.c
+@@ -262,7 +262,7 @@ static bool coroutine_fn qed_plug_allocating_write_reqs(BDRVQEDState *s)
+     assert(!s->allocating_write_reqs_plugged);
+     if (s->allocating_acb != NULL) {
+         /* Another allocating write came concurrently.  This cannot happen
+-         * from bdrv_qed_co_drain_begin, but it can happen when the timer runs.
+         * from bdrv_qed_drain_begin, but it can happen when the timer runs.
+          */
+         qemu_co_mutex_unlock(&s->table_lock);
+         return false;
+@@ -365,7 +365,7 @@ static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
+     }
+ }
+ 
+-static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
+static void bdrv_qed_drain_begin(BlockDriverState *bs)
+ {
+     BDRVQEDState *s = bs->opaque;
+ 
+@@ -1661,7 +1661,7 @@ static BlockDriver bdrv_qed = {
+     .bdrv_co_check            = bdrv_qed_co_check,
+     .bdrv_detach_aio_context  = bdrv_qed_detach_aio_context,
+     .bdrv_attach_aio_context  = bdrv_qed_attach_aio_context,
+-    .bdrv_co_drain_begin      = bdrv_qed_co_drain_begin,
+    .bdrv_drain_begin         = bdrv_qed_drain_begin,
+ };
+ 
+ static void bdrv_qed_init(void)
+diff --git a/block/throttle.c b/block/throttle.c
+index 131eba3ab4..88851c84f4 100644
+--- a/block/throttle.c
+++ b/block/throttle.c
+@@ -214,7 +214,7 @@ static void throttle_reopen_abort(BDRVReopenState *reopen_state)
+     reopen_state->opaque = NULL;
+ }
+ 
+-static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
+static void throttle_drain_begin(BlockDriverState *bs)
+ {
+     ThrottleGroupMember *tgm = bs->opaque;
+     if (qatomic_fetch_inc(&tgm->io_limits_disabled) == 0) {
+@@ -222,7 +222,7 @@ static void coroutine_fn throttle_co_drain_begin(BlockDriverState *bs)
+     }
+ }
+ 
+-static void coroutine_fn throttle_co_drain_end(BlockDriverState *bs)
+static void throttle_drain_end(BlockDriverState *bs)
+ {
+     ThrottleGroupMember *tgm = bs->opaque;
+     assert(tgm->io_limits_disabled);
+@@ -261,8 +261,8 @@ static BlockDriver bdrv_throttle = {
+     .bdrv_reopen_commit                 =   throttle_reopen_commit,
+     .bdrv_reopen_abort                  =   throttle_reopen_abort,
+ 
+-    .bdrv_co_drain_begin                =   throttle_co_drain_begin,
+-    .bdrv_co_drain_end                  =   throttle_co_drain_end,
+    .bdrv_drain_begin                   =   throttle_drain_begin,
+    .bdrv_drain_end                     =   throttle_drain_end,
+ 
+     .is_filter                          =   true,
+     .strong_runtime_opts                =   throttle_strong_runtime_opts,
+diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
+index 31ae91e56e..40d646d1ed 100644
+--- a/include/block/block_int-common.h
+++ b/include/block/block_int-common.h
+@@ -735,17 +735,19 @@ struct BlockDriver {
+     void (*bdrv_io_unplug)(BlockDriverState *bs);
+ 
+     /**
+-     * bdrv_co_drain_begin is called if implemented in the beginning of a
+     * bdrv_drain_begin is called if implemented in the beginning of a
+      * drain operation to drain and stop any internal sources of requests in
+      * the driver.
+-     * bdrv_co_drain_end is called if implemented at the end of the drain.
+     * bdrv_drain_end is called if implemented at the end of the drain.
+      *
+      * They should be used by the driver to e.g. manage scheduled I/O
+      * requests, or toggle an internal state. After the end of the drain new
+      * requests will continue normally.
+     *
+     * Implementations of both functions must not call aio_poll().
+      */
+-    void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
+-    void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+    void (*bdrv_drain_begin)(BlockDriverState *bs);
+    void (*bdrv_drain_end)(BlockDriverState *bs);
+ 
+     bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
+     bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
+index 24f34e24ad..695519ee02 100644
+--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
+@@ -46,7 +46,7 @@ static void coroutine_fn sleep_in_drain_begin(void *opaque)
+     bdrv_dec_in_flight(bs);
+ }
+ 
+-static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_test_drain_begin(BlockDriverState *bs)
+ {
+     BDRVTestState *s = bs->opaque;
+     s->drain_count++;
+@@ -57,7 +57,7 @@ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+     }
+ }
+ 
+-static void coroutine_fn bdrv_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_test_drain_end(BlockDriverState *bs)
+ {
+     BDRVTestState *s = bs->opaque;
+     s->drain_count--;
+@@ -111,8 +111,8 @@ static BlockDriver bdrv_test = {
+     .bdrv_close             = bdrv_test_close,
+     .bdrv_co_preadv         = bdrv_test_co_preadv,
+ 
+-    .bdrv_co_drain_begin    = bdrv_test_co_drain_begin,
+-    .bdrv_co_drain_end      = bdrv_test_co_drain_end,
+    .bdrv_drain_begin       = bdrv_test_drain_begin,
+    .bdrv_drain_end         = bdrv_test_drain_end,
+ 
+     .bdrv_child_perm        = bdrv_default_perms,
+ 
+@@ -1703,6 +1703,7 @@ static void test_blockjob_commit_by_drained_end(void)
+     bdrv_drained_begin(bs_child);
+     g_assert(!job_has_completed);
+     bdrv_drained_end(bs_child);
+    aio_poll(qemu_get_aio_context(), false);
+     g_assert(job_has_completed);
+ 
+     bdrv_unref(bs_parents[0]);
+@@ -1858,6 +1859,7 @@ static void test_drop_intermediate_poll(void)
+ 
+     g_assert(!job_has_completed);
+     ret = bdrv_drop_intermediate(chain[1], chain[0], NULL);
+    aio_poll(qemu_get_aio_context(), false);
+     g_assert(ret == 0);
+     g_assert(job_has_completed);
+ 
+@@ -1946,7 +1948,7 @@ static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
+  * .was_drained.
+  * Increment .drain_count.
+  */
+-static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
+static void bdrv_replace_test_drain_begin(BlockDriverState *bs)
+ {
+     BDRVReplaceTestState *s = bs->opaque;
+ 
+@@ -1977,7 +1979,7 @@ static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
+  * If .drain_count reaches 0 and the node has a backing file, issue a
+  * read request.
+  */
+-static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
+static void bdrv_replace_test_drain_end(BlockDriverState *bs)
+ {
+     BDRVReplaceTestState *s = bs->opaque;
+ 
+@@ -2002,8 +2004,8 @@ static BlockDriver bdrv_replace_test = {
+     .bdrv_close             = bdrv_replace_test_close,
+     .bdrv_co_preadv         = bdrv_replace_test_co_preadv,
+ 
+-    .bdrv_co_drain_begin    = bdrv_replace_test_co_drain_begin,
+-    .bdrv_co_drain_end      = bdrv_replace_test_co_drain_end,
+    .bdrv_drain_begin       = bdrv_replace_test_drain_begin,
+    .bdrv_drain_end         = bdrv_replace_test_drain_end,
+ 
+     .bdrv_child_perm        = bdrv_default_perms,
+ };
+-- 
+2.31.1
+
--- a/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch
+++ b/kvm-block-drop-bdrv_remove_filter_or_cow_child.patch
@ -0,0 +1,70 @@
+From defd6b325264d94ffb1355a8b19f9a77bd694a2f Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vladimir.sementsov-ogievskiy@openvz.org>
+Date: Mon, 7 Nov 2022 19:35:56 +0300
+Subject: [PATCH 13/31] block: drop bdrv_remove_filter_or_cow_child
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [1/16] 6339edd738c3b79f8ecb6c1e012e52b6afb1a622 (sgarzarella/qemu-kvm-c-9-s)
+
+Drop this simple wrapper used only in one place. We have too many graph
+modifying functions even without it.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@openvz.org>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221107163558.618889-3-vsementsov@yandex-team.ru>
+Reviewed-by: Kevin Wolf <kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit f38eaec4c3618dfc4a23e20435cefb5bf8325264)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c | 15 +--------------
+ 1 file changed, 1 insertion(+), 14 deletions(-)
+
+diff --git a/block.c b/block.c
+index a18f052374..ec184150a2 100644
+--- a/block.c
+++ b/block.c
+@@ -93,8 +93,6 @@ static bool bdrv_recurse_has_child(BlockDriverState *bs,
+ static void bdrv_replace_child_noperm(BdrvChild *child,
+                                       BlockDriverState *new_bs);
+ static void bdrv_remove_child(BdrvChild *child, Transaction *tran);
+-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+-                                            Transaction *tran);
+ 
+ static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+                                BlockReopenQueue *queue,
+@@ -5073,17 +5071,6 @@ static void bdrv_remove_child(BdrvChild *child, Transaction *tran)
+     tran_add(tran, &bdrv_remove_child_drv, child);
+ }
+ 
+-/*
+- * A function to remove backing-chain child of @bs if exists: cow child for
+- * format nodes (always .backing) and filter child for filters (may be .file or
+- * .backing)
+- */
+-static void bdrv_remove_filter_or_cow_child(BlockDriverState *bs,
+-                                            Transaction *tran)
+-{
+-    bdrv_remove_child(bdrv_filter_or_cow_child(bs), tran);
+-}
+-
+ static int bdrv_replace_node_noperm(BlockDriverState *from,
+                                     BlockDriverState *to,
+                                     bool auto_skip, Transaction *tran,
+@@ -5168,7 +5155,7 @@ static int bdrv_replace_node_common(BlockDriverState *from,
+     }
+ 
+     if (detach_subchain) {
+-        bdrv_remove_filter_or_cow_child(to_cow_parent, tran);
+        bdrv_remove_child(bdrv_filter_or_cow_child(to_cow_parent), tran);
+     }
+ 
+     found = g_hash_table_new(NULL, NULL);
+-- 
+2.31.1
+
--- a/kvm-kvm-Atomic-memslot-updates.patch
+++ b/kvm-kvm-Atomic-memslot-updates.patch
@ -0,0 +1,286 @@
+From e13fdc97ff05cdee46c112c2dee70b6ef33e7fa7 Mon Sep 17 00:00:00 2001
+From: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Date: Mon, 16 Jan 2023 07:17:31 -0500
+Subject: [PATCH 31/31] kvm: Atomic memslot updates
+
+RH-Author: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-MergeRequest: 138: accel: introduce accelerator blocker API
+RH-Bugzilla: 1979276
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [3/3] 9f03181ebcad2474fbe859acbce7b9891caa216b (eesposit/qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1979276
+
+commit f39b7d2b96e3e73c01bb678cd096f7baf0b9ab39
+Author: David Hildenbrand <david@redhat.com>
+Date:   Fri Nov 11 10:47:58 2022 -0500
+
+    kvm: Atomic memslot updates
+
+    If we update an existing memslot (e.g., resize, split), we temporarily
+    remove the memslot to re-add it immediately afterwards. These updates
+    are not atomic, especially not for KVM VCPU threads, such that we can
+    get spurious faults.
+
+    Let's inhibit most KVM ioctls while performing relevant updates, such
+    that we can perform the update just as if it would happen atomically
+    without additional kernel support.
+
+    We capture the add/del changes and apply them in the notifier commit
+    stage instead. There, we can check for overlaps and perform the ioctl
+    inhibiting only if really required (-> overlap).
+
+    To keep things simple we don't perform additional checks that wouldn't
+    actually result in an overlap -- such as !RAM memory regions in some
+    cases (see kvm_set_phys_mem()).
+
+    To minimize cache-line bouncing, use a separate indicator
+    (in_ioctl_lock) per CPU.  Also, make sure to hold the kvm_slots_lock
+    while performing both actions (removing+re-adding).
+
+    We have to wait until all IOCTLs were exited and block new ones from
+    getting executed.
+
+    This approach cannot result in a deadlock as long as the inhibitor does
+    not hold any locks that might hinder an IOCTL from getting finished and
+    exited - something fairly unusual. The inhibitor will always hold the BQL.
+
+    AFAICS, one possible candidate would be userfaultfd. If a page cannot be
+    placed (e.g., during postcopy), because we're waiting for a lock, or if the
+    userfaultfd thread cannot process a fault, because it is waiting for a
+    lock, there could be a deadlock. However, the BQL is not applicable here,
+    because any other guest memory access while holding the BQL would already
+    result in a deadlock.
+
+    Nothing else in the kernel should block forever and wait for userspace
+    intervention.
+
+    Note: pause_all_vcpus()/resume_all_vcpus() or
+    start_exclusive()/end_exclusive() cannot be used, as they either drop
+    the BQL or must be called without the BQL - something inhibitors
+    cannot handle. We need a low-level locking mechanism that is
+    deadlock-free even when not releasing the BQL.
+
+    Signed-off-by: David Hildenbrand <david@redhat.com>
+    Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+    Tested-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+    Message-Id: <20221111154758.1372674-4-eesposit@redhat.com>
+    Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Signed-off-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+---
+ accel/kvm/kvm-all.c      | 101 ++++++++++++++++++++++++++++++++++-----
+ include/sysemu/kvm_int.h |   8 ++++
+ 2 files changed, 98 insertions(+), 11 deletions(-)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index ff660fd469..39ed30ab59 100644
+--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
+@@ -31,6 +31,7 @@
+ #include "sysemu/kvm_int.h"
+ #include "sysemu/runstate.h"
+ #include "sysemu/cpus.h"
+#include "sysemu/accel-blocker.h"
+ #include "qemu/bswap.h"
+ #include "exec/memory.h"
+ #include "exec/ram_addr.h"
+@@ -46,6 +47,7 @@
+ #include "sysemu/hw_accel.h"
+ #include "kvm-cpus.h"
+ #include "sysemu/dirtylimit.h"
+#include "qemu/range.h"
+ 
+ #include "hw/boards.h"
+ #include "monitor/stats.h"
+@@ -1292,6 +1294,7 @@ void kvm_set_max_memslot_size(hwaddr max_slot_size)
+     kvm_max_slot_size = max_slot_size;
+ }
+ 
+/* Called with KVMMemoryListener.slots_lock held */
+ static void kvm_set_phys_mem(KVMMemoryListener *kml,
+                              MemoryRegionSection *section, bool add)
+ {
+@@ -1326,14 +1329,12 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
+     ram = memory_region_get_ram_ptr(mr) + mr_offset;
+     ram_start_offset = memory_region_get_ram_addr(mr) + mr_offset;
+ 
+-    kvm_slots_lock();
+-
+     if (!add) {
+         do {
+             slot_size = MIN(kvm_max_slot_size, size);
+             mem = kvm_lookup_matching_slot(kml, start_addr, slot_size);
+             if (!mem) {
+-                goto out;
+                return;
+             }
+             if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
+                 /*
+@@ -1371,7 +1372,7 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
+             start_addr += slot_size;
+             size -= slot_size;
+         } while (size);
+-        goto out;
+        return;
+     }
+ 
+     /* register the new slot */
+@@ -1396,9 +1397,6 @@ static void kvm_set_phys_mem(KVMMemoryListener *kml,
+         ram += slot_size;
+         size -= slot_size;
+     } while (size);
+-
+-out:
+-    kvm_slots_unlock();
+ }
+ 
+ static void *kvm_dirty_ring_reaper_thread(void *data)
+@@ -1455,18 +1453,95 @@ static void kvm_region_add(MemoryListener *listener,
+                            MemoryRegionSection *section)
+ {
+     KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+    KVMMemoryUpdate *update;
+
+    update = g_new0(KVMMemoryUpdate, 1);
+    update->section = *section;
+ 
+-    memory_region_ref(section->mr);
+-    kvm_set_phys_mem(kml, section, true);
+    QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
+ }
+ 
+ static void kvm_region_del(MemoryListener *listener,
+                            MemoryRegionSection *section)
+ {
+     KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+    KVMMemoryUpdate *update;
+
+    update = g_new0(KVMMemoryUpdate, 1);
+    update->section = *section;
+
+    QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
+}
+
+static void kvm_region_commit(MemoryListener *listener)
+{
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener,
+                                          listener);
+    KVMMemoryUpdate *u1, *u2;
+    bool need_inhibit = false;
+
+    if (QSIMPLEQ_EMPTY(&kml->transaction_add) &&
+        QSIMPLEQ_EMPTY(&kml->transaction_del)) {
+        return;
+    }
+
+    /*
+     * We have to be careful when regions to add overlap with ranges to remove.
+     * We have to simulate atomic KVM memslot updates by making sure no ioctl()
+     * is currently active.
+     *
+     * The lists are ordered by address, so it's easy to find overlaps.
+     */
+    u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
+    u2 = QSIMPLEQ_FIRST(&kml->transaction_add);
+    while (u1 && u2) {
+        Range r1, r2;
+
+        range_init_nofail(&r1, u1->section.offset_within_address_space,
+                          int128_get64(u1->section.size));
+        range_init_nofail(&r2, u2->section.offset_within_address_space,
+                          int128_get64(u2->section.size));
+
+        if (range_overlaps_range(&r1, &r2)) {
+            need_inhibit = true;
+            break;
+        }
+        if (range_lob(&r1) < range_lob(&r2)) {
+            u1 = QSIMPLEQ_NEXT(u1, next);
+        } else {
+            u2 = QSIMPLEQ_NEXT(u2, next);
+        }
+    }
+
+    kvm_slots_lock();
+    if (need_inhibit) {
+        accel_ioctl_inhibit_begin();
+    }
+
+    /* Remove all memslots before adding the new ones. */
+    while (!QSIMPLEQ_EMPTY(&kml->transaction_del)) {
+        u1 = QSIMPLEQ_FIRST(&kml->transaction_del);
+        QSIMPLEQ_REMOVE_HEAD(&kml->transaction_del, next);
+ 
+-    kvm_set_phys_mem(kml, section, false);
+-    memory_region_unref(section->mr);
+        kvm_set_phys_mem(kml, &u1->section, false);
+        memory_region_unref(u1->section.mr);
+
+        g_free(u1);
+    }
+    while (!QSIMPLEQ_EMPTY(&kml->transaction_add)) {
+        u1 = QSIMPLEQ_FIRST(&kml->transaction_add);
+        QSIMPLEQ_REMOVE_HEAD(&kml->transaction_add, next);
+
+        memory_region_ref(u1->section.mr);
+        kvm_set_phys_mem(kml, &u1->section, true);
+
+        g_free(u1);
+    }
+
+    if (need_inhibit) {
+        accel_ioctl_inhibit_end();
+    }
+    kvm_slots_unlock();
+ }
+ 
+ static void kvm_log_sync(MemoryListener *listener,
+@@ -1610,8 +1685,12 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
+         kml->slots[i].slot = i;
+     }
+ 
+    QSIMPLEQ_INIT(&kml->transaction_add);
+    QSIMPLEQ_INIT(&kml->transaction_del);
+
+     kml->listener.region_add = kvm_region_add;
+     kml->listener.region_del = kvm_region_del;
+    kml->listener.commit = kvm_region_commit;
+     kml->listener.log_start = kvm_log_start;
+     kml->listener.log_stop = kvm_log_stop;
+     kml->listener.priority = 10;
+diff --git a/include/sysemu/kvm_int.h b/include/sysemu/kvm_int.h
+index 3b4adcdc10..60b520a13e 100644
+--- a/include/sysemu/kvm_int.h
+++ b/include/sysemu/kvm_int.h
+@@ -12,6 +12,7 @@
+ #include "exec/memory.h"
+ #include "qapi/qapi-types-common.h"
+ #include "qemu/accel.h"
+#include "qemu/queue.h"
+ #include "sysemu/kvm.h"
+ 
+ typedef struct KVMSlot
+@@ -31,10 +32,17 @@ typedef struct KVMSlot
+     ram_addr_t ram_start_offset;
+ } KVMSlot;
+ 
+typedef struct KVMMemoryUpdate {
+    QSIMPLEQ_ENTRY(KVMMemoryUpdate) next;
+    MemoryRegionSection section;
+} KVMMemoryUpdate;
+
+ typedef struct KVMMemoryListener {
+     MemoryListener listener;
+     KVMSlot *slots;
+     int as_id;
+    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_add;
+    QSIMPLEQ_HEAD(, KVMMemoryUpdate) transaction_del;
+ } KVMMemoryListener;
+ 
+ #define KVM_MSI_HASHTAB_SIZE    256
+-- 
+2.31.1
+
--- a/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch
+++ b/kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch
@ -0,0 +1,84 @@
+From f628a08d20b9ab6be24c2ab18b38a934a314c78b Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:40:56 +0100
+Subject: [PATCH 14/31] qed: Don't yield in bdrv_qed_co_drain_begin()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [2/16] f18e9aebb7e04a62e309b656bac8f2ab83df657f (sgarzarella/qemu-kvm-c-9-s)
+
+We want to change .bdrv_co_drain_begin() back to be a non-coroutine
+callback, so in preparation, avoid yielding in its implementation.
+
+Because we increase bs->in_flight and bdrv_drained_begin() polls, the
+behaviour is unchanged.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-2-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 6d47eb0c8bf2d50682c7dccae74d24104076fe23)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block/qed.c | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/block/qed.c b/block/qed.c
+index 2f36ad342c..013f826c44 100644
+--- a/block/qed.c
+++ b/block/qed.c
+@@ -282,9 +282,8 @@ static void coroutine_fn qed_unplug_allocating_write_reqs(BDRVQEDState *s)
+     qemu_co_mutex_unlock(&s->table_lock);
+ }
+ 
+-static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+static void coroutine_fn qed_need_check_timer(BDRVQEDState *s)
+ {
+-    BDRVQEDState *s = opaque;
+     int ret;
+ 
+     trace_qed_need_check_timer_cb(s);
+@@ -310,9 +309,20 @@ static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+     (void) ret;
+ }
+ 
+static void coroutine_fn qed_need_check_timer_entry(void *opaque)
+{
+    BDRVQEDState *s = opaque;
+
+    qed_need_check_timer(opaque);
+    bdrv_dec_in_flight(s->bs);
+}
+
+ static void qed_need_check_timer_cb(void *opaque)
+ {
+    BDRVQEDState *s = opaque;
+     Coroutine *co = qemu_coroutine_create(qed_need_check_timer_entry, opaque);
+
+    bdrv_inc_in_flight(s->bs);
+     qemu_coroutine_enter(co);
+ }
+ 
+@@ -363,8 +373,12 @@ static void coroutine_fn bdrv_qed_co_drain_begin(BlockDriverState *bs)
+      * header is flushed.
+      */
+     if (s->need_check_timer && timer_pending(s->need_check_timer)) {
+        Coroutine *co;
+
+         qed_cancel_need_check_timer(s);
+-        qed_need_check_timer_entry(s);
+        co = qemu_coroutine_create(qed_need_check_timer_entry, s);
+        bdrv_inc_in_flight(bs);
+        aio_co_enter(bdrv_get_aio_context(bs), co);
+     }
+ }
+ 
+-- 
+2.31.1
+
--- a/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch
+++ b/kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch
@ -0,0 +1,70 @@
+From 51fcf352a97f2e99a6a3fb8ae663b45436304120 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@redhat.com>
+Date: Tue, 10 Jan 2023 14:25:34 +0100
+Subject: [PATCH 11/31] s390x/s390-virtio-ccw: Activate zPCI features on
+ s390-ccw-virtio-rhel8.6.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cédric Le Goater <clg@redhat.com>
+RH-MergeRequest: 133: s390x/s390-virtio-ccw: Activate zPCI features on s390-ccw-virtio-rhel8.6.0
+RH-Bugzilla: 2159408
+RH-Acked-by: Thomas Huth <thuth@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [1/1] 1ed82e56fe74a283a1726c4893dc3387e645072c (clegoate/qemu-kvm-c9s)
+
+commit c7b14d3af7 ("s390x/s390-virtio-ccw: Switch off zPCI enhancements
+on older machines") activated zPCI enhancement features (interpretation
+and forward assist) silently on the s390-ccw-virtio-rhel8.6.0 machine
+for RHEL8.8. It didn't seem to be a problem since migration is not
+possible but it broke LEAPP upgrade to RHEL9 when the machine is
+defined with a passthrough device. Activate the zPCI features also on
+RHEL9.2 for the machines to be alike in both latest RHEL distros.
+
+Upstream Status: RHEL-only
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2159408
+
+Signed-off-by: Cédric Le Goater <clg@redhat.com>
+---
+ hw/s390x/s390-virtio-ccw.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
+index aa142a1a4e..4cdd59c394 100644
+--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
+@@ -1234,8 +1234,14 @@ static void ccw_machine_rhel860_instance_options(MachineState *machine)
+ 
+ static void ccw_machine_rhel860_class_options(MachineClass *mc)
+ {
+    static GlobalProperty compat[] = {
+        { TYPE_S390_PCI_DEVICE, "interpret", "on", },
+        { TYPE_S390_PCI_DEVICE, "forwarding-assist", "on", },
+    };
+
+     ccw_machine_rhel900_class_options(mc);
+     compat_props_add(mc->compat_props, hw_compat_rhel_8_6, hw_compat_rhel_8_6_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+ 
+     /* All RHEL machines for prior major releases are deprecated */
+     mc->deprecation_reason = rhel_old_machine_deprecation;
+@@ -1259,8 +1265,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine)
+ 
+ static void ccw_machine_rhel850_class_options(MachineClass *mc)
+ {
+    static GlobalProperty compat[] = {
+        { TYPE_S390_PCI_DEVICE, "interpret", "off", },
+        { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", },
+    };
+
+     ccw_machine_rhel860_class_options(mc);
+     compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len);
+    compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat));
+     mc->smp_props.prefer_sockets = true;
+ }
+ DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false);
+-- 
+2.31.1
+
--- a/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch
+++ b/kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch
@ -0,0 +1,159 @@
+From 5defda06ec4c24818a34126c5048be5e274b63f5 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:41:04 +0100
+Subject: [PATCH 22/31] stream: Replace subtree drain with a single node drain
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [10/16] a93250b1f6ef296e903df0ba5d8b29bc2ed540a8 (sgarzarella/qemu-kvm-c-9-s)
+
+The subtree drain was introduced in commit b1e1af394d9 as a way to avoid
+graph changes between finding the base node and changing the block graph
+as necessary on completion of the image streaming job.
+
+The block graph could change between these two points because
+bdrv_set_backing_hd() first drains the parent node, which involved
+polling and can do anything.
+
+Subtree draining was an imperfect way to make this less likely (because
+with it, fewer callbacks are called during this window). Everyone agreed
+that it's not really the right solution, and it was only committed as a
+stopgap solution.
+
+This replaces the subtree drain with a solution that simply drains the
+parent node before we try to find the base node, and then calls a version
+of bdrv_set_backing_hd() that doesn't drain, but just asserts that the
+parent node is already drained.
+
+This way, any graph changes caused by draining happen before we start
+looking at the graph and things stay consistent between finding the base
+node and changing the graph.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-10-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 92140b9f3f07d80e2c27edcc6e32f392be2135e6)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ block.c                            | 17 ++++++++++++++---
+ block/stream.c                     | 26 ++++++++++++++++----------
+ include/block/block-global-state.h |  3 +++
+ 3 files changed, 33 insertions(+), 13 deletions(-)
+
+diff --git a/block.c b/block.c
+index b3449a312e..5330e89903 100644
+--- a/block.c
+++ b/block.c
+@@ -3403,14 +3403,15 @@ static int bdrv_set_backing_noperm(BlockDriverState *bs,
+     return bdrv_set_file_or_backing_noperm(bs, backing_hd, true, tran, errp);
+ }
+ 
+-int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+-                        Error **errp)
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+                                BlockDriverState *backing_hd,
+                                Error **errp)
+ {
+     int ret;
+     Transaction *tran = tran_new();
+ 
+     GLOBAL_STATE_CODE();
+-    bdrv_drained_begin(bs);
+    assert(bs->quiesce_counter > 0);
+ 
+     ret = bdrv_set_backing_noperm(bs, backing_hd, tran, errp);
+     if (ret < 0) {
+@@ -3420,7 +3421,17 @@ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+     ret = bdrv_refresh_perms(bs, errp);
+ out:
+     tran_finalize(tran, ret);
+    return ret;
+}
+ 
+int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                        Error **errp)
+{
+    int ret;
+    GLOBAL_STATE_CODE();
+
+    bdrv_drained_begin(bs);
+    ret = bdrv_set_backing_hd_drained(bs, backing_hd, errp);
+     bdrv_drained_end(bs);
+ 
+     return ret;
+diff --git a/block/stream.c b/block/stream.c
+index 694709bd25..8744ad103f 100644
+--- a/block/stream.c
+++ b/block/stream.c
+@@ -64,13 +64,16 @@ static int stream_prepare(Job *job)
+     bdrv_cor_filter_drop(s->cor_filter_bs);
+     s->cor_filter_bs = NULL;
+ 
+-    bdrv_subtree_drained_begin(s->above_base);
+    /*
+     * bdrv_set_backing_hd() requires that unfiltered_bs is drained. Drain
+     * already here and use bdrv_set_backing_hd_drained() instead because
+     * the polling during drained_begin() might change the graph, and if we do
+     * this only later, we may end up working with the wrong base node (or it
+     * might even have gone away by the time we want to use it).
+     */
+    bdrv_drained_begin(unfiltered_bs);
+ 
+     base = bdrv_filter_or_cow_bs(s->above_base);
+-    if (base) {
+-        bdrv_ref(base);
+-    }
+-
+     unfiltered_base = bdrv_skip_filters(base);
+ 
+     if (bdrv_cow_child(unfiltered_bs)) {
+@@ -82,7 +85,13 @@ static int stream_prepare(Job *job)
+             }
+         }
+ 
+-        bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
+        bdrv_set_backing_hd_drained(unfiltered_bs, base, &local_err);
+
+        /*
+         * This call will do I/O, so the graph can change again from here on.
+         * We have already completed the graph change, so we are not in danger
+         * of operating on the wrong node any more if this happens.
+         */
+         ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
+         if (local_err) {
+             error_report_err(local_err);
+@@ -92,10 +101,7 @@ static int stream_prepare(Job *job)
+     }
+ 
+ out:
+-    if (base) {
+-        bdrv_unref(base);
+-    }
+-    bdrv_subtree_drained_end(s->above_base);
+    bdrv_drained_end(unfiltered_bs);
+     return ret;
+ }
+ 
+diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
+index c7bd4a2088..00e0cf8aea 100644
+--- a/include/block/block-global-state.h
+++ b/include/block/block-global-state.h
+@@ -82,6 +82,9 @@ int bdrv_open_file_child(const char *filename,
+ BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
+ int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
+                         Error **errp);
+int bdrv_set_backing_hd_drained(BlockDriverState *bs,
+                                BlockDriverState *backing_hd,
+                                Error **errp);
+ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+                            const char *bdref_key, Error **errp);
+ BlockDriverState *bdrv_open(const char *filename, const char *reference,
+-- 
+2.31.1
+
--- a/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch
+++ b/kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch
@ -0,0 +1,153 @@
+From 093c4a6834f3ec5a05390a3630ae4edec80885b8 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Fri, 18 Nov 2022 18:40:57 +0100
+Subject: [PATCH 15/31] test-bdrv-drain: Don't yield in
+ .bdrv_co_drained_begin/end()
+
+RH-Author: Stefano Garzarella <sgarzare@redhat.com>
+RH-MergeRequest: 135: block: Simplify drain to prevent QEMU from crashing during snapshot
+RH-Bugzilla: 2155112
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Czenczek <hreitz@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Commit: [3/16] 5282d3e13cb85dfb480edb11b7eb2769248465df (sgarzarella/qemu-kvm-c-9-s)
+
+We want to change .bdrv_co_drained_begin/end() back to be non-coroutine
+callbacks, so in preparation, avoid yielding in their implementation.
+
+This does almost the same as the existing logic in bdrv_drain_invoke(),
+by creating and entering coroutines internally. However, since the test
+case is by far the heaviest user of coroutine code in drain callbacks,
+it is preferable to have the complexity in the test case rather than the
+drain core, which is already complicated enough without this.
+
+The behaviour for bdrv_drain_begin() is unchanged because we increase
+bs->in_flight and this is still polled. However, bdrv_drain_end()
+doesn't wait for the spawned coroutine to complete any more. This is
+fine, we don't rely on bdrv_drain_end() restarting all operations
+immediately before the next aio_poll().
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20221118174110.55183-3-kwolf@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 7bce1c299834557bffd92294608ea528648cfe75)
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+---
+ tests/unit/test-bdrv-drain.c | 64 ++++++++++++++++++++++++++----------
+ 1 file changed, 46 insertions(+), 18 deletions(-)
+
+diff --git a/tests/unit/test-bdrv-drain.c b/tests/unit/test-bdrv-drain.c
+index 09dc4a4891..24f34e24ad 100644
+--- a/tests/unit/test-bdrv-drain.c
+++ b/tests/unit/test-bdrv-drain.c
+@@ -38,12 +38,22 @@ typedef struct BDRVTestState {
+     bool sleep_in_drain_begin;
+ } BDRVTestState;
+ 
+static void coroutine_fn sleep_in_drain_begin(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+
+    qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+    bdrv_dec_in_flight(bs);
+}
+
+ static void coroutine_fn bdrv_test_co_drain_begin(BlockDriverState *bs)
+ {
+     BDRVTestState *s = bs->opaque;
+     s->drain_count++;
+     if (s->sleep_in_drain_begin) {
+-        qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 100000);
+        Coroutine *co = qemu_coroutine_create(sleep_in_drain_begin, bs);
+        bdrv_inc_in_flight(bs);
+        aio_co_enter(bdrv_get_aio_context(bs), co);
+     }
+ }
+ 
+@@ -1916,6 +1926,21 @@ static int coroutine_fn bdrv_replace_test_co_preadv(BlockDriverState *bs,
+     return 0;
+ }
+ 
+static void coroutine_fn bdrv_replace_test_drain_co(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BDRVReplaceTestState *s = bs->opaque;
+
+    /* Keep waking io_co up until it is done */
+    while (s->io_co) {
+        aio_co_wake(s->io_co);
+        s->io_co = NULL;
+        qemu_coroutine_yield();
+    }
+    s->drain_co = NULL;
+    bdrv_dec_in_flight(bs);
+}
+
+ /**
+  * If .drain_count is 0, wake up .io_co if there is one; and set
+  * .was_drained.
+@@ -1926,20 +1951,27 @@ static void coroutine_fn bdrv_replace_test_co_drain_begin(BlockDriverState *bs)
+     BDRVReplaceTestState *s = bs->opaque;
+ 
+     if (!s->drain_count) {
+-        /* Keep waking io_co up until it is done */
+-        s->drain_co = qemu_coroutine_self();
+-        while (s->io_co) {
+-            aio_co_wake(s->io_co);
+-            s->io_co = NULL;
+-            qemu_coroutine_yield();
+-        }
+-        s->drain_co = NULL;
+-
+        s->drain_co = qemu_coroutine_create(bdrv_replace_test_drain_co, bs);
+        bdrv_inc_in_flight(bs);
+        aio_co_enter(bdrv_get_aio_context(bs), s->drain_co);
+         s->was_drained = true;
+     }
+     s->drain_count++;
+ }
+ 
+static void coroutine_fn bdrv_replace_test_read_entry(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    char data;
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
+    int ret;
+
+    /* Queue a read request post-drain */
+    ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+    g_assert(ret >= 0);
+    bdrv_dec_in_flight(bs);
+}
+
+ /**
+  * Reduce .drain_count, set .was_undrained once it reaches 0.
+  * If .drain_count reaches 0 and the node has a backing file, issue a
+@@ -1951,17 +1983,13 @@ static void coroutine_fn bdrv_replace_test_co_drain_end(BlockDriverState *bs)
+ 
+     g_assert(s->drain_count > 0);
+     if (!--s->drain_count) {
+-        int ret;
+-
+         s->was_undrained = true;
+ 
+         if (bs->backing) {
+-            char data;
+-            QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, &data, 1);
+-
+-            /* Queue a read request post-drain */
+-            ret = bdrv_replace_test_co_preadv(bs, 0, 1, &qiov, 0);
+-            g_assert(ret >= 0);
+            Coroutine *co = qemu_coroutine_create(bdrv_replace_test_read_entry,
+                                                  bs);
+            bdrv_inc_in_flight(bs);
+            aio_co_enter(bdrv_get_aio_context(bs), co);
+         }
+     }
+ }
+-- 
+2.31.1
+
--- a/kvm-vhost-add-support-for-configure-interrupt.patch
+++ b/kvm-vhost-add-support-for-configure-interrupt.patch
@ -0,0 +1,185 @@
+From 42818e2bc6fa537fe52f7f0e6b094774a1eb00e1 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:48 +0800
+Subject: [PATCH 07/31] vhost: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [7/10] d58b439eb093f5dd3b7ca081af0ab75780e42917 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add functions to support configure interrupt.
+The configure interrupt process will start in vhost_dev_start
+and stop in vhost_dev_stop.
+
+Also add the functions to support vhost_config_pending and
+vhost_config_mask.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-8-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit f9a09ca3ea69d108d828b7c82f1bd61b2df6fc96)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/vhost.c         | 78 ++++++++++++++++++++++++++++++++++++++-
+ include/hw/virtio/vhost.h |  4 ++
+ 2 files changed, 81 insertions(+), 1 deletion(-)
+
+diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
+index 7fb008bc9e..84dbb39e07 100644
+--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
+@@ -1596,7 +1596,68 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
+     file.index = hdev->vhost_ops->vhost_get_vq_index(hdev, n);
+     r = hdev->vhost_ops->vhost_set_vring_call(hdev, &file);
+     if (r < 0) {
+-        VHOST_OPS_DEBUG(r, "vhost_set_vring_call failed");
+        error_report("vhost_set_vring_call failed %d", -r);
+    }
+}
+
+bool vhost_config_pending(struct vhost_dev *hdev)
+{
+    assert(hdev->vhost_ops);
+    if ((hdev->started == false) ||
+        (hdev->vhost_ops->vhost_set_config_call == NULL)) {
+        return false;
+    }
+
+    EventNotifier *notifier =
+        &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
+    return event_notifier_test_and_clear(notifier);
+}
+
+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask)
+{
+    int fd;
+    int r;
+    EventNotifier *notifier =
+        &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier;
+    EventNotifier *config_notifier = &vdev->config_notifier;
+    assert(hdev->vhost_ops);
+
+    if ((hdev->started == false) ||
+        (hdev->vhost_ops->vhost_set_config_call == NULL)) {
+        return;
+    }
+    if (mask) {
+        assert(vdev->use_guest_notifier_mask);
+        fd = event_notifier_get_fd(notifier);
+    } else {
+        fd = event_notifier_get_fd(config_notifier);
+    }
+    r = hdev->vhost_ops->vhost_set_config_call(hdev, fd);
+    if (r < 0) {
+        error_report("vhost_set_config_call failed %d", -r);
+    }
+}
+
+static void vhost_stop_config_intr(struct vhost_dev *dev)
+{
+    int fd = -1;
+    assert(dev->vhost_ops);
+    if (dev->vhost_ops->vhost_set_config_call) {
+        dev->vhost_ops->vhost_set_config_call(dev, fd);
+    }
+}
+
+static void vhost_start_config_intr(struct vhost_dev *dev)
+{
+    int r;
+
+    assert(dev->vhost_ops);
+    int fd = event_notifier_get_fd(&dev->vdev->config_notifier);
+    if (dev->vhost_ops->vhost_set_config_call) {
+        r = dev->vhost_ops->vhost_set_config_call(dev, fd);
+        if (!r) {
+            event_notifier_set(&dev->vdev->config_notifier);
+        }
+     }
+ }
+ 
+@@ -1836,6 +1897,16 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+         }
+     }
+ 
+    r = event_notifier_init(
+        &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier, 0);
+    if (r < 0) {
+        return r;
+    }
+    event_notifier_test_and_clear(
+        &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
+    if (!vdev->use_guest_notifier_mask) {
+        vhost_config_mask(hdev, vdev, true);
+    }
+     if (hdev->log_enabled) {
+         uint64_t log_base;
+ 
+@@ -1874,6 +1945,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+             vhost_device_iotlb_miss(hdev, vq->used_phys, true);
+         }
+     }
+    vhost_start_config_intr(hdev);
+     return 0;
+ fail_start:
+     if (vrings) {
+@@ -1903,6 +1975,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+ 
+     /* should only be called after backend is connected */
+     assert(hdev->vhost_ops);
+    event_notifier_test_and_clear(
+        &hdev->vqs[VHOST_QUEUE_NUM_CONFIG_INR].masked_config_notifier);
+    event_notifier_test_and_clear(&vdev->config_notifier);
+ 
+     trace_vhost_dev_stop(hdev, vdev->name, vrings);
+ 
+@@ -1925,6 +2000,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev, bool vrings)
+         }
+         memory_listener_unregister(&hdev->iommu_listener);
+     }
+    vhost_stop_config_intr(hdev);
+     vhost_log_put(hdev, true);
+     hdev->started = false;
+     vdev->vhost_started = false;
+diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
+index 67a6807fac..05bedb2416 100644
+--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
+@@ -33,6 +33,7 @@ struct vhost_virtqueue {
+     unsigned used_size;
+     EventNotifier masked_notifier;
+     EventNotifier error_notifier;
+    EventNotifier masked_config_notifier;
+     struct vhost_dev *dev;
+ };
+ 
+@@ -41,6 +42,7 @@ typedef unsigned long vhost_log_chunk_t;
+ #define VHOST_LOG_BITS (8 * sizeof(vhost_log_chunk_t))
+ #define VHOST_LOG_CHUNK (VHOST_LOG_PAGE * VHOST_LOG_BITS)
+ #define VHOST_INVALID_FEATURE_BIT   (0xff)
+#define VHOST_QUEUE_NUM_CONFIG_INR 0
+ 
+ struct vhost_log {
+     unsigned long long size;
+@@ -168,6 +170,8 @@ int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+  * Disable direct notifications to vhost device.
+  */
+ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
+bool vhost_config_pending(struct vhost_dev *hdev);
+void vhost_config_mask(struct vhost_dev *hdev, VirtIODevice *vdev, bool mask);
+ 
+ /**
+  * vhost_dev_is_started() - report status of vhost device
+-- 
+2.31.1
+
--- a/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch
+++ b/kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch
@ -0,0 +1,157 @@
+From 55aad90e347599e88747888ddbefcba33427f386 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Fri, 16 Dec 2022 11:35:52 +0800
+Subject: [PATCH 12/31] vhost: fix vq dirty bitmap syncing when vIOMMU is
+ enabled
+
+RH-Author: Eric Auger <eric.auger@redhat.com>
+RH-MergeRequest: 134: vhost: fix vq dirty bitmap syncing when vIOMMU is enabled
+RH-Bugzilla: 2124856
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Jason Wang <jasowang@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Commit: [1/1] 57ef499b63dc2cca6e64ee84d1dc127635868ca2 (eauger1/centos-qemu-kvm)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124856
+Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=49989924
+Upstream: yes
+
+When vIOMMU is enabled, the vq->used_phys is actually the IOVA not
+GPA. So we need to translate it to GPA before the syncing otherwise we
+may hit the following crash since IOVA could be out of the scope of
+the GPA log size. This could be noted when using virtio-IOMMU with
+vhost using 1G memory.
+
+Fixes: c471ad0e9bd46 ("vhost_net: device IOTLB support")
+Cc: qemu-stable@nongnu.org
+Tested-by: Lei Yang <leiyang@redhat.com>
+Reported-by: Yalan Zhang <yalzhang@redhat.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <20221216033552.77087-1-jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 345cc1cbcbce2bab00abc2b88338d7d89c702d6b)
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+---
+ hw/virtio/vhost.c | 84 ++++++++++++++++++++++++++++++++++++-----------
+ 1 file changed, 64 insertions(+), 20 deletions(-)
+
+diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
+index 84dbb39e07..2c566dc539 100644
+--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
+@@ -20,6 +20,7 @@
+ #include "qemu/range.h"
+ #include "qemu/error-report.h"
+ #include "qemu/memfd.h"
+#include "qemu/log.h"
+ #include "standard-headers/linux/vhost_types.h"
+ #include "hw/virtio/virtio-bus.h"
+ #include "hw/virtio/virtio-access.h"
+@@ -106,6 +107,24 @@ static void vhost_dev_sync_region(struct vhost_dev *dev,
+     }
+ }
+ 
+static bool vhost_dev_has_iommu(struct vhost_dev *dev)
+{
+    VirtIODevice *vdev = dev->vdev;
+
+    /*
+     * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
+     * incremental memory mapping API via IOTLB API. For platform that
+     * does not have IOMMU, there's no need to enable this feature
+     * which may cause unnecessary IOTLB miss/update transactions.
+     */
+    if (vdev) {
+        return virtio_bus_device_iommu_enabled(vdev) &&
+            virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+    } else {
+        return false;
+    }
+}
+
+ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+                                    MemoryRegionSection *section,
+                                    hwaddr first,
+@@ -137,8 +156,51 @@ static int vhost_sync_dirty_bitmap(struct vhost_dev *dev,
+             continue;
+         }
+ 
+-        vhost_dev_sync_region(dev, section, start_addr, end_addr, vq->used_phys,
+-                              range_get_last(vq->used_phys, vq->used_size));
+        if (vhost_dev_has_iommu(dev)) {
+            IOMMUTLBEntry iotlb;
+            hwaddr used_phys = vq->used_phys, used_size = vq->used_size;
+            hwaddr phys, s, offset;
+
+            while (used_size) {
+                rcu_read_lock();
+                iotlb = address_space_get_iotlb_entry(dev->vdev->dma_as,
+                                                      used_phys,
+                                                      true,
+                                                      MEMTXATTRS_UNSPECIFIED);
+                rcu_read_unlock();
+
+                if (!iotlb.target_as) {
+                    qemu_log_mask(LOG_GUEST_ERROR, "translation "
+                                  "failure for used_iova %"PRIx64"\n",
+                                  used_phys);
+                    return -EINVAL;
+                }
+
+                offset = used_phys & iotlb.addr_mask;
+                phys = iotlb.translated_addr + offset;
+
+                /*
+                 * Distance from start of used ring until last byte of
+                 * IOMMU page.
+                 */
+                s = iotlb.addr_mask - offset;
+                /*
+                 * Size of used ring, or of the part of it until end
+                 * of IOMMU page. To avoid zero result, do the adding
+                 * outside of MIN().
+                 */
+                s = MIN(s, used_size - 1) + 1;
+
+                vhost_dev_sync_region(dev, section, start_addr, end_addr, phys,
+                                      range_get_last(phys, s));
+                used_size -= s;
+                used_phys += s;
+            }
+        } else {
+            vhost_dev_sync_region(dev, section, start_addr,
+                                  end_addr, vq->used_phys,
+                                  range_get_last(vq->used_phys, vq->used_size));
+        }
+     }
+     return 0;
+ }
+@@ -306,24 +368,6 @@ static inline void vhost_dev_log_resize(struct vhost_dev *dev, uint64_t size)
+     dev->log_size = size;
+ }
+ 
+-static bool vhost_dev_has_iommu(struct vhost_dev *dev)
+-{
+-    VirtIODevice *vdev = dev->vdev;
+-
+-    /*
+-     * For vhost, VIRTIO_F_IOMMU_PLATFORM means the backend support
+-     * incremental memory mapping API via IOTLB API. For platform that
+-     * does not have IOMMU, there's no need to enable this feature
+-     * which may cause unnecessary IOTLB miss/update transactions.
+-     */
+-    if (vdev) {
+-        return virtio_bus_device_iommu_enabled(vdev) &&
+-            virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+-    } else {
+-        return false;
+-    }
+-}
+-
+ static void *vhost_memory_map(struct vhost_dev *dev, hwaddr addr,
+                               hwaddr *plen, bool is_write)
+ {
+-- 
+2.31.1
+
--- a/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch
+++ b/kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch
@ -0,0 +1,56 @@
+From d135303da1187d9f214e520a977fe7c47e5ce1f0 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:45 +0800
+Subject: [PATCH 04/31] vhost: introduce new VhostOps vhost_set_config_call
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [4/10] c2492838d9c1415e42d2507f2956d640a30325f2 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+This patch introduces a new VhostOps callback, vhost_set_config_call.
+This function allows QEMU to pass the config
+event fd to the kernel driver.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-5-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 9b30cdf9bbf9524a4f4f8a6eb551eb13cbbd3893)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ include/hw/virtio/vhost-backend.h | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h
+index eab46d7f0b..c5ab49051e 100644
+--- a/include/hw/virtio/vhost-backend.h
+++ b/include/hw/virtio/vhost-backend.h
+@@ -128,6 +128,8 @@ typedef int (*vhost_get_device_id_op)(struct vhost_dev *dev, uint32_t *dev_id);
+ 
+ typedef bool (*vhost_force_iommu_op)(struct vhost_dev *dev);
+ 
+typedef int (*vhost_set_config_call_op)(struct vhost_dev *dev,
+                                       int fd);
+ typedef struct VhostOps {
+     VhostBackendType backend_type;
+     vhost_backend_init vhost_backend_init;
+@@ -174,6 +176,7 @@ typedef struct VhostOps {
+     vhost_vq_get_addr_op  vhost_vq_get_addr;
+     vhost_get_device_id_op vhost_get_device_id;
+     vhost_force_iommu_op vhost_force_iommu;
+    vhost_set_config_call_op vhost_set_config_call;
+ } VhostOps;
+ 
+ int vhost_backend_update_device_iotlb(struct vhost_dev *dev,
+-- 
+2.31.1
+
--- a/kvm-vhost-vdpa-add-support-for-config-interrupt.patch
+++ b/kvm-vhost-vdpa-add-support-for-config-interrupt.patch
@ -0,0 +1,73 @@
+From e01563a8de9a45937ffd8d4c1d74a6890ffb6eb6 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:46 +0800
+Subject: [PATCH 05/31] vhost-vdpa: add support for config interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [5/10] 49bfd214a503f8e199ff93f4bbfcbd4c4f2405b5 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add a new callback function in vhost-vdpa. The function
+vhost_set_config_call passes the event fd to the kernel.
+This function will be called in vhost_dev_start
+and vhost_dev_stop.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-6-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 259f3acc1c675dd77ebbdb28a483f5d0220bdbf6)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/trace-events | 1 +
+ hw/virtio/vhost-vdpa.c | 8 ++++++++
+ 2 files changed, 9 insertions(+)
+
+diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
+index 14fc5b9bb2..46f2faf04e 100644
+--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
+@@ -62,6 +62,7 @@ vhost_vdpa_get_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRI
+ vhost_vdpa_set_owner(void *dev) "dev: %p"
+ vhost_vdpa_vq_get_addr(void *dev, void *vq, uint64_t desc_user_addr, uint64_t avail_user_addr, uint64_t used_user_addr) "dev: %p vq: %p desc_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64
+ vhost_vdpa_get_iova_range(void *dev, uint64_t first, uint64_t last) "dev: %p first: 0x%"PRIx64" last: 0x%"PRIx64
+vhost_vdpa_set_config_call(void *dev, int fd)"dev: %p fd: %d"
+ 
+ # virtio.c
+ virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u"
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 7468e44b87..c5be2645b0 100644
+--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
+@@ -754,6 +754,13 @@ static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+     return 0;
+ }
+ 
+static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
+                                       int fd)
+{
+    trace_vhost_vdpa_set_config_call(dev, fd);
+    return vhost_vdpa_call(dev, VHOST_VDPA_SET_CONFIG_CALL, &fd);
+}
+
+ static void vhost_vdpa_dump_config(struct vhost_dev *dev, const uint8_t *config,
+                                    uint32_t config_len)
+ {
+@@ -1310,4 +1317,5 @@ const VhostOps vdpa_ops = {
+         .vhost_get_device_id = vhost_vdpa_get_device_id,
+         .vhost_vq_get_addr = vhost_vdpa_vq_get_addr,
+         .vhost_force_iommu = vhost_vdpa_force_iommu,
+        .vhost_set_config_call = vhost_vdpa_set_config_call,
+ };
+-- 
+2.31.1
+
--- a/kvm-virtio-add-support-for-configure-interrupt.patch
+++ b/kvm-virtio-add-support-for-configure-interrupt.patch
@ -0,0 +1,115 @@
+From e04c76339580effae41617b690b58a6605e0f40b Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:47 +0800
+Subject: [PATCH 06/31] virtio: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [6/10] 7048eb488b732578686d451684babaf17b582b05 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add the functions to support the configure interrupt in virtio
+The function virtio_config_guest_notifier_read will notify the
+guest if there is a configure interrupt.
+The function virtio_config_set_guest_notifier_fd_handler is
+to set the fd handler for the notifier
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-7-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 7d847d0c9b93b91160f40d69a65c904d76f1edd8)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/virtio.c         | 29 +++++++++++++++++++++++++++++
+ include/hw/virtio/virtio.h |  4 ++++
+ 2 files changed, 33 insertions(+)
+
+diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
+index eb6347ab5d..34e9c5d141 100644
+--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
+@@ -4012,7 +4012,14 @@ static void virtio_queue_guest_notifier_read(EventNotifier *n)
+         virtio_irq(vq);
+     }
+ }
+static void virtio_config_guest_notifier_read(EventNotifier *n)
+{
+    VirtIODevice *vdev = container_of(n, VirtIODevice, config_notifier);
+ 
+    if (event_notifier_test_and_clear(n)) {
+        virtio_notify_config(vdev);
+    }
+}
+ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+                                                 bool with_irqfd)
+ {
+@@ -4029,6 +4036,23 @@ void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
+     }
+ }
+ 
+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
+                                                 bool assign, bool with_irqfd)
+{
+    EventNotifier *n;
+    n = &vdev->config_notifier;
+    if (assign && !with_irqfd) {
+        event_notifier_set_handler(n, virtio_config_guest_notifier_read);
+    } else {
+        event_notifier_set_handler(n, NULL);
+    }
+    if (!assign) {
+        /* Test and clear notifier before closing it,*/
+        /* in case poll callback didn't have time to run. */
+        virtio_config_guest_notifier_read(n);
+    }
+}
+
+ EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
+ {
+     return &vq->guest_notifier;
+@@ -4109,6 +4133,11 @@ EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
+     return &vq->host_notifier;
+ }
+ 
+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev)
+{
+    return &vdev->config_notifier;
+}
+
+ void virtio_queue_set_host_notifier_enabled(VirtQueue *vq, bool enabled)
+ {
+     vq->host_notifier_enabled = enabled;
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index 1f4a41b958..9c3a4642f2 100644
+--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
+@@ -138,6 +138,7 @@ struct VirtIODevice
+     AddressSpace *dma_as;
+     QLIST_HEAD(, VirtQueue) *vector_queues;
+     QTAILQ_ENTRY(VirtIODevice) next;
+    EventNotifier config_notifier;
+ };
+ 
+ struct VirtioDeviceClass {
+@@ -360,6 +361,9 @@ void virtio_queue_aio_attach_host_notifier_no_poll(VirtQueue *vq, AioContext *ct
+ void virtio_queue_aio_detach_host_notifier(VirtQueue *vq, AioContext *ctx);
+ VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector);
+ VirtQueue *virtio_vector_next_queue(VirtQueue *vq);
+EventNotifier *virtio_config_get_guest_notifier(VirtIODevice *vdev);
+void virtio_config_set_guest_notifier_fd_handler(VirtIODevice *vdev,
+                                                 bool assign, bool with_irqfd);
+ 
+ static inline void virtio_add_feature(uint64_t *features, unsigned int fbit)
+ {
+-- 
+2.31.1
+
--- a/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch
+++ b/kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch
@ -0,0 +1,262 @@
+From 34a267758cf016f34b327318500efdbf0f606033 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:42 +0800
+Subject: [PATCH 01/31] virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [1/10] f374aaae221bc5a4c2521a267d21350b812e11ba (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+To support configure interrupt for vhost-vdpa,
+introduce VIRTIO_CONFIG_IRQ_IDX -1 as configure interrupt's queue index.
+Then we can reuse the functions guest_notifier_mask and guest_notifier_pending.
+Add the check of queue index in these drivers; if the driver does not support
+configure interrupt, the function will just return.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-2-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 544f0278afcab2bebab61b14e4c2c58e65911f5b)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/display/vhost-user-gpu.c    | 18 ++++++++++++++++++
+ hw/net/virtio-net.c            | 22 ++++++++++++++++++++--
+ hw/virtio/vhost-user-fs.c      | 18 ++++++++++++++++++
+ hw/virtio/vhost-user-gpio.c    | 10 ++++++++++
+ hw/virtio/vhost-vsock-common.c | 18 ++++++++++++++++++
+ hw/virtio/virtio-crypto.c      | 18 ++++++++++++++++++
+ include/hw/virtio/virtio.h     |  3 +++
+ 7 files changed, 105 insertions(+), 2 deletions(-)
+
+diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c
+index 19c0e20103..4380a5e672 100644
+--- a/hw/display/vhost-user-gpu.c
+++ b/hw/display/vhost-user-gpu.c
+@@ -486,6 +486,15 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx)
+ {
+     VhostUserGPU *g = VHOST_USER_GPU(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return false;
+    }
+     return vhost_virtqueue_pending(&g->vhost->dev, idx);
+ }
+ 
+@@ -494,6 +503,15 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+ {
+     VhostUserGPU *g = VHOST_USER_GPU(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+     vhost_virtqueue_mask(&g->vhost->dev, vdev, idx, mask);
+ }
+ 
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index aba12759d5..bee35d6f9f 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -3316,6 +3316,15 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+     } else {
+         nc = qemu_get_subqueue(n->nic, vq2q(idx));
+     }
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return false
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return false;
+    }
+     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+ }
+ 
+@@ -3339,8 +3348,17 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+     } else {
+         nc = qemu_get_subqueue(n->nic, vq2q(idx));
+     }
+-    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
+-                             vdev, idx, mask);
+    /*
+     *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+
+    vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
+ }
+ 
+ static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
+diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c
+index d97b179e6f..f5049735ac 100644
+--- a/hw/virtio/vhost-user-fs.c
+++ b/hw/virtio/vhost-user-fs.c
+@@ -159,6 +159,15 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx,
+ {
+     VHostUserFS *fs = VHOST_USER_FS(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+     vhost_virtqueue_mask(&fs->vhost_dev, vdev, idx, mask);
+ }
+ 
+@@ -166,6 +175,15 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx)
+ {
+     VHostUserFS *fs = VHOST_USER_FS(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return false;
+    }
+     return vhost_virtqueue_pending(&fs->vhost_dev, idx);
+ }
+ 
+diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
+index b7b82a1099..fe3da32c74 100644
+--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
+@@ -191,6 +191,16 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask)
+ {
+     VHostUserGPIO *gpio = VHOST_USER_GPIO(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+
+     vhost_virtqueue_mask(&gpio->vhost_dev, vdev, idx, mask);
+ }
+ 
+diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
+index d21c72b401..d2b5519d5a 100644
+--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
+@@ -127,6 +127,15 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx,
+ {
+     VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+     vhost_virtqueue_mask(&vvc->vhost_dev, vdev, idx, mask);
+ }
+ 
+@@ -135,6 +144,15 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev,
+ {
+     VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return false;
+    }
+     return vhost_virtqueue_pending(&vvc->vhost_dev, idx);
+ }
+ 
+diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c
+index 97da74e719..516425e26a 100644
+--- a/hw/virtio/virtio-crypto.c
+++ b/hw/virtio/virtio-crypto.c
+@@ -1182,6 +1182,15 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx,
+ 
+     assert(vcrypto->vhost_started);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return;
+    }
+     cryptodev_vhost_virtqueue_mask(vdev, queue, idx, mask);
+ }
+ 
+@@ -1192,6 +1201,15 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx)
+ 
+     assert(vcrypto->vhost_started);
+ 
+    /*
+     * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1
+     * as the Marco of configure interrupt's IDX, If this driver does not
+     * support, the function will return
+     */
+
+    if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        return false;
+    }
+     return cryptodev_vhost_virtqueue_pending(vdev, queue, idx);
+ }
+ 
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index acfd4df125..1f4a41b958 100644
+--- a/include/hw/virtio/virtio.h
+++ b/include/hw/virtio/virtio.h
+@@ -79,6 +79,9 @@ typedef struct VirtQueueElement
+ 
+ #define VIRTIO_NO_VECTOR 0xffff
+ 
+/* special index value used internally for config irqs */
+#define VIRTIO_CONFIG_IRQ_IDX -1
+
+ #define TYPE_VIRTIO_DEVICE "virtio-device"
+ OBJECT_DECLARE_TYPE(VirtIODevice, VirtioDeviceClass, VIRTIO_DEVICE)
+ 
+-- 
+2.31.1
+
--- a/kvm-virtio-mmio-add-support-for-configure-interrupt.patch
+++ b/kvm-virtio-mmio-add-support-for-configure-interrupt.patch
@ -0,0 +1,80 @@
+From 181705090c9963c2da97811838ace5bb058737c6 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:50 +0800
+Subject: [PATCH 09/31] virtio-mmio: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [9/10] 742cc2b425ffd7bbd393772526e7481446ee131c (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add configure interrupt support in virtio-mmio bus.
+Add a function to set the configure guest notifier.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-10-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit cd336e834620ea78edef049c3567f312974e475b)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/virtio-mmio.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
+index d240efef97..103260ec15 100644
+--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
+@@ -670,7 +670,30 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
+ 
+     return 0;
+ }
+static int virtio_mmio_set_config_guest_notifier(DeviceState *d, bool assign,
+                                                 bool with_irqfd)
+{
+    VirtIOMMIOProxy *proxy = VIRTIO_MMIO(d);
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+    EventNotifier *notifier = virtio_config_get_guest_notifier(vdev);
+    int r = 0;
+ 
+    if (assign) {
+        r = event_notifier_init(notifier, 0);
+        if (r < 0) {
+            return r;
+        }
+        virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+    } else {
+        virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+        event_notifier_cleanup(notifier);
+    }
+    if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
+        vdc->guest_notifier_mask(vdev, VIRTIO_CONFIG_IRQ_IDX, !assign);
+    }
+    return r;
+}
+ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
+                                            bool assign)
+ {
+@@ -692,6 +715,10 @@ static int virtio_mmio_set_guest_notifiers(DeviceState *d, int nvqs,
+             goto assign_error;
+         }
+     }
+    r = virtio_mmio_set_config_guest_notifier(d, assign, with_irqfd);
+    if (r < 0) {
+        goto assign_error;
+    }
+ 
+     return 0;
+ 
+-- 
+2.31.1
+
--- a/kvm-virtio-net-add-support-for-configure-interrupt.patch
+++ b/kvm-virtio-net-add-support-for-configure-interrupt.patch
@ -0,0 +1,115 @@
+From 2b8e3409edb8a17d89c3829cfa3d92bdfdd43c53 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:49 +0800
+Subject: [PATCH 08/31] virtio-net: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [8/10] 1b125169bea6c81c508b154fa1bae68af153b312 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add functions to support configure interrupt in virtio_net
+Add the functions to support vhost_net_config_pending
+and vhost_net_config_mask.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-9-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 8aab0d1dbe90c7b5ac6672a1a09b0578178f5f4c)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/net/vhost_net-stub.c | 9 +++++++++
+ hw/net/vhost_net.c      | 9 +++++++++
+ hw/net/virtio-net.c     | 4 ++--
+ include/net/vhost_net.h | 2 ++
+ 4 files changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c
+index 9f7daae99c..c36f258201 100644
+--- a/hw/net/vhost_net-stub.c
+++ b/hw/net/vhost_net-stub.c
+@@ -82,6 +82,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+ {
+ }
+ 
+bool vhost_net_config_pending(VHostNetState *net)
+{
+    return false;
+}
+
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
+{
+}
+
+ int vhost_net_notify_migration_done(struct vhost_net *net, char* mac_addr)
+ {
+     return -1;
+diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
+index 043058ff43..6a55f5a473 100644
+--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
+@@ -478,6 +478,15 @@ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+     vhost_virtqueue_mask(&net->dev, dev, idx, mask);
+ }
+ 
+bool vhost_net_config_pending(VHostNetState *net)
+{
+    return vhost_config_pending(&net->dev);
+}
+
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask)
+{
+    vhost_config_mask(&net->dev, dev, mask);
+}
+ VHostNetState *get_vhost_net(NetClientState *nc)
+ {
+     VHostNetState *vhost_net = 0;
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index bee35d6f9f..ec974f7a76 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -3323,7 +3323,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+      */
+ 
+     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+-        return false;
+        return vhost_net_config_pending(get_vhost_net(nc->peer));
+     }
+     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+ }
+@@ -3355,9 +3355,9 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+      */
+ 
+     if (idx == VIRTIO_CONFIG_IRQ_IDX) {
+        vhost_net_config_mask(get_vhost_net(nc->peer), vdev, mask);
+         return;
+     }
+-
+     vhost_net_virtqueue_mask(get_vhost_net(nc->peer), vdev, idx, mask);
+ }
+ 
+diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
+index 40b9a40074..dbbd0dc04e 100644
+--- a/include/net/vhost_net.h
+++ b/include/net/vhost_net.h
+@@ -39,6 +39,8 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data,
+ bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
+ void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
+                               int idx, bool mask);
+bool vhost_net_config_pending(VHostNetState *net);
+void vhost_net_config_mask(VHostNetState *net, VirtIODevice *dev, bool mask);
+ int vhost_net_notify_migration_done(VHostNetState *net, char* mac_addr);
+ VHostNetState *get_vhost_net(NetClientState *nc);
+ 
+-- 
+2.31.1
+
--- a/kvm-virtio-pci-add-support-for-configure-interrupt.patch
+++ b/kvm-virtio-pci-add-support-for-configure-interrupt.patch
@ -0,0 +1,274 @@
+From 61ac1476d3820c97e1cc103af422b17bc94c6ca5 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:51 +0800
+Subject: [PATCH 10/31] virtio-pci: add support for configure interrupt
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [10/10] ebd6a11d7699660d8ac5a4e44a790f823daea57c (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+Add process to handle the configure interrupt. The function's
+logic is the same as for the vq interrupt. Add extra process to check
+the configure interrupt.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-11-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 1680542862edd963e6380dd4121a5e85df55581f)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/virtio-pci.c         | 118 +++++++++++++++++++++++++++------
+ include/hw/virtio/virtio-pci.h |   4 +-
+ 2 files changed, 102 insertions(+), 20 deletions(-)
+
+diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
+index ec816ea367..3f00e91718 100644
+--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
+@@ -751,7 +751,8 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
+     VirtQueue *vq;
+ 
+     if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
+-        return -1;
+        *n = virtio_config_get_guest_notifier(vdev);
+        *vector = vdev->config_vector;
+     } else {
+         if (!virtio_queue_get_num(vdev, queue_no)) {
+             return -1;
+@@ -811,7 +812,7 @@ undo:
+     }
+     return ret;
+ }
+-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+static int kvm_virtio_pci_vector_vq_use(VirtIOPCIProxy *proxy, int nvqs)
+ {
+     int queue_no;
+     int ret = 0;
+@@ -826,6 +827,10 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+     return ret;
+ }
+ 
+static int kvm_virtio_pci_vector_config_use(VirtIOPCIProxy *proxy)
+{
+    return kvm_virtio_pci_vector_use_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
+}
+ 
+ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
+                                               int queue_no)
+@@ -850,7 +855,7 @@ static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
+     kvm_virtio_pci_vq_vector_release(proxy, vector);
+ }
+ 
+-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+static void kvm_virtio_pci_vector_vq_release(VirtIOPCIProxy *proxy, int nvqs)
+ {
+     int queue_no;
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+@@ -863,6 +868,11 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+     }
+ }
+ 
+static void kvm_virtio_pci_vector_config_release(VirtIOPCIProxy *proxy)
+{
+    kvm_virtio_pci_vector_release_one(proxy, VIRTIO_CONFIG_IRQ_IDX);
+}
+
+ static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector,
+@@ -944,9 +954,19 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
+         }
+         vq = virtio_vector_next_queue(vq);
+     }
+-
+    /* unmask config intr */
+    if (vector == vdev->config_vector) {
+        n = virtio_config_get_guest_notifier(vdev);
+        ret = virtio_pci_one_vector_unmask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector,
+                                           msg, n);
+        if (ret < 0) {
+            goto undo_config;
+        }
+    }
+     return 0;
+-
+undo_config:
+    n = virtio_config_get_guest_notifier(vdev);
+    virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
+ undo:
+     vq = virtio_vector_first_queue(vdev, vector);
+     while (vq && unmasked >= 0) {
+@@ -980,6 +1000,11 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
+         }
+         vq = virtio_vector_next_queue(vq);
+     }
+
+    if (vector == vdev->config_vector) {
+        n = virtio_config_get_guest_notifier(vdev);
+        virtio_pci_one_vector_mask(proxy, VIRTIO_CONFIG_IRQ_IDX, vector, n);
+    }
+ }
+ 
+ static void virtio_pci_vector_poll(PCIDevice *dev,
+@@ -1011,6 +1036,34 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
+             msix_set_pending(dev, vector);
+         }
+     }
+    /* poll the config intr */
+    ret = virtio_pci_get_notifier(proxy, VIRTIO_CONFIG_IRQ_IDX, &notifier,
+                                  &vector);
+    if (ret < 0) {
+        return;
+    }
+    if (vector < vector_start || vector >= vector_end ||
+        !msix_is_masked(dev, vector)) {
+        return;
+    }
+    if (k->guest_notifier_pending) {
+        if (k->guest_notifier_pending(vdev, VIRTIO_CONFIG_IRQ_IDX)) {
+            msix_set_pending(dev, vector);
+        }
+    } else if (event_notifier_test_and_clear(notifier)) {
+        msix_set_pending(dev, vector);
+    }
+}
+
+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
+                                              int n, bool assign,
+                                              bool with_irqfd)
+{
+    if (n == VIRTIO_CONFIG_IRQ_IDX) {
+        virtio_config_set_guest_notifier_fd_handler(vdev, assign, with_irqfd);
+    } else {
+        virtio_queue_set_guest_notifier_fd_handler(vq, assign, with_irqfd);
+    }
+ }
+ 
+ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
+@@ -1019,17 +1072,25 @@ static int virtio_pci_set_guest_notifier(DeviceState *d, int n, bool assign,
+     VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+-    VirtQueue *vq = virtio_get_queue(vdev, n);
+-    EventNotifier *notifier = virtio_queue_get_guest_notifier(vq);
+    VirtQueue *vq = NULL;
+    EventNotifier *notifier = NULL;
+
+    if (n == VIRTIO_CONFIG_IRQ_IDX) {
+        notifier = virtio_config_get_guest_notifier(vdev);
+    } else {
+        vq = virtio_get_queue(vdev, n);
+        notifier = virtio_queue_get_guest_notifier(vq);
+    }
+ 
+     if (assign) {
+         int r = event_notifier_init(notifier, 0);
+         if (r < 0) {
+             return r;
+         }
+-        virtio_queue_set_guest_notifier_fd_handler(vq, true, with_irqfd);
+        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, true, with_irqfd);
+     } else {
+-        virtio_queue_set_guest_notifier_fd_handler(vq, false, with_irqfd);
+        virtio_pci_set_guest_notifier_fd_handler(vdev, vq, n, false,
+                                                 with_irqfd);
+         event_notifier_cleanup(notifier);
+     }
+ 
+@@ -1072,10 +1133,13 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
+     proxy->nvqs_with_notifiers = nvqs;
+ 
+     /* Must unset vector notifier while guest notifier is still assigned */
+-    if ((proxy->vector_irqfd || k->guest_notifier_mask) && !assign) {
+    if ((proxy->vector_irqfd ||
+         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
+        !assign) {
+         msix_unset_vector_notifiers(&proxy->pci_dev);
+         if (proxy->vector_irqfd) {
+-            kvm_virtio_pci_vector_release(proxy, nvqs);
+            kvm_virtio_pci_vector_vq_release(proxy, nvqs);
+            kvm_virtio_pci_vector_config_release(proxy);
+             g_free(proxy->vector_irqfd);
+             proxy->vector_irqfd = NULL;
+         }
+@@ -1091,20 +1155,30 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
+             goto assign_error;
+         }
+     }
+-
+    r = virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, assign,
+                                      with_irqfd);
+    if (r < 0) {
+        goto config_assign_error;
+    }
+     /* Must set vector notifier after guest notifier has been assigned */
+-    if ((with_irqfd || k->guest_notifier_mask) && assign) {
+    if ((with_irqfd ||
+         (vdev->use_guest_notifier_mask && k->guest_notifier_mask)) &&
+        assign) {
+         if (with_irqfd) {
+             proxy->vector_irqfd =
+                 g_malloc0(sizeof(*proxy->vector_irqfd) *
+                           msix_nr_vectors_allocated(&proxy->pci_dev));
+-            r = kvm_virtio_pci_vector_use(proxy, nvqs);
+            r = kvm_virtio_pci_vector_vq_use(proxy, nvqs);
+            if (r < 0) {
+                goto config_assign_error;
+            }
+            r = kvm_virtio_pci_vector_config_use(proxy);
+             if (r < 0) {
+-                goto assign_error;
+                goto config_error;
+             }
+         }
+-        r = msix_set_vector_notifiers(&proxy->pci_dev,
+-                                      virtio_pci_vector_unmask,
+
+        r = msix_set_vector_notifiers(&proxy->pci_dev, virtio_pci_vector_unmask,
+                                       virtio_pci_vector_mask,
+                                       virtio_pci_vector_poll);
+         if (r < 0) {
+@@ -1117,9 +1191,15 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
+ notifiers_error:
+     if (with_irqfd) {
+         assert(assign);
+-        kvm_virtio_pci_vector_release(proxy, nvqs);
+        kvm_virtio_pci_vector_vq_release(proxy, nvqs);
+     }
+-
+config_error:
+    if (with_irqfd) {
+        kvm_virtio_pci_vector_config_release(proxy);
+    }
+config_assign_error:
+    virtio_pci_set_guest_notifier(d, VIRTIO_CONFIG_IRQ_IDX, !assign,
+                                  with_irqfd);
+ assign_error:
+     /* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
+     assert(assign);
+diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
+index 938799e8f6..c02e278f46 100644
+--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
+@@ -256,5 +256,7 @@ void virtio_pci_types_register(const VirtioPCIDeviceTypeInfo *t);
+  * @fixed_queues.
+  */
+ unsigned virtio_pci_optimal_num_queues(unsigned fixed_queues);
+-
+void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue *vq,
+                                              int n, bool assign,
+                                              bool with_irqfd);
+ #endif
+-- 
+2.31.1
+
--- a/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch
+++ b/kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch
@ -0,0 +1,272 @@
+From 9a234f849273d3480e4a88042cb1ea06a37a626b Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:43 +0800
+Subject: [PATCH 02/31] virtio-pci: decouple notifier from interrupt process
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [2/10] a20f4c9ff38b239531d12cbcc7deaa649c86abc3 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+To reuse the notifier process, we add virtio_pci_get_notifier() to get
+the notifier and the vector. The input to this function is the queue
+index; the output is the notifier and the vector.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-3-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 2e07f69d0c828e21515b63dc22884d548540b382)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/virtio-pci.c | 88 +++++++++++++++++++++++++++---------------
+ 1 file changed, 57 insertions(+), 31 deletions(-)
+
+diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
+index a1c9dfa7bb..52c7692fff 100644
+--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
+@@ -728,29 +728,41 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
+ }
+ 
+ static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
+-                                 unsigned int queue_no,
+                                 EventNotifier *n,
+                                  unsigned int vector)
+ {
+     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+-    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+-    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+     return kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, n, NULL, irqfd->virq);
+ }
+ 
+ static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
+-                                      unsigned int queue_no,
+                                      EventNotifier *n ,
+                                       unsigned int vector)
+ {
+-    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+-    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+     int ret;
+ 
+     ret = kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, n, irqfd->virq);
+     assert(ret == 0);
+ }
+static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
+                                      EventNotifier **n, unsigned int *vector)
+{
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+    VirtQueue *vq;
+
+    if (queue_no == VIRTIO_CONFIG_IRQ_IDX) {
+        return -1;
+    } else {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            return -1;
+        }
+        *vector = virtio_queue_vector(vdev, queue_no);
+        vq = virtio_get_queue(vdev, queue_no);
+        *n = virtio_queue_get_guest_notifier(vq);
+    }
+    return 0;
+}
+ 
+ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+ {
+@@ -759,12 +771,15 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+     unsigned int vector;
+     int ret, queue_no;
+-
+    EventNotifier *n;
+     for (queue_no = 0; queue_no < nvqs; queue_no++) {
+         if (!virtio_queue_get_num(vdev, queue_no)) {
+             break;
+         }
+-        vector = virtio_queue_vector(vdev, queue_no);
+        ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+        if (ret < 0) {
+            break;
+        }
+         if (vector >= msix_nr_vectors_allocated(dev)) {
+             continue;
+         }
+@@ -776,7 +791,7 @@ static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+          * Otherwise, delay until unmasked in the frontend.
+          */
+         if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+            ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
+             if (ret < 0) {
+                 kvm_virtio_pci_vq_vector_release(proxy, vector);
+                 goto undo;
+@@ -792,7 +807,11 @@ undo:
+             continue;
+         }
+         if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+            ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+            if (ret < 0) {
+                break;
+            }
+            kvm_virtio_pci_irqfd_release(proxy, n, vector);
+         }
+         kvm_virtio_pci_vq_vector_release(proxy, vector);
+     }
+@@ -806,12 +825,16 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+     unsigned int vector;
+     int queue_no;
+     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+-
+    EventNotifier *n;
+    int ret ;
+     for (queue_no = 0; queue_no < nvqs; queue_no++) {
+         if (!virtio_queue_get_num(vdev, queue_no)) {
+             break;
+         }
+-        vector = virtio_queue_vector(vdev, queue_no);
+        ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+        if (ret < 0) {
+            break;
+        }
+         if (vector >= msix_nr_vectors_allocated(dev)) {
+             continue;
+         }
+@@ -819,21 +842,20 @@ static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+          * Otherwise, it was cleaned when masked in the frontend.
+          */
+         if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+            kvm_virtio_pci_irqfd_release(proxy, n, vector);
+         }
+         kvm_virtio_pci_vq_vector_release(proxy, vector);
+     }
+ }
+ 
+-static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+static int virtio_pci_one_vector_unmask(VirtIOPCIProxy *proxy,
+                                        unsigned int queue_no,
+                                        unsigned int vector,
+-                                       MSIMessage msg)
+                                       MSIMessage msg,
+                                       EventNotifier *n)
+ {
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+-    VirtQueue *vq = virtio_get_queue(vdev, queue_no);
+-    EventNotifier *n = virtio_queue_get_guest_notifier(vq);
+     VirtIOIRQFD *irqfd;
+     int ret = 0;
+ 
+@@ -860,14 +882,15 @@ static int virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
+             event_notifier_set(n);
+         }
+     } else {
+-        ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
+        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
+     }
+     return ret;
+ }
+ 
+-static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+static void virtio_pci_one_vector_mask(VirtIOPCIProxy *proxy,
+                                              unsigned int queue_no,
+-                                             unsigned int vector)
+                                             unsigned int vector,
+                                             EventNotifier *n)
+ {
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+@@ -878,7 +901,7 @@ static void virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
+     if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+         k->guest_notifier_mask(vdev, queue_no, true);
+     } else {
+-        kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
+        kvm_virtio_pci_irqfd_release(proxy, n, vector);
+     }
+ }
+ 
+@@ -888,6 +911,7 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
+     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
+    EventNotifier *n;
+     int ret, index, unmasked = 0;
+ 
+     while (vq) {
+@@ -896,7 +920,8 @@ static int virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
+             break;
+         }
+         if (index < proxy->nvqs_with_notifiers) {
+-            ret = virtio_pci_vq_vector_unmask(proxy, index, vector, msg);
+            n = virtio_queue_get_guest_notifier(vq);
+            ret = virtio_pci_one_vector_unmask(proxy, index, vector, msg, n);
+             if (ret < 0) {
+                 goto undo;
+             }
+@@ -912,7 +937,8 @@ undo:
+     while (vq && unmasked >= 0) {
+         index = virtio_get_queue_index(vq);
+         if (index < proxy->nvqs_with_notifiers) {
+-            virtio_pci_vq_vector_mask(proxy, index, vector);
+            n = virtio_queue_get_guest_notifier(vq);
+            virtio_pci_one_vector_mask(proxy, index, vector, n);
+             --unmasked;
+         }
+         vq = virtio_vector_next_queue(vq);
+@@ -925,15 +951,17 @@ static void virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
+     VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtQueue *vq = virtio_vector_first_queue(vdev, vector);
+    EventNotifier *n;
+     int index;
+ 
+     while (vq) {
+         index = virtio_get_queue_index(vq);
+        n = virtio_queue_get_guest_notifier(vq);
+         if (!virtio_queue_get_num(vdev, index)) {
+             break;
+         }
+         if (index < proxy->nvqs_with_notifiers) {
+-            virtio_pci_vq_vector_mask(proxy, index, vector);
+            virtio_pci_one_vector_mask(proxy, index, vector, n);
+         }
+         vq = virtio_vector_next_queue(vq);
+     }
+@@ -949,19 +977,17 @@ static void virtio_pci_vector_poll(PCIDevice *dev,
+     int queue_no;
+     unsigned int vector;
+     EventNotifier *notifier;
+-    VirtQueue *vq;
+    int ret;
+ 
+     for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
+-        if (!virtio_queue_get_num(vdev, queue_no)) {
+        ret = virtio_pci_get_notifier(proxy, queue_no, &notifier, &vector);
+        if (ret < 0) {
+             break;
+         }
+-        vector = virtio_queue_vector(vdev, queue_no);
+         if (vector < vector_start || vector >= vector_end ||
+             !msix_is_masked(dev, vector)) {
+             continue;
+         }
+-        vq = virtio_get_queue(vdev, queue_no);
+-        notifier = virtio_queue_get_guest_notifier(vq);
+         if (k->guest_notifier_pending) {
+             if (k->guest_notifier_pending(vdev, queue_no)) {
+                 msix_set_pending(dev, vector);
+-- 
+2.31.1
+
--- a/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch
+++ b/kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch
@ -0,0 +1,212 @@
+From 58cd577ff157cfaf7506bba135db58e75c330ff0 Mon Sep 17 00:00:00 2001
+From: Cindy Lu <lulu@redhat.com>
+Date: Thu, 22 Dec 2022 15:04:44 +0800
+Subject: [PATCH 03/31] virtio-pci: decouple the single vector from the
+ interrupt process
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Cindy Lu <lulu@redhat.com>
+RH-MergeRequest: 132: vhost-vdpa: support config interrupt in vhost-vdpa
+RH-Bugzilla: 1905805
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Commit: [3/10] 2c79cb678f005fb2f53b2db0f237347634ab3422 (lulu6/qemu-kvm3)
+
+https://bugzilla.redhat.com/show_bug.cgi?id=1905805
+
+To reuse the interrupt process for the configure interrupt, we need to
+decouple the single-vector handling from the interrupt process.
+We add the new functions kvm_virtio_pci_vector_use_one() and
+kvm_virtio_pci_vector_release_one(). These functions handle a single
+vector; the whole process is completed by looping over the vq numbers.
+
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+Message-Id: <20221222070451.936503-4-lulu@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit ee3b8dc6cc496ba7f4e27aed4493275c706a7942)
+Signed-off-by: Cindy Lu <lulu@redhat.com>
+---
+ hw/virtio/virtio-pci.c | 131 +++++++++++++++++++++++------------------
+ 1 file changed, 73 insertions(+), 58 deletions(-)
+
+diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
+index 52c7692fff..ec816ea367 100644
+--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
+@@ -699,7 +699,6 @@ static uint32_t virtio_read_config(PCIDevice *pci_dev,
+ }
+ 
+ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
+-                                        unsigned int queue_no,
+                                         unsigned int vector)
+ {
+     VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
+@@ -764,87 +763,103 @@ static int virtio_pci_get_notifier(VirtIOPCIProxy *proxy, int queue_no,
+     return 0;
+ }
+ 
+-static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+static int kvm_virtio_pci_vector_use_one(VirtIOPCIProxy *proxy, int queue_no)
+ {
+    unsigned int vector;
+    int ret;
+    EventNotifier *n;
+     PCIDevice *dev = &proxy->pci_dev;
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+-    unsigned int vector;
+-    int ret, queue_no;
+-    EventNotifier *n;
+-    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+-        if (!virtio_queue_get_num(vdev, queue_no)) {
+-            break;
+-        }
+-        ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+-        if (ret < 0) {
+-            break;
+-        }
+-        if (vector >= msix_nr_vectors_allocated(dev)) {
+-            continue;
+-        }
+-        ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector);
+
+    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+    if (ret < 0) {
+        return ret;
+    }
+    if (vector >= msix_nr_vectors_allocated(dev)) {
+        return 0;
+    }
+    ret = kvm_virtio_pci_vq_vector_use(proxy, vector);
+    if (ret < 0) {
+        goto undo;
+    }
+    /*
+     * If guest supports masking, set up irqfd now.
+     * Otherwise, delay until unmasked in the frontend.
+     */
+    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+        ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
+         if (ret < 0) {
+            kvm_virtio_pci_vq_vector_release(proxy, vector);
+             goto undo;
+         }
+-        /* If guest supports masking, set up irqfd now.
+-         * Otherwise, delay until unmasked in the frontend.
+-         */
+-        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            ret = kvm_virtio_pci_irqfd_use(proxy, n, vector);
+-            if (ret < 0) {
+-                kvm_virtio_pci_vq_vector_release(proxy, vector);
+-                goto undo;
+-            }
+-        }
+     }
+-    return 0;
+ 
+    return 0;
+ undo:
+-    while (--queue_no >= 0) {
+-        vector = virtio_queue_vector(vdev, queue_no);
+-        if (vector >= msix_nr_vectors_allocated(dev)) {
+-            continue;
+
+    vector = virtio_queue_vector(vdev, queue_no);
+    if (vector >= msix_nr_vectors_allocated(dev)) {
+        return ret;
+    }
+    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+        ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+        if (ret < 0) {
+            return ret;
+         }
+-        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+-            if (ret < 0) {
+-                break;
+-            }
+-            kvm_virtio_pci_irqfd_release(proxy, n, vector);
+        kvm_virtio_pci_irqfd_release(proxy, n, vector);
+    }
+    return ret;
+}
+static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
+{
+    int queue_no;
+    int ret = 0;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+    for (queue_no = 0; queue_no < nvqs; queue_no++) {
+        if (!virtio_queue_get_num(vdev, queue_no)) {
+            return -1;
+         }
+-        kvm_virtio_pci_vq_vector_release(proxy, vector);
+        ret = kvm_virtio_pci_vector_use_one(proxy, queue_no);
+     }
+     return ret;
+ }
+ 
+-static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+
+static void kvm_virtio_pci_vector_release_one(VirtIOPCIProxy *proxy,
+                                              int queue_no)
+ {
+-    PCIDevice *dev = &proxy->pci_dev;
+     VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+     unsigned int vector;
+-    int queue_no;
+-    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+     EventNotifier *n;
+-    int ret ;
+    int ret;
+    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
+    PCIDevice *dev = &proxy->pci_dev;
+
+    ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+    if (ret < 0) {
+        return;
+    }
+    if (vector >= msix_nr_vectors_allocated(dev)) {
+        return;
+    }
+    if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+        kvm_virtio_pci_irqfd_release(proxy, n, vector);
+    }
+    kvm_virtio_pci_vq_vector_release(proxy, vector);
+}
+
+static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
+{
+    int queue_no;
+    VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus);
+
+     for (queue_no = 0; queue_no < nvqs; queue_no++) {
+         if (!virtio_queue_get_num(vdev, queue_no)) {
+             break;
+         }
+-        ret = virtio_pci_get_notifier(proxy, queue_no, &n, &vector);
+-        if (ret < 0) {
+-            break;
+-        }
+-        if (vector >= msix_nr_vectors_allocated(dev)) {
+-            continue;
+-        }
+-        /* If guest supports masking, clean up irqfd now.
+-         * Otherwise, it was cleaned when masked in the frontend.
+-         */
+-        if (vdev->use_guest_notifier_mask && k->guest_notifier_mask) {
+-            kvm_virtio_pci_irqfd_release(proxy, n, vector);
+-        }
+-        kvm_virtio_pci_vq_vector_release(proxy, vector);
+        kvm_virtio_pci_vector_release_one(proxy, queue_no);
+     }
+ }
+ 
+-- 
+2.31.1
+
--- a/qemu-kvm.spec
+++ b/qemu-kvm.spec
@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version}                    \
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 7.2.0
-Release: 4%{?rcrel}%{?dist}%{?cc_suffix}
+Release: 5%{?rcrel}%{?dist}%{?cc_suffix}
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 # Epoch 15 used for RHEL 8
 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@ -210,6 +210,68 @@ Patch30: kvm-hw-arm-virt-Add-properties-to-disable-high-memory-re.patch
 Patch31: kvm-hw-arm-virt-Enable-compat-high-memory-region-address.patch
 # For bz#2155749 - [regression][stable guest abi][qemu-kvm7.2]Migration failed due to virtio-rng device between RHEL8.8 and RHEL9.2/MSI-X
 Patch32: kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch33: kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch34: kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch35: kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch36: kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch37: kvm-vhost-vdpa-add-support-for-config-interrupt.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch38: kvm-virtio-add-support-for-configure-interrupt.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch39: kvm-vhost-add-support-for-configure-interrupt.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch40: kvm-virtio-net-add-support-for-configure-interrupt.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch41: kvm-virtio-mmio-add-support-for-configure-interrupt.patch
+# For bz#1905805 - support config interrupt in vhost-vdpa qemu
+Patch42: kvm-virtio-pci-add-support-for-configure-interrupt.patch
+# For bz#2159408 - [s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8
+Patch43: kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch
+# For bz#2124856 - VM with virtio interface and iommu=on will crash when try to migrate
+Patch44: kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch45: kvm-block-drop-bdrv_remove_filter_or_cow_child.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch46: kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch47: kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch48: kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch49: kvm-block-Remove-drained_end_counter.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch50: kvm-block-Inline-bdrv_drain_invoke.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch51: kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch52: kvm-block-Drain-individual-nodes-during-reopen.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch53: kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch54: kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch55: kvm-block-Remove-subtree-drains.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch56: kvm-block-Call-drain-callbacks-only-once.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch57: kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch58: kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch59: kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch
+# For bz#2155112 - Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled)
+Patch60: kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch
+# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on
+Patch61: kvm-accel-introduce-accelerator-blocker-API.patch
+# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on
+Patch62: kvm-KVM-keep-track-of-running-ioctls.patch
+# For bz#1979276 - SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on
+Patch63: kvm-kvm-Atomic-memslot-updates.patch

 %if %{have_clang}
 BuildRequires: clang
@ -1238,6 +1300,49 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
 %endif

 %changelog
+* Tue Jan 17 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-5
+- kvm-virtio-introduce-macro-VIRTIO_CONFIG_IRQ_IDX.patch [bz#1905805]
+- kvm-virtio-pci-decouple-notifier-from-interrupt-process.patch [bz#1905805]
+- kvm-virtio-pci-decouple-the-single-vector-from-the-inter.patch [bz#1905805]
+- kvm-vhost-introduce-new-VhostOps-vhost_set_config_call.patch [bz#1905805]
+- kvm-vhost-vdpa-add-support-for-config-interrupt.patch [bz#1905805]
+- kvm-virtio-add-support-for-configure-interrupt.patch [bz#1905805]
+- kvm-vhost-add-support-for-configure-interrupt.patch [bz#1905805]
+- kvm-virtio-net-add-support-for-configure-interrupt.patch [bz#1905805]
+- kvm-virtio-mmio-add-support-for-configure-interrupt.patch [bz#1905805]
+- kvm-virtio-pci-add-support-for-configure-interrupt.patch [bz#1905805]
+- kvm-s390x-s390-virtio-ccw-Activate-zPCI-features-on-s390.patch [bz#2159408]
+- kvm-vhost-fix-vq-dirty-bitmap-syncing-when-vIOMMU-is-ena.patch [bz#2124856]
+- kvm-block-drop-bdrv_remove_filter_or_cow_child.patch [bz#2155112]
+- kvm-qed-Don-t-yield-in-bdrv_qed_co_drain_begin.patch [bz#2155112]
+- kvm-test-bdrv-drain-Don-t-yield-in-.bdrv_co_drained_begi.patch [bz#2155112]
+- kvm-block-Revert-.bdrv_drained_begin-end-to-non-coroutin.patch [bz#2155112]
+- kvm-block-Remove-drained_end_counter.patch [bz#2155112]
+- kvm-block-Inline-bdrv_drain_invoke.patch [bz#2155112]
+- kvm-block-Fix-locking-for-bdrv_reopen_queue_child.patch [bz#2155112]
+- kvm-block-Drain-individual-nodes-during-reopen.patch [bz#2155112]
+- kvm-block-Don-t-use-subtree-drains-in-bdrv_drop_intermed.patch [bz#2155112]
+- kvm-stream-Replace-subtree-drain-with-a-single-node-drai.patch [bz#2155112]
+- kvm-block-Remove-subtree-drains.patch [bz#2155112]
+- kvm-block-Call-drain-callbacks-only-once.patch [bz#2155112]
+- kvm-block-Remove-ignore_bds_parents-parameter-from-drain.patch [bz#2155112]
+- kvm-block-Drop-out-of-coroutine-in-bdrv_do_drained_begin.patch [bz#2155112]
+- kvm-block-Don-t-poll-in-bdrv_replace_child_noperm.patch [bz#2155112]
+- kvm-block-Remove-poll-parameter-from-bdrv_parent_drained.patch [bz#2155112]
+- kvm-accel-introduce-accelerator-blocker-API.patch [bz#1979276]
+- kvm-KVM-keep-track-of-running-ioctls.patch [bz#1979276]
+- kvm-kvm-Atomic-memslot-updates.patch [bz#1979276]
+- Resolves: bz#1905805
+  (support config interrupt in vhost-vdpa qemu)
+- Resolves: bz#2159408
+  ([s390x] VMs with ISM passthrough don't autostart after leapp upgrade from RHEL 8)
+- Resolves: bz#2124856
+  (VM with virtio interface and iommu=on will crash when try to migrate)
+- Resolves: bz#2155112
+  (Qemu coredump after do snapshot of mirrored top image and its converted base image(iothread enabled))
+- Resolves: bz#1979276
+  (SVM: non atomic memslot updates cause boot failure with seabios and cpu-pm=on)
+
 * Thu Jan 12 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-4
 - kvm-virtio-rng-pci-fix-migration-compat-for-vectors.patch [bz#2155749]
 - kvm-Update-QGA-service-for-new-command-line.patch [bz#2156515]