From 0552c42c392f9ef0d01b98e248d64a86401c0e59 Mon Sep 17 00:00:00 2001
From: Miroslav Rezanina <mrezanin@redhat.com>
Date: Mon, 13 Jun 2022 00:34:04 -0400
Subject: [PATCH] * Mon Jun 13 2022 Miroslav Rezanina <mrezanin@redhat.com> -
 7.0.0-6

- kvm-Introduce-event-loop-base-abstract-class.patch [bz#2031024]
- kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch [bz#2031024]
- kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch [bz#2031024]
- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#2072379]
- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#2072379]
- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#2072379]
- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#2072379]
- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2070804]
- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2070804]
- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2070804]
- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2070804]
- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2070804]
- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2070804]
- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2070804]
- kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch [bz#2094270]
- kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch [bz#2086262]
- Resolves: bz#2031024
  (Add support for fixing thread pool size [QEMU])
- Resolves: bz#2072379
  (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs))
- Resolves: bz#2070804
  (PXE boot crash qemu when using multiqueue vDPA)
- Resolves: bz#2094270
  (Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore)
- Resolves: bz#2086262
  ([Win11][tpm]vfio_listener_region_del received unaligned region)
---
 ...oduce-event-loop-base-abstract-class.patch | 503 ++++++++++++++++++
 ...lly-limit-the-maximum-number-of-CPUs.patch |  58 ++
 ...08-Fix-when-missing-user_allow_other.patch |  52 ++
 ...-Test-new-refcount-rebuild-algorithm.patch | 445 ++++++++++++++++
 ...d-errp-to-rebuild_refcount_structure.patch | 162 ++++++
 ...mprove-refcount-structure-rebuilding.patch | 465 ++++++++++++++++
 ...base-Introduce-options-to-set-the-th.patch | 385 ++++++++++++++
 ...oop-Introduce-the-main-loop-into-QOM.patch | 233 ++++++++
 ...ve-spurious-warning-on-vfio_listener.patch |  78 +++
 ...-improper-cleanup-in-vhost_net_start.patch |  56 ++
 ...backend-feature-should-set-only-once.patch |  58 ++
 ...e-name-and-polarity-for-vhost_vdpa_o.patch | 123 +++++
 ...mproper-cleanup-in-net_init_vhost_vd.patch |  48 ++
 ...-ctrl_vq-index-for-non-mq-guest-for-.patch | 143 +++++
 ...-handle-mq-request-in-userspace-hand.patch | 109 ++++
 ...-vhost_dev-and-notifiers-for-cvq-onl.patch |  52 ++
 qemu-kvm.spec                                 |  62 ++-
 17 files changed, 3031 insertions(+), 1 deletion(-)
 create mode 100644 kvm-Introduce-event-loop-base-abstract-class.patch
 create mode 100644 kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch
 create mode 100644 kvm-iotests-108-Fix-when-missing-user_allow_other.patch
 create mode 100644 kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
 create mode 100644 kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
 create mode 100644 kvm-qcow2-Improve-refcount-structure-rebuilding.patch
 create mode 100644 kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch
 create mode 100644 kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch
 create mode 100644 kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch
 create mode 100644 kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
 create mode 100644 kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
 create mode 100644 kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
 create mode 100644 kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
 create mode 100644 kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
 create mode 100644 kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
 create mode 100644 kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch

diff --git a/kvm-Introduce-event-loop-base-abstract-class.patch b/kvm-Introduce-event-loop-base-abstract-class.patch
new file mode 100644
index 0000000..9f987ea
--- /dev/null
+++ b/kvm-Introduce-event-loop-base-abstract-class.patch
@@ -0,0 +1,503 @@
+From 1163da281c178359dd7e1cf1ced5c98caa600f8e Mon Sep 17 00:00:00 2001
+From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Date: Mon, 25 Apr 2022 09:57:21 +0200
+Subject: [PATCH 01/16] Introduce event-loop-base abstract class
+
+RH-Author: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size
+RH-Commit: [1/3] 5817205d8f56cc4aa98bd5963ecac54a59bad990
+RH-Bugzilla: 2031024
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+Introduce the 'event-loop-base' abstract class, it'll hold the
+properties common to all event loops and provide the necessary hooks for
+their creation and maintenance. Then have iothread inherit from it.
+
+EventLoopBaseClass is defined as user creatable and provides a hook for
+its children to attach themselves to the user creatable class 'complete'
+function. It also provides an update_params() callback to propagate
+property changes onto its children.
+
+The new 'event-loop-base' class will live in the root directory. It is
+built on its own using the 'link_whole' option (there are no direct
+function dependencies between the class and its children, it all happens
+trough 'constructor' magic). And also imposes new compilation
+dependencies:
+
+    qom <- event-loop-base <- blockdev (iothread.c)
+
+And in subsequent patches:
+
+    qom <- event-loop-base <- qemuutil (util/main-loop.c)
+
+All this forced some amount of reordering in meson.build:
+
+ - Moved qom build definition before qemuutil. Doing it the other way
+   around (i.e. moving qemuutil after qom) isn't possible as a lot of
+   core libraries that live in between the two depend on it.
+
+ - Process the 'hw' subdir earlier, as it introduces files into the
+   'qom' source set.
+
+No functional changes intended.
+
+Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Message-id: 20220425075723.20019-2-nsaenzju@redhat.com
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit 7d5983e3c8c40b1d0668faba31d79905c4fadd7d)
+---
+ event-loop-base.c                | 104 +++++++++++++++++++++++++++++++
+ include/sysemu/event-loop-base.h |  36 +++++++++++
+ include/sysemu/iothread.h        |   6 +-
+ iothread.c                       |  65 ++++++-------------
+ meson.build                      |  23 ++++---
+ qapi/qom.json                    |  22 +++++--
+ 6 files changed, 192 insertions(+), 64 deletions(-)
+ create mode 100644 event-loop-base.c
+ create mode 100644 include/sysemu/event-loop-base.h
+
+diff --git a/event-loop-base.c b/event-loop-base.c
+new file mode 100644
+index 0000000000..a924c73a7c
+--- /dev/null
++++ b/event-loop-base.c
+@@ -0,0 +1,104 @@
++/*
++ * QEMU event-loop base
++ *
++ * Copyright (C) 2022 Red Hat Inc
++ *
++ * Authors:
++ *  Stefan Hajnoczi <stefanha@redhat.com>
++ *  Nicolas Saenz Julienne <nsaenzju@redhat.com>
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++
++#include "qemu/osdep.h"
++#include "qom/object_interfaces.h"
++#include "qapi/error.h"
++#include "sysemu/event-loop-base.h"
++
++typedef struct {
++    const char *name;
++    ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
++} EventLoopBaseParamInfo;
++
++static EventLoopBaseParamInfo aio_max_batch_info = {
++    "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
++};
++
++static void event_loop_base_get_param(Object *obj, Visitor *v,
++        const char *name, void *opaque, Error **errp)
++{
++    EventLoopBase *event_loop_base = EVENT_LOOP_BASE(obj);
++    EventLoopBaseParamInfo *info = opaque;
++    int64_t *field = (void *)event_loop_base + info->offset;
++
++    visit_type_int64(v, name, field, errp);
++}
++
++static void event_loop_base_set_param(Object *obj, Visitor *v,
++        const char *name, void *opaque, Error **errp)
++{
++    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(obj);
++    EventLoopBase *base = EVENT_LOOP_BASE(obj);
++    EventLoopBaseParamInfo *info = opaque;
++    int64_t *field = (void *)base + info->offset;
++    int64_t value;
++
++    if (!visit_type_int64(v, name, &value, errp)) {
++        return;
++    }
++
++    if (value < 0) {
++        error_setg(errp, "%s value must be in range [0, %" PRId64 "]",
++                   info->name, INT64_MAX);
++        return;
++    }
++
++    *field = value;
++
++    if (bc->update_params) {
++        bc->update_params(base, errp);
++    }
++
++    return;
++}
++
++static void event_loop_base_complete(UserCreatable *uc, Error **errp)
++{
++    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
++    EventLoopBase *base = EVENT_LOOP_BASE(uc);
++
++    if (bc->init) {
++        bc->init(base, errp);
++    }
++}
++
++static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
++{
++    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
++    ucc->complete = event_loop_base_complete;
++
++    object_class_property_add(klass, "aio-max-batch", "int",
++                              event_loop_base_get_param,
++                              event_loop_base_set_param,
++                              NULL, &aio_max_batch_info);
++}
++
++static const TypeInfo event_loop_base_info = {
++    .name = TYPE_EVENT_LOOP_BASE,
++    .parent = TYPE_OBJECT,
++    .instance_size = sizeof(EventLoopBase),
++    .class_size = sizeof(EventLoopBaseClass),
++    .class_init = event_loop_base_class_init,
++    .abstract = true,
++    .interfaces = (InterfaceInfo[]) {
++        { TYPE_USER_CREATABLE },
++        { }
++    }
++};
++
++static void register_types(void)
++{
++    type_register_static(&event_loop_base_info);
++}
++type_init(register_types);
+diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
+new file mode 100644
+index 0000000000..8e77d8b69f
+--- /dev/null
++++ b/include/sysemu/event-loop-base.h
+@@ -0,0 +1,36 @@
++/*
++ * QEMU event-loop backend
++ *
++ * Copyright (C) 2022 Red Hat Inc
++ *
++ * Authors:
++ *  Nicolas Saenz Julienne <nsaenzju@redhat.com>
++ *
++ * This work is licensed under the terms of the GNU GPL, version 2 or later.
++ * See the COPYING file in the top-level directory.
++ */
++#ifndef QEMU_EVENT_LOOP_BASE_H
++#define QEMU_EVENT_LOOP_BASE_H
++
++#include "qom/object.h"
++#include "block/aio.h"
++#include "qemu/typedefs.h"
++
++#define TYPE_EVENT_LOOP_BASE         "event-loop-base"
++OBJECT_DECLARE_TYPE(EventLoopBase, EventLoopBaseClass,
++                    EVENT_LOOP_BASE)
++
++struct EventLoopBaseClass {
++    ObjectClass parent_class;
++
++    void (*init)(EventLoopBase *base, Error **errp);
++    void (*update_params)(EventLoopBase *base, Error **errp);
++};
++
++struct EventLoopBase {
++    Object parent;
++
++    /* AioContext AIO engine parameters */
++    int64_t aio_max_batch;
++};
++#endif
+diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
+index 7f714bd136..8f8601d6ab 100644
+--- a/include/sysemu/iothread.h
++++ b/include/sysemu/iothread.h
+@@ -17,11 +17,12 @@
+ #include "block/aio.h"
+ #include "qemu/thread.h"
+ #include "qom/object.h"
++#include "sysemu/event-loop-base.h"
+ 
+ #define TYPE_IOTHREAD "iothread"
+ 
+ struct IOThread {
+-    Object parent_obj;
++    EventLoopBase parent_obj;
+ 
+     QemuThread thread;
+     AioContext *ctx;
+@@ -37,9 +38,6 @@ struct IOThread {
+     int64_t poll_max_ns;
+     int64_t poll_grow;
+     int64_t poll_shrink;
+-
+-    /* AioContext AIO engine parameters */
+-    int64_t aio_max_batch;
+ };
+ typedef struct IOThread IOThread;
+ 
+diff --git a/iothread.c b/iothread.c
+index 0f98af0f2a..8fa2f3bfb8 100644
+--- a/iothread.c
++++ b/iothread.c
+@@ -17,6 +17,7 @@
+ #include "qemu/module.h"
+ #include "block/aio.h"
+ #include "block/block.h"
++#include "sysemu/event-loop-base.h"
+ #include "sysemu/iothread.h"
+ #include "qapi/error.h"
+ #include "qapi/qapi-commands-misc.h"
+@@ -152,10 +153,15 @@ static void iothread_init_gcontext(IOThread *iothread)
+     iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
+ }
+ 
+-static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
++static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
+ {
++    IOThread *iothread = IOTHREAD(base);
+     ERRP_GUARD();
+ 
++    if (!iothread->ctx) {
++        return;
++    }
++
+     aio_context_set_poll_params(iothread->ctx,
+                                 iothread->poll_max_ns,
+                                 iothread->poll_grow,
+@@ -166,14 +172,15 @@ static void iothread_set_aio_context_params(IOThread *iothread, Error **errp)
+     }
+ 
+     aio_context_set_aio_params(iothread->ctx,
+-                               iothread->aio_max_batch,
++                               iothread->parent_obj.aio_max_batch,
+                                errp);
+ }
+ 
+-static void iothread_complete(UserCreatable *obj, Error **errp)
++
++static void iothread_init(EventLoopBase *base, Error **errp)
+ {
+     Error *local_error = NULL;
+-    IOThread *iothread = IOTHREAD(obj);
++    IOThread *iothread = IOTHREAD(base);
+     char *thread_name;
+ 
+     iothread->stopping = false;
+@@ -189,7 +196,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
+      */
+     iothread_init_gcontext(iothread);
+ 
+-    iothread_set_aio_context_params(iothread, &local_error);
++    iothread_set_aio_context_params(base, &local_error);
+     if (local_error) {
+         error_propagate(errp, local_error);
+         aio_context_unref(iothread->ctx);
+@@ -201,7 +208,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
+      * to inherit.
+      */
+     thread_name = g_strdup_printf("IO %s",
+-                        object_get_canonical_path_component(OBJECT(obj)));
++                        object_get_canonical_path_component(OBJECT(base)));
+     qemu_thread_create(&iothread->thread, thread_name, iothread_run,
+                        iothread, QEMU_THREAD_JOINABLE);
+     g_free(thread_name);
+@@ -226,9 +233,6 @@ static IOThreadParamInfo poll_grow_info = {
+ static IOThreadParamInfo poll_shrink_info = {
+     "poll-shrink", offsetof(IOThread, poll_shrink),
+ };
+-static IOThreadParamInfo aio_max_batch_info = {
+-    "aio-max-batch", offsetof(IOThread, aio_max_batch),
+-};
+ 
+ static void iothread_get_param(Object *obj, Visitor *v,
+         const char *name, IOThreadParamInfo *info, Error **errp)
+@@ -288,35 +292,12 @@ static void iothread_set_poll_param(Object *obj, Visitor *v,
+     }
+ }
+ 
+-static void iothread_get_aio_param(Object *obj, Visitor *v,
+-        const char *name, void *opaque, Error **errp)
+-{
+-    IOThreadParamInfo *info = opaque;
+-
+-    iothread_get_param(obj, v, name, info, errp);
+-}
+-
+-static void iothread_set_aio_param(Object *obj, Visitor *v,
+-        const char *name, void *opaque, Error **errp)
+-{
+-    IOThread *iothread = IOTHREAD(obj);
+-    IOThreadParamInfo *info = opaque;
+-
+-    if (!iothread_set_param(obj, v, name, info, errp)) {
+-        return;
+-    }
+-
+-    if (iothread->ctx) {
+-        aio_context_set_aio_params(iothread->ctx,
+-                                   iothread->aio_max_batch,
+-                                   errp);
+-    }
+-}
+-
+ static void iothread_class_init(ObjectClass *klass, void *class_data)
+ {
+-    UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+-    ucc->complete = iothread_complete;
++    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(klass);
++
++    bc->init = iothread_init;
++    bc->update_params = iothread_set_aio_context_params;
+ 
+     object_class_property_add(klass, "poll-max-ns", "int",
+                               iothread_get_poll_param,
+@@ -330,23 +311,15 @@ static void iothread_class_init(ObjectClass *klass, void *class_data)
+                               iothread_get_poll_param,
+                               iothread_set_poll_param,
+                               NULL, &poll_shrink_info);
+-    object_class_property_add(klass, "aio-max-batch", "int",
+-                              iothread_get_aio_param,
+-                              iothread_set_aio_param,
+-                              NULL, &aio_max_batch_info);
+ }
+ 
+ static const TypeInfo iothread_info = {
+     .name = TYPE_IOTHREAD,
+-    .parent = TYPE_OBJECT,
++    .parent = TYPE_EVENT_LOOP_BASE,
+     .class_init = iothread_class_init,
+     .instance_size = sizeof(IOThread),
+     .instance_init = iothread_instance_init,
+     .instance_finalize = iothread_instance_finalize,
+-    .interfaces = (InterfaceInfo[]) {
+-        {TYPE_USER_CREATABLE},
+-        {}
+-    },
+ };
+ 
+ static void iothread_register_types(void)
+@@ -383,7 +356,7 @@ static int query_one_iothread(Object *object, void *opaque)
+     info->poll_max_ns = iothread->poll_max_ns;
+     info->poll_grow = iothread->poll_grow;
+     info->poll_shrink = iothread->poll_shrink;
+-    info->aio_max_batch = iothread->aio_max_batch;
++    info->aio_max_batch = iothread->parent_obj.aio_max_batch;
+ 
+     QAPI_LIST_APPEND(*tail, info);
+     return 0;
+diff --git a/meson.build b/meson.build
+index 6f7e430f0f..b9c919a55e 100644
+--- a/meson.build
++++ b/meson.build
+@@ -2804,6 +2804,7 @@ subdir('qom')
+ subdir('authz')
+ subdir('crypto')
+ subdir('ui')
++subdir('hw')
+ 
+ 
+ if enable_modules
+@@ -2811,6 +2812,18 @@ if enable_modules
+   modulecommon = declare_dependency(link_whole: libmodulecommon, compile_args: '-DBUILD_DSO')
+ endif
+ 
++qom_ss = qom_ss.apply(config_host, strict: false)
++libqom = static_library('qom', qom_ss.sources() + genh,
++                        dependencies: [qom_ss.dependencies()],
++                        name_suffix: 'fa')
++qom = declare_dependency(link_whole: libqom)
++
++event_loop_base = files('event-loop-base.c')
++event_loop_base = static_library('event-loop-base', sources: event_loop_base + genh,
++                                 build_by_default: true)
++event_loop_base = declare_dependency(link_whole: event_loop_base,
++                                     dependencies: [qom])
++
+ stub_ss = stub_ss.apply(config_all, strict: false)
+ 
+ util_ss.add_all(trace_ss)
+@@ -2897,7 +2910,6 @@ subdir('monitor')
+ subdir('net')
+ subdir('replay')
+ subdir('semihosting')
+-subdir('hw')
+ subdir('tcg')
+ subdir('fpu')
+ subdir('accel')
+@@ -3022,13 +3034,6 @@ qemu_syms = custom_target('qemu.syms', output: 'qemu.syms',
+                              capture: true,
+                              command: [undefsym, nm, '@INPUT@'])
+ 
+-qom_ss = qom_ss.apply(config_host, strict: false)
+-libqom = static_library('qom', qom_ss.sources() + genh,
+-                        dependencies: [qom_ss.dependencies()],
+-                        name_suffix: 'fa')
+-
+-qom = declare_dependency(link_whole: libqom)
+-
+ authz_ss = authz_ss.apply(config_host, strict: false)
+ libauthz = static_library('authz', authz_ss.sources() + genh,
+                           dependencies: [authz_ss.dependencies()],
+@@ -3081,7 +3086,7 @@ libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
+                              build_by_default: false)
+ 
+ blockdev = declare_dependency(link_whole: [libblockdev],
+-                              dependencies: [block])
++                              dependencies: [block, event_loop_base])
+ 
+ qmp_ss = qmp_ss.apply(config_host, strict: false)
+ libqmp = static_library('qmp', qmp_ss.sources() + genh,
+diff --git a/qapi/qom.json b/qapi/qom.json
+index eeb5395ff3..a2439533c5 100644
+--- a/qapi/qom.json
++++ b/qapi/qom.json
+@@ -499,6 +499,20 @@
+             '*repeat': 'bool',
+             '*grab-toggle': 'GrabToggleKeys' } }
+ 
++##
++# @EventLoopBaseProperties:
++#
++# Common properties for event loops
++#
++# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
++#                 0 means that the engine will use its default.
++#                 (default: 0)
++#
++# Since: 7.1
++##
++{ 'struct': 'EventLoopBaseProperties',
++  'data': { '*aio-max-batch': 'int' } }
++
+ ##
+ # @IothreadProperties:
+ #
+@@ -516,17 +530,15 @@
+ #               algorithm detects it is spending too long polling without
+ #               encountering events. 0 selects a default behaviour (default: 0)
+ #
+-# @aio-max-batch: maximum number of requests in a batch for the AIO engine,
+-#                 0 means that the engine will use its default
+-#                 (default:0, since 6.1)
++# The @aio-max-batch option is available since 6.1.
+ #
+ # Since: 2.0
+ ##
+ { 'struct': 'IothreadProperties',
++  'base': 'EventLoopBaseProperties',
+   'data': { '*poll-max-ns': 'int',
+             '*poll-grow': 'int',
+-            '*poll-shrink': 'int',
+-            '*aio-max-batch': 'int' } }
++            '*poll-shrink': 'int' } }
+ 
+ ##
+ # @MemoryBackendProperties:
+-- 
+2.31.1
+
diff --git a/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch
new file mode 100644
index 0000000..7740d0b
--- /dev/null
+++ b/kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch
@@ -0,0 +1,58 @@
+From 5ab8613582fd56b847fe75750acb5b7255900b35 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Thu, 9 Jun 2022 11:55:15 +0200
+Subject: [PATCH 15/16] Revert "globally limit the maximum number of CPUs"
+
+RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
+RH-MergeRequest: 99: Revert "globally limit the maximum number of CPUs"
+RH-Commit: [1/1] 13100d4a2209b2190a3654c1f9cf4ebade1e8d24 (vkuznets/qemu-kvm-c9s)
+RH-Bugzilla: 2094270
+RH-Acked-by: Andrew Jones <drjones@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Thomas Huth <thuth@redhat.com>
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2094270
+Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45871149
+Upstream Status: RHEL-only
+Tested: with upstream kernel
+
+Downstream QEMU carries a patch that sets the hard limit of possible vCPUs
+to the value that the KVM code of the kernel recommends as soft limit.
+Upstream KVM code has been changed recently to not use an arbitrary soft
+limit anymore, but to cap the value on the amount of available physical
+CPUs of the host. This defeats the purpose of the downstream change in
+QEMU completely. Drop the downstream-only patch to allow CPU overcommit.
+
+This reverts commit 6669f6fa677d43144f39d6ad59725b7ba622f1c2.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+---
+ accel/kvm/kvm-all.c | 12 ------------
+ 1 file changed, 12 deletions(-)
+
+diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
+index fdf0e4d429..5f1377ca04 100644
+--- a/accel/kvm/kvm-all.c
++++ b/accel/kvm/kvm-all.c
+@@ -2430,18 +2430,6 @@ static int kvm_init(MachineState *ms)
+     soft_vcpus_limit = kvm_recommended_vcpus(s);
+     hard_vcpus_limit = kvm_max_vcpus(s);
+ 
+-#ifdef HOST_PPC64
+-    /*
+-     * On POWER, the kernel advertises a soft limit based on the
+-     * number of CPU threads on the host.  We want to allow exceeding
+-     * this for testing purposes, so we don't want to set hard limit
+-     * to soft limit as on x86.
+-     */
+-#else
+-    /* RHEL doesn't support nr_vcpus > soft_vcpus_limit */
+-    hard_vcpus_limit = soft_vcpus_limit;
+-#endif
+-
+     while (nc->name) {
+         if (nc->num > soft_vcpus_limit) {
+             warn_report("Number of %s cpus requested (%d) exceeds "
+-- 
+2.31.1
+
diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch
new file mode 100644
index 0000000..a37ea6f
--- /dev/null
+++ b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch
@@ -0,0 +1,52 @@
+From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Thu, 21 Apr 2022 16:24:35 +0200
+Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
+RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s)
+RH-Bugzilla: 2072379
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+FUSE exports' allow-other option defaults to "auto", which means that it
+will try passing allow_other as a mount option, and fall back to not
+using it when an error occurs.  We make no effort to hide fusermount's
+error message (because it would be difficult, and because users might
+want to know about the fallback occurring), and so when allow_other does
+not work (primarily when /etc/fuse.conf does not contain
+user_allow_other), this error message will appear and break the
+reference output.
+
+We do not need allow_other here, though, so we can just pass
+allow-other=off to fix that.
+
+Reported-by: Markus Armbruster <armbru@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220421142435.569600-1-hreitz@redhat.com>
+Tested-by: Markus Armbruster <armbru@redhat.com>
+Tested-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ tests/qemu-iotests/108 | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
+index a3090e2875..4681c7c769 100755
+--- a/tests/qemu-iotests/108
++++ b/tests/qemu-iotests/108
+@@ -326,7 +326,7 @@ else
+ 
+     $QSD \
+         --blockdev file,node-name=export-node,filename="$TEST_IMG" \
+-        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
++        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \
+         --pidfile "$TEST_DIR/qsd.pid" \
+         &
+ 
+-- 
+2.31.1
+
diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
new file mode 100644
index 0000000..7a968f6
--- /dev/null
+++ b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
@@ -0,0 +1,445 @@
+From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:51 +0200
+Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
+RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s)
+RH-Bugzilla: 2072379
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+One clear problem with how qcow2's refcount structure rebuild algorithm
+used to be before "qcow2: Improve refcount structure rebuilding" was
+that it is prone to failure for qcow2 images on block devices: There is
+generally unused space after the actual image, and if that exceeds what
+one refblock covers, the old algorithm would invariably write the
+reftable past the block device's end, which cannot work.  The new
+algorithm does not have this problem.
+
+Test it with three tests:
+(1) Create an image with more empty space at the end than what one
+    refblock covers, see whether rebuilding the refcount structures
+    results in a change in the image file length.  (It should not.)
+
+(2) Leave precisely enough space somewhere at the beginning of the image
+    for the new reftable (and the refblock for that place), see whether
+    the new algorithm puts the reftable there.  (It should.)
+
+(3) Test the original problem: Create (something like) a block device
+    with a fixed size, then create a qcow2 image in there, write some
+    data, and then have qemu-img check rebuild the refcount structures.
+    Before HEAD^, the reftable would have been written past the image
+    file end, i.e. outside of what the block device provides, which
+    cannot work.  HEAD^ should have fixed that.
+    ("Something like a block device" means a loop device if we can use
+    one ("sudo -n losetup" works), or a FUSE block export with
+    growable=false otherwise.)
+
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-3-hreitz@redhat.com>
+(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162)
+
+Conflicts:
+- 108: The downstream qemu-storage-daemon does not support --daemonize,
+  so this switch has been replaced by a loop waiting for the PID file to
+  appear
+
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ tests/qemu-iotests/108     | 263 ++++++++++++++++++++++++++++++++++++-
+ tests/qemu-iotests/108.out |  81 ++++++++++++
+ 2 files changed, 343 insertions(+), 1 deletion(-)
+
+diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
+index 56339ab2c5..a3090e2875 100755
+--- a/tests/qemu-iotests/108
++++ b/tests/qemu-iotests/108
+@@ -30,13 +30,20 @@ status=1	# failure is the default!
+ 
+ _cleanup()
+ {
+-	_cleanup_test_img
++    _cleanup_test_img
++    if [ -f "$TEST_DIR/qsd.pid" ]; then
++        qsd_pid=$(cat "$TEST_DIR/qsd.pid")
++        kill -KILL "$qsd_pid"
++        fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
++    fi
++    rm -f "$TEST_DIR/fuse-export"
+ }
+ trap "_cleanup; exit \$status" 0 1 2 3 15
+ 
+ # get standard environment, filters and checks
+ . ./common.rc
+ . ./common.filter
++. ./common.qemu
+ 
+ # This tests qcow2-specific low-level functionality
+ _supported_fmt qcow2
+@@ -47,6 +54,22 @@ _supported_os Linux
+ # files
+ _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file
+ 
++# This test either needs sudo -n losetup or FUSE exports to work
++if sudo -n losetup &>/dev/null; then
++    loopdev=true
++else
++    loopdev=false
++
++    # QSD --export fuse will either yield "Parameter 'id' is missing"
++    # or "Invalid parameter 'fuse'", depending on whether there is
++    # FUSE support or not.
++    error=$($QSD --export fuse 2>&1)
++    if [[ $error = *"'fuse'"* ]]; then
++        _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
++                'neither is available'
++    fi
++fi
++
+ echo
+ echo '=== Repairing an image without any refcount table ==='
+ echo
+@@ -138,6 +161,244 @@ _make_test_img 64M
+ poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
+ _check_test_img -r all
+ 
++echo
++echo '=== Check rebuilt reftable location ==='
++
++# In an earlier version of the refcount rebuild algorithm, the
++# reftable was generally placed at the image end (unless something was
++# allocated in the area covered by the refblock right before the image
++# file end, then we would try to place the reftable in that refblock).
++# This was later changed so the reftable would be placed in the
++# earliest possible location.  Test this.
++
++echo
++echo '--- Does the image size increase? ---'
++echo
++
++# First test: Just create some image, write some data to it, and
++# resize it so there is free space at the end of the image (enough
++# that it spans at least one full refblock, which for cluster_size=512
++# images, spans 128k).  With the old algorithm, the reftable would
++# have then been placed at the end of the image file, but with the new
++# one, it will be put in that free space.
++# We want to check whether the size of the image file increases due to
++# rebuilding the refcount structures (it should not).
++
++_make_test_img -o 'cluster_size=512' 1M
++# Write something
++$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
++
++# Add free space
++file_len=$(stat -c '%s' "$TEST_IMG")
++truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
++
++# Corrupt the image by saying the image header was not allocated
++rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
++poke_file "$TEST_IMG" $rb_offset "\x00\x00"
++
++# Check whether rebuilding the refcount structures increases the image
++# file size
++file_len=$(stat -c '%s' "$TEST_IMG")
++echo
++# The only leaks there can be are the old refcount structures that are
++# leaked during rebuilding, no need to clutter the output with them
++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
++echo
++post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
++
++if [[ $file_len -eq $post_repair_file_len ]]; then
++    echo 'OK: Image size did not change'
++else
++    echo 'ERROR: Image size differs' \
++        "($file_len before, $post_repair_file_len after)"
++fi
++
++echo
++echo '--- Will the reftable occupy a hole specifically left for it?  ---'
++echo
++
++# Note: With cluster_size=512, every refblock covers 128k.
++# The reftable covers 8M per reftable cluster.
++
++# Create an image that requires two reftable clusters (just because
++# this is more interesting than a single-clustered reftable).
++_make_test_img -o 'cluster_size=512' 9M
++$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
++
++# Writing 8M will have resized the reftable.  Unfortunately, doing so
++# will leave holes in the file, so we need to fill them up so we can
++# be sure the whole file is allocated.  Do that by writing
++# consecutively smaller chunks starting from 8 MB, until the file
++# length increases even with a chunk size of 512.  Then we must have
++# filled all holes.
++ofs=$((8 * 1024 * 1024))
++block_len=$((16 * 1024))
++while [[ $block_len -ge 512 ]]; do
++    file_len=$(stat -c '%s' "$TEST_IMG")
++    while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
++        # Do not include this in the reference output, it does not
++        # really matter which qemu-io calls we do here exactly
++        $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
++        ofs=$((ofs + block_len))
++    done
++    block_len=$((block_len / 2))
++done
++
++# Fill up to 9M (do not include this in the reference output either,
++# $ofs is random for all we know)
++$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
++
++# Make space as follows:
++# - For the first refblock: Right at the beginning of the image (this
++#   refblock is placed in the first place possible),
++# - For the reftable somewhere soon afterwards, still near the
++#   beginning of the image (i.e. covered by the first refblock); the
++#   reftable too is placed in the first place possible, but only after
++#   all refblocks have been placed)
++# No space is needed for the other refblocks, because no refblock is
++# put before the space it covers.  In this test case, we do not mind
++# if they are placed at the image file's end.
++
++# Before we make that space, we have to find out the host offset of
++# the area that belonged to the two data clusters at guest offset 4k,
++# because we expect the reftable to be placed there, and we will have
++# to verify that it is.
++
++l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
++l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
++l2_offset=$((l2_offset & 0x00fffffffffffe00))
++data_4k_offset=$(peek_file_be "$TEST_IMG" \
++                 $((l2_offset + 4096 / 512 * 8)) 8)
++data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
++
++$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
++
++# Corrupt the image by saying the image header was not allocated
++rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
++rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
++poke_file "$TEST_IMG" $rb_offset "\x00\x00"
++
++echo
++# The only leaks there can be are the old refcount structures that are
++# leaked during rebuilding, no need to clutter the output with them
++_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
++echo
++
++# Check whether the reftable was put where we expected
++rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
++if [[ $rt_offset -eq $data_4k_offset ]]; then
++    echo 'OK: Reftable is where we expect it'
++else
++    echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
++fi
++
++echo
++echo '--- Rebuilding refcount structures on block devices ---'
++echo
++
++# A block device cannot really grow, at least not during qemu-img
++# check.  As mentioned in the above cases, rebuilding the refcount
++# structure may lead to new refcount structures being written after
++# the end of the image, and in the past that happened even if there
++# was more than sufficient space in the image.  Such post-EOF writes
++# will not work on block devices, so test that the new algorithm
++# avoids it.
++
++# If we have passwordless sudo and losetup, we can use those to create
++# a block device.  Otherwise, we can resort to qemu's FUSE export to
++# create a file that isn't growable, which effectively tests the same
++# thing.
++
++_cleanup_test_img
++truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
++
++if $loopdev; then
++    export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
++    export_mp_driver=host_device
++    sudo -n chmod go+rw "$export_mp"
++else
++    # Create non-growable FUSE export that is a bit like an empty
++    # block device
++    export_mp="$TEST_DIR/fuse-export"
++    export_mp_driver=file
++    touch "$export_mp"
++
++    $QSD \
++        --blockdev file,node-name=export-node,filename="$TEST_IMG" \
++        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
++        --pidfile "$TEST_DIR/qsd.pid" \
++        &
++
++    while [ ! -f "$TEST_DIR/qsd.pid" ]; do
++        sleep 0.1
++    done
++fi
++
++# Now create a qcow2 image on the device -- unfortunately, qemu-img
++# create force-creates the file, so we have to resort to the
++# blockdev-create job.
++_launch_qemu \
++    --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
++
++_send_qemu_cmd \
++    $QEMU_HANDLE \
++    '{ "execute": "qmp_capabilities" }' \
++    'return'
++
++# Small cluster size again, so the image needs multiple refblocks
++_send_qemu_cmd \
++    $QEMU_HANDLE \
++    '{ "execute": "blockdev-create",
++       "arguments": {
++           "job-id": "create",
++           "options": {
++               "driver": "qcow2",
++               "file": "file",
++               "size": '$((64 * 1024 * 1024))',
++               "cluster-size": 512
++           } } }' \
++    '"concluded"'
++
++_send_qemu_cmd \
++    $QEMU_HANDLE \
++    '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
++    'return'
++
++_send_qemu_cmd \
++    $QEMU_HANDLE \
++    '{ "execute": "quit" }' \
++    'return'
++
++wait=y _cleanup_qemu
++echo
++
++# Write some data
++$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
++
++# Corrupt the image by saying the image header was not allocated
++rt_offset=$(peek_file_be "$export_mp" 48 8)
++rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
++poke_file "$export_mp" $rb_offset "\x00\x00"
++
++# Repairing such a simple case should just work
++# (We used to put the reftable at the end of the image file, which can
++# never work for non-growable devices.)
++echo
++TEST_IMG="$export_mp" _check_test_img -r all \
++    | grep -v '^Repairing cluster.*refcount=1 reference=0'
++
++if $loopdev; then
++    sudo -n losetup -d "$export_mp"
++else
++    qsd_pid=$(cat "$TEST_DIR/qsd.pid")
++    kill -TERM "$qsd_pid"
++    # Wait for process to exit (cannot `wait` because the QSD is daemonized)
++    while [ -f "$TEST_DIR/qsd.pid" ]; do
++        true
++    done
++fi
++
+ # success, all done
+ echo '*** done'
+ rm -f $seq.full
+diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out
+index 75bab8dc84..b5401d788d 100644
+--- a/tests/qemu-iotests/108.out
++++ b/tests/qemu-iotests/108.out
+@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
+     0 leaked clusters
+     1 corruptions
+ 
++Double checking the fixed image now...
++No errors were found on the image.
++
++=== Check rebuilt reftable location ===
++
++--- Does the image size increase? ---
++
++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
++wrote 65536/65536 bytes at offset 0
++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++ERROR cluster 0 refcount=0 reference=1
++Rebuilding refcount structure
++The following inconsistencies were found and repaired:
++
++    0 leaked clusters
++    1 corruptions
++
++Double checking the fixed image now...
++No errors were found on the image.
++
++OK: Image size did not change
++
++--- Will the reftable occupy a hole specifically left for it?  ---
++
++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
++wrote 8388608/8388608 bytes at offset 0
++8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++discard 512/512 bytes at offset 0
++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++discard 1024/1024 bytes at offset 4096
++1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++ERROR cluster 0 refcount=0 reference=1
++Rebuilding refcount structure
++The following inconsistencies were found and repaired:
++
++    0 leaked clusters
++    1 corruptions
++
++Double checking the fixed image now...
++No errors were found on the image.
++
++OK: Reftable is where we expect it
++
++--- Rebuilding refcount structures on block devices ---
++
++{ "execute": "qmp_capabilities" }
++{"return": {}}
++{ "execute": "blockdev-create",
++       "arguments": {
++           "job-id": "create",
++           "options": {
++               "driver": "IMGFMT",
++               "file": "file",
++               "size": 67108864,
++               "cluster-size": 512
++           } } }
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
++{"return": {}}
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
++{ "execute": "job-dismiss", "arguments": { "id": "create" } }
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
++{"return": {}}
++{ "execute": "quit" }
++{"return": {}}
++{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
++
++wrote 65536/65536 bytes at offset 0
++64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++ERROR cluster 0 refcount=0 reference=1
++Rebuilding refcount structure
++The following inconsistencies were found and repaired:
++
++    0 leaked clusters
++    1 corruptions
++
+ Double checking the fixed image now...
+ No errors were found on the image.
+ *** done
+-- 
+2.31.1
+
diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
new file mode 100644
index 0000000..9010d3d
--- /dev/null
+++ b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
@@ -0,0 +1,162 @@
+From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:52 +0200
+Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure()
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
+RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s)
+RH-Bugzilla: 2072379
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+Instead of fprint()-ing error messages in rebuild_refcount_structure()
+and its rebuild_refcounts_write_refblocks() helper, pass them through an
+Error object to qcow2_check_refcounts() (which will then print it).
+
+Suggested-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-4-hreitz@redhat.com>
+Reviewed-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ block/qcow2-refcount.c | 33 +++++++++++++++++++--------------
+ 1 file changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
+index c5669eaa51..ed0ecfaa89 100644
+--- a/block/qcow2-refcount.c
++++ b/block/qcow2-refcount.c
+@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
+ static int rebuild_refcounts_write_refblocks(
+         BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+         int64_t first_cluster, int64_t end_cluster,
+-        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
++        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
++        Error **errp
+     )
+ {
+     BDRVQcow2State *s = bs->opaque;
+@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks(
+                                                   nb_clusters,
+                                                   &first_free_cluster);
+             if (refblock_offset < 0) {
+-                fprintf(stderr, "ERROR allocating refblock: %s\n",
+-                        strerror(-refblock_offset));
++                error_setg_errno(errp, -refblock_offset,
++                                 "ERROR allocating refblock");
+                 return refblock_offset;
+             }
+ 
+@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks(
+                                   on_disk_reftable_entries *
+                                   REFTABLE_ENTRY_SIZE);
+                 if (!on_disk_reftable) {
++                    error_setg(errp, "ERROR allocating reftable memory");
+                     return -ENOMEM;
+                 }
+ 
+@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks(
+         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
+                                             s->cluster_size, false);
+         if (ret < 0) {
+-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
++            error_setg_errno(errp, -ret, "ERROR writing refblock");
+             return ret;
+         }
+ 
+@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks(
+         ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
+                           s->cluster_size);
+         if (ret < 0) {
+-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
++            error_setg_errno(errp, -ret, "ERROR writing refblock");
+             return ret;
+         }
+ 
+@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks(
+ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                       BdrvCheckResult *res,
+                                       void **refcount_table,
+-                                      int64_t *nb_clusters)
++                                      int64_t *nb_clusters,
++                                      Error **errp)
+ {
+     BDRVQcow2State *s = bs->opaque;
+     int64_t reftable_offset = -1;
+@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+         rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                           0, *nb_clusters,
+                                           &on_disk_reftable,
+-                                          &on_disk_reftable_entries);
++                                          &on_disk_reftable_entries, errp);
+     if (reftable_size_changed < 0) {
+         res->check_errors++;
+         ret = reftable_size_changed;
+@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                               refcount_table, nb_clusters,
+                                               &first_free_cluster);
+         if (reftable_offset < 0) {
+-            fprintf(stderr, "ERROR allocating reftable: %s\n",
+-                    strerror(-reftable_offset));
++            error_setg_errno(errp, -reftable_offset,
++                             "ERROR allocating reftable");
+             res->check_errors++;
+             ret = reftable_offset;
+             goto fail;
+@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                               reftable_start_cluster,
+                                               reftable_end_cluster,
+                                               &on_disk_reftable,
+-                                              &on_disk_reftable_entries);
++                                              &on_disk_reftable_entries, errp);
+         if (reftable_size_changed < 0) {
+             res->check_errors++;
+             ret = reftable_size_changed;
+@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+     ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
+                                         false);
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
++        error_setg_errno(errp, -ret, "ERROR writing reftable");
+         goto fail;
+     }
+ 
+@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+     ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+                       reftable_length);
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
++        error_setg_errno(errp, -ret, "ERROR writing reftable");
+         goto fail;
+     }
+ 
+@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                            &reftable_offset_and_clusters,
+                            sizeof(reftable_offset_and_clusters));
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
++        error_setg_errno(errp, -ret, "ERROR setting reftable");
+         goto fail;
+     }
+ 
+@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+     if (rebuild && (fix & BDRV_FIX_ERRORS)) {
+         BdrvCheckResult old_res = *res;
+         int fresh_leaks = 0;
++        Error *local_err = NULL;
+ 
+         fprintf(stderr, "Rebuilding refcount structure\n");
+         ret = rebuild_refcount_structure(bs, res, &refcount_table,
+-                                         &nb_clusters);
++                                         &nb_clusters, &local_err);
+         if (ret < 0) {
++            error_report_err(local_err);
+             goto fail;
+         }
+ 
+-- 
+2.31.1
+
diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch
new file mode 100644
index 0000000..cdc92b8
--- /dev/null
+++ b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch
@@ -0,0 +1,465 @@
+From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:50 +0200
+Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding
+RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s)
+RH-Bugzilla: 2072379
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+When rebuilding the refcount structures (when qemu-img check -r found
+errors with refcount = 0, but reference count > 0), the new refcount
+table defaults to being put at the image file end[1].  There is no good
+reason for that except that it means we will not have to rewrite any
+refblocks we already wrote to disk.
+
+Changing the code to rewrite those refblocks is not too difficult,
+though, so let us do that.  That is beneficial for images on block
+devices, where we cannot really write beyond the end of the image file.
+
+Use this opportunity to add extensive comments to the code, and refactor
+it a bit, getting rid of the backwards-jumping goto.
+
+[1] Unless there is something allocated in the area pointed to by the
+    last refblock, so we have to write that refblock.  In that case, we
+    try to put the reftable in there.
+
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071
+Closes: https://gitlab.com/qemu-project/qemu/-/issues/941
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-2-hreitz@redhat.com>
+(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------
+ 1 file changed, 235 insertions(+), 97 deletions(-)
+
+diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
+index b91499410c..c5669eaa51 100644
+--- a/block/qcow2-refcount.c
++++ b/block/qcow2-refcount.c
+@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
+ }
+ 
+ /*
+- * Creates a new refcount structure based solely on the in-memory information
+- * given through *refcount_table. All necessary allocations will be reflected
+- * in that array.
++ * Helper function for rebuild_refcount_structure().
+  *
+- * On success, the old refcount structure is leaked (it will be covered by the
+- * new refcount structure).
++ * Scan the range of clusters [first_cluster, end_cluster) for allocated
++ * clusters and write all corresponding refblocks to disk.  The refblock
++ * and allocation data is taken from the in-memory refcount table
++ * *refcount_table[] (of size *nb_clusters), which is basically one big
++ * (unlimited size) refblock for the whole image.
++ *
++ * For these refblocks, clusters are allocated using said in-memory
++ * refcount table.  Care is taken that these allocations are reflected
++ * in the refblocks written to disk.
++ *
++ * The refblocks' offsets are written into a reftable, which is
++ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr).  If
++ * that reftable is of insufficient size, it will be resized to fit.
++ * This reftable is not written to disk.
++ *
++ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
++ * to point to existing valid refblocks that do not need to be allocated
++ * again.)
++ *
++ * Return whether the on-disk reftable array was resized (true/false),
++ * or -errno on error.
+  */
+-static int rebuild_refcount_structure(BlockDriverState *bs,
+-                                      BdrvCheckResult *res,
+-                                      void **refcount_table,
+-                                      int64_t *nb_clusters)
++static int rebuild_refcounts_write_refblocks(
++        BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
++        int64_t first_cluster, int64_t end_cluster,
++        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
++    )
+ {
+     BDRVQcow2State *s = bs->opaque;
+-    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
++    int64_t cluster;
+     int64_t refblock_offset, refblock_start, refblock_index;
+-    uint32_t reftable_size = 0;
+-    uint64_t *on_disk_reftable = NULL;
++    int64_t first_free_cluster = 0;
++    uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
++    uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
+     void *on_disk_refblock;
+-    int ret = 0;
+-    struct {
+-        uint64_t reftable_offset;
+-        uint32_t reftable_clusters;
+-    } QEMU_PACKED reftable_offset_and_clusters;
+-
+-    qcow2_cache_empty(bs, s->refcount_block_cache);
++    bool reftable_grown = false;
++    int ret;
+ 
+-write_refblocks:
+-    for (; cluster < *nb_clusters; cluster++) {
++    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
++        /* Check all clusters to find refblocks that contain non-zero entries */
+         if (!s->get_refcount(*refcount_table, cluster)) {
+             continue;
+         }
+ 
++        /*
++         * This cluster is allocated, so we need to create a refblock
++         * for it.  The data we will write to disk is just the
++         * respective slice from *refcount_table, so it will contain
++         * accurate refcounts for all clusters belonging to this
++         * refblock.  After we have written it, we will therefore skip
++         * all remaining clusters in this refblock.
++         */
++
+         refblock_index = cluster >> s->refcount_block_bits;
+         refblock_start = refblock_index << s->refcount_block_bits;
+ 
+-        /* Don't allocate a cluster in a refblock already written to disk */
+-        if (first_free_cluster < refblock_start) {
+-            first_free_cluster = refblock_start;
+-        }
+-        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+-                                              nb_clusters, &first_free_cluster);
+-        if (refblock_offset < 0) {
+-            fprintf(stderr, "ERROR allocating refblock: %s\n",
+-                    strerror(-refblock_offset));
+-            res->check_errors++;
+-            ret = refblock_offset;
+-            goto fail;
+-        }
++        if (on_disk_reftable_entries > refblock_index &&
++            on_disk_reftable[refblock_index])
++        {
++            /*
++             * We can get here after a `goto write_refblocks`: We have a
++             * reftable from a previous run, and the refblock is already
++             * allocated.  No need to allocate it again.
++             */
++            refblock_offset = on_disk_reftable[refblock_index];
++        } else {
++            int64_t refblock_cluster_index;
+ 
+-        if (reftable_size <= refblock_index) {
+-            uint32_t old_reftable_size = reftable_size;
+-            uint64_t *new_on_disk_reftable;
++            /* Don't allocate a cluster in a refblock already written to disk */
++            if (first_free_cluster < refblock_start) {
++                first_free_cluster = refblock_start;
++            }
++            refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
++                                                  nb_clusters,
++                                                  &first_free_cluster);
++            if (refblock_offset < 0) {
++                fprintf(stderr, "ERROR allocating refblock: %s\n",
++                        strerror(-refblock_offset));
++                return refblock_offset;
++            }
+ 
+-            reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+-                                     s->cluster_size) / REFTABLE_ENTRY_SIZE;
+-            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
+-                                                 reftable_size *
+-                                                 REFTABLE_ENTRY_SIZE);
+-            if (!new_on_disk_reftable) {
+-                res->check_errors++;
+-                ret = -ENOMEM;
+-                goto fail;
++            refblock_cluster_index = refblock_offset / s->cluster_size;
++            if (refblock_cluster_index >= end_cluster) {
++                /*
++                 * We must write the refblock that holds this refblock's
++                 * refcount
++                 */
++                end_cluster = refblock_cluster_index + 1;
+             }
+-            on_disk_reftable = new_on_disk_reftable;
+ 
+-            memset(on_disk_reftable + old_reftable_size, 0,
+-                   (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
++            if (on_disk_reftable_entries <= refblock_index) {
++                on_disk_reftable_entries =
++                    ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
++                             s->cluster_size) / REFTABLE_ENTRY_SIZE;
++                on_disk_reftable =
++                    g_try_realloc(on_disk_reftable,
++                                  on_disk_reftable_entries *
++                                  REFTABLE_ENTRY_SIZE);
++                if (!on_disk_reftable) {
++                    return -ENOMEM;
++                }
+ 
+-            /* The offset we have for the reftable is now no longer valid;
+-             * this will leak that range, but we can easily fix that by running
+-             * a leak-fixing check after this rebuild operation */
+-            reftable_offset = -1;
+-        } else {
+-            assert(on_disk_reftable);
+-        }
+-        on_disk_reftable[refblock_index] = refblock_offset;
++                memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
++                       (on_disk_reftable_entries -
++                        *on_disk_reftable_entries_ptr) *
++                       REFTABLE_ENTRY_SIZE);
+ 
+-        /* If this is apparently the last refblock (for now), try to squeeze the
+-         * reftable in */
+-        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
+-            reftable_offset < 0)
+-        {
+-            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
+-                                                          REFTABLE_ENTRY_SIZE);
+-            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
+-                                                  refcount_table, nb_clusters,
+-                                                  &first_free_cluster);
+-            if (reftable_offset < 0) {
+-                fprintf(stderr, "ERROR allocating reftable: %s\n",
+-                        strerror(-reftable_offset));
+-                res->check_errors++;
+-                ret = reftable_offset;
+-                goto fail;
++                *on_disk_reftable_ptr = on_disk_reftable;
++                *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
++
++                reftable_grown = true;
++            } else {
++                assert(on_disk_reftable);
+             }
++            on_disk_reftable[refblock_index] = refblock_offset;
+         }
+ 
++        /* Refblock is allocated, write it to disk */
++
+         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
+                                             s->cluster_size, false);
+         if (ret < 0) {
+             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+-            goto fail;
++            return ret;
+         }
+ 
+-        /* The size of *refcount_table is always cluster-aligned, therefore the
+-         * write operation will not overflow */
++        /*
++         * The refblock is simply a slice of *refcount_table.
++         * Note that the size of *refcount_table is always aligned to
++         * whole clusters, so the write operation will not result in
++         * out-of-bounds accesses.
++         */
+         on_disk_refblock = (void *)((char *) *refcount_table +
+                                     refblock_index * s->cluster_size);
+ 
+@@ -2550,23 +2579,99 @@ write_refblocks:
+                           s->cluster_size);
+         if (ret < 0) {
+             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+-            goto fail;
++            return ret;
+         }
+ 
+-        /* Go to the end of this refblock */
++        /* This refblock is done, skip to its end */
+         cluster = refblock_start + s->refcount_block_size - 1;
+     }
+ 
+-    if (reftable_offset < 0) {
+-        uint64_t post_refblock_start, reftable_clusters;
++    return reftable_grown;
++}
++
++/*
++ * Creates a new refcount structure based solely on the in-memory information
++ * given through *refcount_table (this in-memory information is basically just
++ * the concatenation of all refblocks).  All necessary allocations will be
++ * reflected in that array.
++ *
++ * On success, the old refcount structure is leaked (it will be covered by the
++ * new refcount structure).
++ */
++static int rebuild_refcount_structure(BlockDriverState *bs,
++                                      BdrvCheckResult *res,
++                                      void **refcount_table,
++                                      int64_t *nb_clusters)
++{
++    BDRVQcow2State *s = bs->opaque;
++    int64_t reftable_offset = -1;
++    int64_t reftable_length = 0;
++    int64_t reftable_clusters;
++    int64_t refblock_index;
++    uint32_t on_disk_reftable_entries = 0;
++    uint64_t *on_disk_reftable = NULL;
++    int ret = 0;
++    int reftable_size_changed = 0;
++    struct {
++        uint64_t reftable_offset;
++        uint32_t reftable_clusters;
++    } QEMU_PACKED reftable_offset_and_clusters;
++
++    qcow2_cache_empty(bs, s->refcount_block_cache);
++
++    /*
++     * For each refblock containing entries, we try to allocate a
++     * cluster (in the in-memory refcount table) and write its offset
++     * into on_disk_reftable[].  We then write the whole refblock to
++     * disk (as a slice of the in-memory refcount table).
++     * This is done by rebuild_refcounts_write_refblocks().
++     *
++     * Once we have scanned all clusters, we try to find space for the
++     * reftable.  This will dirty the in-memory refcount table (i.e.
++     * make it differ from the refblocks we have already written), so we
++     * need to run rebuild_refcounts_write_refblocks() again for the
++     * range of clusters where the reftable has been allocated.
++     *
++     * This second run might make the reftable grow again, in which case
++     * we will need to allocate another space for it, which is why we
++     * repeat all this until the reftable stops growing.
++     *
++     * (This loop will terminate, because with every cluster the
++     * reftable grows, it can accomodate a multitude of more refcounts,
++     * so that at some point this must be able to cover the reftable
++     * and all refblocks describing it.)
++     *
++     * We then convert the reftable to big-endian and write it to disk.
++     *
++     * Note that we never free any reftable allocations.  Doing so would
++     * needlessly complicate the algorithm: The eventual second check
++     * run we do will clean up all leaks we have caused.
++     */
++
++    reftable_size_changed =
++        rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
++                                          0, *nb_clusters,
++                                          &on_disk_reftable,
++                                          &on_disk_reftable_entries);
++    if (reftable_size_changed < 0) {
++        res->check_errors++;
++        ret = reftable_size_changed;
++        goto fail;
++    }
++
++    /*
++     * There was no reftable before, so rebuild_refcounts_write_refblocks()
++     * must have increased its size (from 0 to something).
++     */
++    assert(reftable_size_changed);
++
++    do {
++        int64_t reftable_start_cluster, reftable_end_cluster;
++        int64_t first_free_cluster = 0;
++
++        reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
++        reftable_clusters = size_to_clusters(s, reftable_length);
+ 
+-        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
+-        reftable_clusters =
+-            size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
+-        /* Not pretty but simple */
+-        if (first_free_cluster < post_refblock_start) {
+-            first_free_cluster = post_refblock_start;
+-        }
+         reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
+                                               refcount_table, nb_clusters,
+                                               &first_free_cluster);
+@@ -2578,24 +2683,55 @@ write_refblocks:
+             goto fail;
+         }
+ 
+-        goto write_refblocks;
+-    }
++        /*
++         * We need to update the affected refblocks, so re-run the
++         * write_refblocks loop for the reftable's range of clusters.
++         */
++        assert(offset_into_cluster(s, reftable_offset) == 0);
++        reftable_start_cluster = reftable_offset / s->cluster_size;
++        reftable_end_cluster = reftable_start_cluster + reftable_clusters;
++        reftable_size_changed =
++            rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
++                                              reftable_start_cluster,
++                                              reftable_end_cluster,
++                                              &on_disk_reftable,
++                                              &on_disk_reftable_entries);
++        if (reftable_size_changed < 0) {
++            res->check_errors++;
++            ret = reftable_size_changed;
++            goto fail;
++        }
++
++        /*
++         * If the reftable size has changed, we will need to find a new
++         * allocation, repeating the loop.
++         */
++    } while (reftable_size_changed);
+ 
+-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
++    /* The above loop must have run at least once */
++    assert(reftable_offset >= 0);
++
++    /*
++     * All allocations are done, all refblocks are written, convert the
++     * reftable to big-endian and write it to disk.
++     */
++
++    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
++         refblock_index++)
++    {
+         cpu_to_be64s(&on_disk_reftable[refblock_index]);
+     }
+ 
+-    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
+-                                        reftable_size * REFTABLE_ENTRY_SIZE,
++    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
+                                         false);
+     if (ret < 0) {
+         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+         goto fail;
+     }
+ 
+-    assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
++    assert(reftable_length < INT_MAX);
+     ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+-                      reftable_size * REFTABLE_ENTRY_SIZE);
++                      reftable_length);
+     if (ret < 0) {
+         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+         goto fail;
+@@ -2604,7 +2740,7 @@ write_refblocks:
+     /* Enter new reftable into the image header */
+     reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
+     reftable_offset_and_clusters.reftable_clusters =
+-        cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
++        cpu_to_be32(reftable_clusters);
+     ret = bdrv_pwrite_sync(bs->file,
+                            offsetof(QCowHeader, refcount_table_offset),
+                            &reftable_offset_and_clusters,
+@@ -2614,12 +2750,14 @@ write_refblocks:
+         goto fail;
+     }
+ 
+-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
++    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
++         refblock_index++)
++    {
+         be64_to_cpus(&on_disk_reftable[refblock_index]);
+     }
+     s->refcount_table = on_disk_reftable;
+     s->refcount_table_offset = reftable_offset;
+-    s->refcount_table_size = reftable_size;
++    s->refcount_table_size = on_disk_reftable_entries;
+     update_max_refcount_table_index(s);
+ 
+     return 0;
+-- 
+2.31.1
+
diff --git a/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch
new file mode 100644
index 0000000..77929a6
--- /dev/null
+++ b/kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch
@@ -0,0 +1,385 @@
+From 7a6fa42d4a4263c94b9bf18290f9e7680ea9e7f4 Mon Sep 17 00:00:00 2001
+From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Date: Mon, 25 Apr 2022 09:57:23 +0200
+Subject: [PATCH 03/16] util/event-loop-base: Introduce options to set the
+ thread pool size
+
+RH-Author: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size
+RH-Commit: [3/3] af78a88ff3c69701cbb5f9e980c3d6ebbd13ff98
+RH-Bugzilla: 2031024
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+The thread pool regulates itself: when idle, it kills threads until
+empty, when in demand, it creates new threads until full. This behaviour
+doesn't play well with latency sensitive workloads where the price of
+creating a new thread is too high. For example, when paired with qemu's
+'-mlock', or using safety features like SafeStack, creating a new thread
+has been measured take multiple milliseconds.
+
+In order to mitigate this let's introduce a new 'EventLoopBase'
+property to set the thread pool size. The threads will be created during
+the pool's initialization or upon updating the property's value, remain
+available during its lifetime regardless of demand, and destroyed upon
+freeing it. A properly characterized workload will then be able to
+configure the pool to avoid any latency spikes.
+
+Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Message-id: 20220425075723.20019-4-nsaenzju@redhat.com
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit 71ad4713cc1d7fca24388b828ef31ae6cb38a31c)
+---
+ event-loop-base.c                | 23 +++++++++++++
+ include/block/aio.h              | 10 ++++++
+ include/block/thread-pool.h      |  3 ++
+ include/sysemu/event-loop-base.h |  4 +++
+ iothread.c                       |  3 ++
+ qapi/qom.json                    | 10 +++++-
+ util/aio-posix.c                 |  1 +
+ util/async.c                     | 20 ++++++++++++
+ util/main-loop.c                 |  9 ++++++
+ util/thread-pool.c               | 55 +++++++++++++++++++++++++++++---
+ 10 files changed, 133 insertions(+), 5 deletions(-)
+
+diff --git a/event-loop-base.c b/event-loop-base.c
+index e7f99a6ec8..d5be4dc6fc 100644
+--- a/event-loop-base.c
++++ b/event-loop-base.c
+@@ -14,6 +14,7 @@
+ #include "qemu/osdep.h"
+ #include "qom/object_interfaces.h"
+ #include "qapi/error.h"
++#include "block/thread-pool.h"
+ #include "sysemu/event-loop-base.h"
+ 
+ typedef struct {
+@@ -21,9 +22,22 @@ typedef struct {
+     ptrdiff_t offset; /* field's byte offset in EventLoopBase struct */
+ } EventLoopBaseParamInfo;
+ 
++static void event_loop_base_instance_init(Object *obj)
++{
++    EventLoopBase *base = EVENT_LOOP_BASE(obj);
++
++    base->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
++}
++
+ static EventLoopBaseParamInfo aio_max_batch_info = {
+     "aio-max-batch", offsetof(EventLoopBase, aio_max_batch),
+ };
++static EventLoopBaseParamInfo thread_pool_min_info = {
++    "thread-pool-min", offsetof(EventLoopBase, thread_pool_min),
++};
++static EventLoopBaseParamInfo thread_pool_max_info = {
++    "thread-pool-max", offsetof(EventLoopBase, thread_pool_max),
++};
+ 
+ static void event_loop_base_get_param(Object *obj, Visitor *v,
+         const char *name, void *opaque, Error **errp)
+@@ -95,12 +109,21 @@ static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
+                               event_loop_base_get_param,
+                               event_loop_base_set_param,
+                               NULL, &aio_max_batch_info);
++    object_class_property_add(klass, "thread-pool-min", "int",
++                              event_loop_base_get_param,
++                              event_loop_base_set_param,
++                              NULL, &thread_pool_min_info);
++    object_class_property_add(klass, "thread-pool-max", "int",
++                              event_loop_base_get_param,
++                              event_loop_base_set_param,
++                              NULL, &thread_pool_max_info);
+ }
+ 
+ static const TypeInfo event_loop_base_info = {
+     .name = TYPE_EVENT_LOOP_BASE,
+     .parent = TYPE_OBJECT,
+     .instance_size = sizeof(EventLoopBase),
++    .instance_init = event_loop_base_instance_init,
+     .class_size = sizeof(EventLoopBaseClass),
+     .class_init = event_loop_base_class_init,
+     .abstract = true,
+diff --git a/include/block/aio.h b/include/block/aio.h
+index 5634173b12..d128558f1d 100644
+--- a/include/block/aio.h
++++ b/include/block/aio.h
+@@ -192,6 +192,8 @@ struct AioContext {
+     QSLIST_HEAD(, Coroutine) scheduled_coroutines;
+     QEMUBH *co_schedule_bh;
+ 
++    int thread_pool_min;
++    int thread_pool_max;
+     /* Thread pool for performing work and receiving completion callbacks.
+      * Has its own locking.
+      */
+@@ -769,4 +771,12 @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+ void aio_context_set_aio_params(AioContext *ctx, int64_t max_batch,
+                                 Error **errp);
+ 
++/**
++ * aio_context_set_thread_pool_params:
++ * @ctx: the aio context
++ * @min: min number of threads to have readily available in the thread pool
++ * @min: max number of threads the thread pool can contain
++ */
++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
++                                        int64_t max, Error **errp);
+ #endif
+diff --git a/include/block/thread-pool.h b/include/block/thread-pool.h
+index 7dd7d730a0..2020bcc92d 100644
+--- a/include/block/thread-pool.h
++++ b/include/block/thread-pool.h
+@@ -20,6 +20,8 @@
+ 
+ #include "block/block.h"
+ 
++#define THREAD_POOL_MAX_THREADS_DEFAULT         64
++
+ typedef int ThreadPoolFunc(void *opaque);
+ 
+ typedef struct ThreadPool ThreadPool;
+@@ -33,5 +35,6 @@ BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool,
+ int coroutine_fn thread_pool_submit_co(ThreadPool *pool,
+         ThreadPoolFunc *func, void *arg);
+ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg);
++void thread_pool_update_params(ThreadPool *pool, struct AioContext *ctx);
+ 
+ #endif
+diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
+index fced4c9fea..2748bf6ae1 100644
+--- a/include/sysemu/event-loop-base.h
++++ b/include/sysemu/event-loop-base.h
+@@ -33,5 +33,9 @@ struct EventLoopBase {
+ 
+     /* AioContext AIO engine parameters */
+     int64_t aio_max_batch;
++
++    /* AioContext thread pool parameters */
++    int64_t thread_pool_min;
++    int64_t thread_pool_max;
+ };
+ #endif
+diff --git a/iothread.c b/iothread.c
+index 8fa2f3bfb8..529194a566 100644
+--- a/iothread.c
++++ b/iothread.c
+@@ -174,6 +174,9 @@ static void iothread_set_aio_context_params(EventLoopBase *base, Error **errp)
+     aio_context_set_aio_params(iothread->ctx,
+                                iothread->parent_obj.aio_max_batch,
+                                errp);
++
++    aio_context_set_thread_pool_params(iothread->ctx, base->thread_pool_min,
++                                       base->thread_pool_max, errp);
+ }
+ 
+ 
+diff --git a/qapi/qom.json b/qapi/qom.json
+index 7d4a2ac1b9..6a653c6636 100644
+--- a/qapi/qom.json
++++ b/qapi/qom.json
+@@ -508,10 +508,18 @@
+ #                 0 means that the engine will use its default.
+ #                 (default: 0)
+ #
++# @thread-pool-min: minimum number of threads reserved in the thread pool
++#                   (default:0)
++#
++# @thread-pool-max: maximum number of threads the thread pool can contain
++#                   (default:64)
++#
+ # Since: 7.1
+ ##
+ { 'struct': 'EventLoopBaseProperties',
+-  'data': { '*aio-max-batch': 'int' } }
++  'data': { '*aio-max-batch': 'int',
++            '*thread-pool-min': 'int',
++            '*thread-pool-max': 'int' } }
+ 
+ ##
+ # @IothreadProperties:
+diff --git a/util/aio-posix.c b/util/aio-posix.c
+index be0182a3c6..731f3826c0 100644
+--- a/util/aio-posix.c
++++ b/util/aio-posix.c
+@@ -15,6 +15,7 @@
+ 
+ #include "qemu/osdep.h"
+ #include "block/block.h"
++#include "block/thread-pool.h"
+ #include "qemu/main-loop.h"
+ #include "qemu/rcu.h"
+ #include "qemu/rcu_queue.h"
+diff --git a/util/async.c b/util/async.c
+index 2ea1172f3e..554ba70cca 100644
+--- a/util/async.c
++++ b/util/async.c
+@@ -563,6 +563,9 @@ AioContext *aio_context_new(Error **errp)
+ 
+     ctx->aio_max_batch = 0;
+ 
++    ctx->thread_pool_min = 0;
++    ctx->thread_pool_max = THREAD_POOL_MAX_THREADS_DEFAULT;
++
+     return ctx;
+ fail:
+     g_source_destroy(&ctx->source);
+@@ -696,3 +699,20 @@ void qemu_set_current_aio_context(AioContext *ctx)
+     assert(!get_my_aiocontext());
+     set_my_aiocontext(ctx);
+ }
++
++void aio_context_set_thread_pool_params(AioContext *ctx, int64_t min,
++                                        int64_t max, Error **errp)
++{
++
++    if (min > max || !max || min > INT_MAX || max > INT_MAX) {
++        error_setg(errp, "bad thread-pool-min/thread-pool-max values");
++        return;
++    }
++
++    ctx->thread_pool_min = min;
++    ctx->thread_pool_max = max;
++
++    if (ctx->thread_pool) {
++        thread_pool_update_params(ctx->thread_pool, ctx);
++    }
++}
+diff --git a/util/main-loop.c b/util/main-loop.c
+index 5b13f456fa..a0f48186ab 100644
+--- a/util/main-loop.c
++++ b/util/main-loop.c
+@@ -30,6 +30,7 @@
+ #include "sysemu/replay.h"
+ #include "qemu/main-loop.h"
+ #include "block/aio.h"
++#include "block/thread-pool.h"
+ #include "qemu/error-report.h"
+ #include "qemu/queue.h"
+ #include "qemu/compiler.h"
+@@ -187,12 +188,20 @@ int qemu_init_main_loop(Error **errp)
+ 
+ static void main_loop_update_params(EventLoopBase *base, Error **errp)
+ {
++    ERRP_GUARD();
++
+     if (!qemu_aio_context) {
+         error_setg(errp, "qemu aio context not ready");
+         return;
+     }
+ 
+     aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
++    if (*errp) {
++        return;
++    }
++
++    aio_context_set_thread_pool_params(qemu_aio_context, base->thread_pool_min,
++                                       base->thread_pool_max, errp);
+ }
+ 
+ MainLoop *mloop;
+diff --git a/util/thread-pool.c b/util/thread-pool.c
+index d763cea505..196835b4d3 100644
+--- a/util/thread-pool.c
++++ b/util/thread-pool.c
+@@ -58,7 +58,6 @@ struct ThreadPool {
+     QemuMutex lock;
+     QemuCond worker_stopped;
+     QemuSemaphore sem;
+-    int max_threads;
+     QEMUBH *new_thread_bh;
+ 
+     /* The following variables are only accessed from one AioContext. */
+@@ -71,8 +70,27 @@ struct ThreadPool {
+     int new_threads;     /* backlog of threads we need to create */
+     int pending_threads; /* threads created but not running yet */
+     bool stopping;
++    int min_threads;
++    int max_threads;
+ };
+ 
++static inline bool back_to_sleep(ThreadPool *pool, int ret)
++{
++    /*
++     * The semaphore timed out, we should exit the loop except when:
++     *  - There is work to do, we raced with the signal.
++     *  - The max threads threshold just changed, we raced with the signal.
++     *  - The thread pool forces a minimum number of readily available threads.
++     */
++    if (ret == -1 && (!QTAILQ_EMPTY(&pool->request_list) ||
++            pool->cur_threads > pool->max_threads ||
++            pool->cur_threads <= pool->min_threads)) {
++            return true;
++    }
++
++    return false;
++}
++
+ static void *worker_thread(void *opaque)
+ {
+     ThreadPool *pool = opaque;
+@@ -91,8 +109,9 @@ static void *worker_thread(void *opaque)
+             ret = qemu_sem_timedwait(&pool->sem, 10000);
+             qemu_mutex_lock(&pool->lock);
+             pool->idle_threads--;
+-        } while (ret == -1 && !QTAILQ_EMPTY(&pool->request_list));
+-        if (ret == -1 || pool->stopping) {
++        } while (back_to_sleep(pool, ret));
++        if (ret == -1 || pool->stopping ||
++            pool->cur_threads > pool->max_threads) {
+             break;
+         }
+ 
+@@ -294,6 +313,33 @@ void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg)
+     thread_pool_submit_aio(pool, func, arg, NULL, NULL);
+ }
+ 
++void thread_pool_update_params(ThreadPool *pool, AioContext *ctx)
++{
++    qemu_mutex_lock(&pool->lock);
++
++    pool->min_threads = ctx->thread_pool_min;
++    pool->max_threads = ctx->thread_pool_max;
++
++    /*
++     * We either have to:
++     *  - Increase the number available of threads until over the min_threads
++     *    threshold.
++     *  - Decrease the number of available threads until under the max_threads
++     *    threshold.
++     *  - Do nothing. The current number of threads fall in between the min and
++     *    max thresholds. We'll let the pool manage itself.
++     */
++    for (int i = pool->cur_threads; i < pool->min_threads; i++) {
++        spawn_thread(pool);
++    }
++
++    for (int i = pool->cur_threads; i > pool->max_threads; i--) {
++        qemu_sem_post(&pool->sem);
++    }
++
++    qemu_mutex_unlock(&pool->lock);
++}
++
+ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
+ {
+     if (!ctx) {
+@@ -306,11 +352,12 @@ static void thread_pool_init_one(ThreadPool *pool, AioContext *ctx)
+     qemu_mutex_init(&pool->lock);
+     qemu_cond_init(&pool->worker_stopped);
+     qemu_sem_init(&pool->sem, 0);
+-    pool->max_threads = 64;
+     pool->new_thread_bh = aio_bh_new(ctx, spawn_thread_bh_fn, pool);
+ 
+     QLIST_INIT(&pool->head);
+     QTAILQ_INIT(&pool->request_list);
++
++    thread_pool_update_params(pool, ctx);
+ }
+ 
+ ThreadPool *thread_pool_new(AioContext *ctx)
+-- 
+2.31.1
+
diff --git a/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch
new file mode 100644
index 0000000..2104424
--- /dev/null
+++ b/kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch
@@ -0,0 +1,233 @@
+From b4969662de01848f887a3918e97e516efc213f71 Mon Sep 17 00:00:00 2001
+From: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Date: Mon, 25 Apr 2022 09:57:22 +0200
+Subject: [PATCH 02/16] util/main-loop: Introduce the main loop into QOM
+
+RH-Author: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+RH-MergeRequest: 93: util/thread-pool: Expose minimum and maximum size
+RH-Commit: [2/3] a481b77e25ad50d13dcbe26b36c551b18c89bddd
+RH-Bugzilla: 2031024
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+'event-loop-base' provides basic property handling for all 'AioContext'
+based event loops. So let's define a new 'MainLoopClass' that inherits
+from it. This will permit tweaking the main loop's properties through
+qapi as well as through the command line using the '-object' keyword[1].
+Only one instance of 'MainLoopClass' might be created at any time.
+
+'EventLoopBaseClass' learns a new callback, 'can_be_deleted()' so as to
+mark 'MainLoop' as non-deletable.
+
+[1] For example:
+      -object main-loop,id=main-loop,aio-max-batch=<value>
+
+Signed-off-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Message-id: 20220425075723.20019-3-nsaenzju@redhat.com
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit 70ac26b9e5ca8374bb3ef3f30b871726673c9f27)
+---
+ event-loop-base.c                | 13 ++++++++
+ include/qemu/main-loop.h         | 10 ++++++
+ include/sysemu/event-loop-base.h |  1 +
+ meson.build                      |  3 +-
+ qapi/qom.json                    | 13 ++++++++
+ util/main-loop.c                 | 56 ++++++++++++++++++++++++++++++++
+ 6 files changed, 95 insertions(+), 1 deletion(-)
+
+diff --git a/event-loop-base.c b/event-loop-base.c
+index a924c73a7c..e7f99a6ec8 100644
+--- a/event-loop-base.c
++++ b/event-loop-base.c
+@@ -73,10 +73,23 @@ static void event_loop_base_complete(UserCreatable *uc, Error **errp)
+     }
+ }
+ 
++static bool event_loop_base_can_be_deleted(UserCreatable *uc)
++{
++    EventLoopBaseClass *bc = EVENT_LOOP_BASE_GET_CLASS(uc);
++    EventLoopBase *backend = EVENT_LOOP_BASE(uc);
++
++    if (bc->can_be_deleted) {
++        return bc->can_be_deleted(backend);
++    }
++
++    return true;
++}
++
+ static void event_loop_base_class_init(ObjectClass *klass, void *class_data)
+ {
+     UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
+     ucc->complete = event_loop_base_complete;
++    ucc->can_be_deleted = event_loop_base_can_be_deleted;
+ 
+     object_class_property_add(klass, "aio-max-batch", "int",
+                               event_loop_base_get_param,
+diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
+index d3750c8e76..20c9387654 100644
+--- a/include/qemu/main-loop.h
++++ b/include/qemu/main-loop.h
+@@ -26,9 +26,19 @@
+ #define QEMU_MAIN_LOOP_H
+ 
+ #include "block/aio.h"
++#include "qom/object.h"
++#include "sysemu/event-loop-base.h"
+ 
+ #define SIG_IPI SIGUSR1
+ 
++#define TYPE_MAIN_LOOP  "main-loop"
++OBJECT_DECLARE_TYPE(MainLoop, MainLoopClass, MAIN_LOOP)
++
++struct MainLoop {
++    EventLoopBase parent_obj;
++};
++typedef struct MainLoop MainLoop;
++
+ /**
+  * qemu_init_main_loop: Set up the process so that it can run the main loop.
+  *
+diff --git a/include/sysemu/event-loop-base.h b/include/sysemu/event-loop-base.h
+index 8e77d8b69f..fced4c9fea 100644
+--- a/include/sysemu/event-loop-base.h
++++ b/include/sysemu/event-loop-base.h
+@@ -25,6 +25,7 @@ struct EventLoopBaseClass {
+ 
+     void (*init)(EventLoopBase *base, Error **errp);
+     void (*update_params)(EventLoopBase *base, Error **errp);
++    bool (*can_be_deleted)(EventLoopBase *base);
+ };
+ 
+ struct EventLoopBase {
+diff --git a/meson.build b/meson.build
+index b9c919a55e..5a7c10e639 100644
+--- a/meson.build
++++ b/meson.build
+@@ -2832,7 +2832,8 @@ libqemuutil = static_library('qemuutil',
+                              sources: util_ss.sources() + stub_ss.sources() + genh,
+                              dependencies: [util_ss.dependencies(), libm, threads, glib, socket, malloc, pixman])
+ qemuutil = declare_dependency(link_with: libqemuutil,
+-                              sources: genh + version_res)
++                              sources: genh + version_res,
++                              dependencies: [event_loop_base])
+ 
+ if have_system or have_user
+   decodetree = generator(find_program('scripts/decodetree.py'),
+diff --git a/qapi/qom.json b/qapi/qom.json
+index a2439533c5..7d4a2ac1b9 100644
+--- a/qapi/qom.json
++++ b/qapi/qom.json
+@@ -540,6 +540,17 @@
+             '*poll-grow': 'int',
+             '*poll-shrink': 'int' } }
+ 
++##
++# @MainLoopProperties:
++#
++# Properties for the main-loop object.
++#
++# Since: 7.1
++##
++{ 'struct': 'MainLoopProperties',
++  'base': 'EventLoopBaseProperties',
++  'data': {} }
++
+ ##
+ # @MemoryBackendProperties:
+ #
+@@ -830,6 +841,7 @@
+     { 'name': 'input-linux',
+       'if': 'CONFIG_LINUX' },
+     'iothread',
++    'main-loop',
+     { 'name': 'memory-backend-epc',
+       'if': 'CONFIG_LINUX' },
+     'memory-backend-file',
+@@ -895,6 +907,7 @@
+       'input-linux':                { 'type': 'InputLinuxProperties',
+                                       'if': 'CONFIG_LINUX' },
+       'iothread':                   'IothreadProperties',
++      'main-loop':                  'MainLoopProperties',
+       'memory-backend-epc':         { 'type': 'MemoryBackendEpcProperties',
+                                       'if': 'CONFIG_LINUX' },
+       'memory-backend-file':        'MemoryBackendFileProperties',
+diff --git a/util/main-loop.c b/util/main-loop.c
+index b7b0ce4ca0..5b13f456fa 100644
+--- a/util/main-loop.c
++++ b/util/main-loop.c
+@@ -33,6 +33,7 @@
+ #include "qemu/error-report.h"
+ #include "qemu/queue.h"
+ #include "qemu/compiler.h"
++#include "qom/object.h"
+ 
+ #ifndef _WIN32
+ #include <sys/wait.h>
+@@ -184,6 +185,61 @@ int qemu_init_main_loop(Error **errp)
+     return 0;
+ }
+ 
++static void main_loop_update_params(EventLoopBase *base, Error **errp)
++{
++    if (!qemu_aio_context) {
++        error_setg(errp, "qemu aio context not ready");
++        return;
++    }
++
++    aio_context_set_aio_params(qemu_aio_context, base->aio_max_batch, errp);
++}
++
++MainLoop *mloop;
++
++static void main_loop_init(EventLoopBase *base, Error **errp)
++{
++    MainLoop *m = MAIN_LOOP(base);
++
++    if (mloop) {
++        error_setg(errp, "only one main-loop instance allowed");
++        return;
++    }
++
++    main_loop_update_params(base, errp);
++
++    mloop = m;
++    return;
++}
++
++static bool main_loop_can_be_deleted(EventLoopBase *base)
++{
++    return false;
++}
++
++static void main_loop_class_init(ObjectClass *oc, void *class_data)
++{
++    EventLoopBaseClass *bc = EVENT_LOOP_BASE_CLASS(oc);
++
++    bc->init = main_loop_init;
++    bc->update_params = main_loop_update_params;
++    bc->can_be_deleted = main_loop_can_be_deleted;
++}
++
++static const TypeInfo main_loop_info = {
++    .name = TYPE_MAIN_LOOP,
++    .parent = TYPE_EVENT_LOOP_BASE,
++    .class_init = main_loop_class_init,
++    .instance_size = sizeof(MainLoop),
++};
++
++static void main_loop_register_types(void)
++{
++    type_register_static(&main_loop_info);
++}
++
++type_init(main_loop_register_types)
++
+ static int max_priority;
+ 
+ #ifndef _WIN32
+-- 
+2.31.1
+
diff --git a/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch
new file mode 100644
index 0000000..7e644c5
--- /dev/null
+++ b/kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch
@@ -0,0 +1,78 @@
+From 3de8fb9f3dba18d04efa10b70bcec641035effc5 Mon Sep 17 00:00:00 2001
+From: Eric Auger <eric.auger@redhat.com>
+Date: Tue, 24 May 2022 05:14:05 -0400
+Subject: [PATCH 16/16] vfio/common: remove spurious warning on
+ vfio_listener_region_del
+
+RH-Author: Eric Auger <eric.auger@redhat.com>
+RH-MergeRequest: 101: vfio/common: remove spurious warning on vfio_listener_region_del
+RH-Commit: [1/1] dac688b8a981ebb964fea79ea198c329b9cdb551 (eauger1/centos-qemu-kvm)
+RH-Bugzilla: 2086262
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Alex Williamson <None>
+
+    Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2086262
+    Brew: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=45876133
+    Upstream Status: YES
+    Tested: With TPM-CRB and VFIO
+
+851d6d1a0f ("vfio/common: remove spurious tpm-crb-cmd misalignment
+warning") removed the warning on vfio_listener_region_add() path.
+
+However the same warning also hits on region_del path. Let's remove
+it and reword the dynamic trace as this can be called on both
+map and unmap path.
+
+Contextual Conflict in hw/vfio/common.c
+We don't have 8e3b0cbb721 ("Replace qemu_real_host_page variables with inlined functions")
+
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+Link: https://lore.kernel.org/r/20220524091405.416256-1-eric.auger@redhat.com
+Fixes: 851d6d1a0ff2 ("vfio/common: remove spurious tpm-crb-cmd misalignment warning")
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+(cherry picked from commit ec6600be0dc16982181c7ad80d94c143c0807dd2)
+Signed-off-by: Eric Auger <eric.auger@redhat.com>
+---
+ hw/vfio/common.c     | 10 +++++++++-
+ hw/vfio/trace-events |  2 +-
+ 2 files changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/hw/vfio/common.c b/hw/vfio/common.c
+index 0fbe0d47af..637981f9a1 100644
+--- a/hw/vfio/common.c
++++ b/hw/vfio/common.c
+@@ -1145,7 +1145,15 @@ static void vfio_listener_region_del(MemoryListener *listener,
+     if (unlikely((section->offset_within_address_space &
+                   ~qemu_real_host_page_mask) !=
+                  (section->offset_within_region & ~qemu_real_host_page_mask))) {
+-        error_report("%s received unaligned region", __func__);
++        if (!vfio_known_safe_misalignment(section)) {
++            error_report("%s received unaligned region %s iova=0x%"PRIx64
++                         " offset_within_region=0x%"PRIx64
++                         " qemu_real_host_page_size=0x%"PRIxPTR,
++                         __func__, memory_region_name(section->mr),
++                         section->offset_within_address_space,
++                         section->offset_within_region,
++                         qemu_real_host_page_size);
++        }
+         return;
+     }
+ 
+diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
+index 582882db91..73dffe9e00 100644
+--- a/hw/vfio/trace-events
++++ b/hw/vfio/trace-events
+@@ -100,7 +100,7 @@ vfio_listener_region_add_skip(uint64_t start, uint64_t end) "SKIPPING region_add
+ vfio_spapr_group_attach(int groupfd, int tablefd) "Attached groupfd %d to liobn fd %d"
+ vfio_listener_region_add_iommu(uint64_t start, uint64_t end) "region_add [iommu] 0x%"PRIx64" - 0x%"PRIx64
+ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr) "region_add [ram] 0x%"PRIx64" - 0x%"PRIx64" [%p]"
+-vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR ": cannot be mapped for DMA"
++vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
+ vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
+ vfio_listener_region_del_skip(uint64_t start, uint64_t end) "SKIPPING region_del 0x%"PRIx64" - 0x%"PRIx64
+ vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
+-- 
+2.31.1
+
diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
new file mode 100644
index 0000000..70e8f59
--- /dev/null
+++ b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
@@ -0,0 +1,56 @@
+From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:15 -0700
+Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+vhost_net_start() missed a corresponding stop_one() upon error from
+vhost_set_vring_enable(). While at it, make the error handling for
+err_start more robust. No real issue was found due to this though.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/vhost_net.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
+index 30379d2ca4..d6d7c51f62 100644
+--- a/hw/net/vhost_net.c
++++ b/hw/net/vhost_net.c
+@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+             r = vhost_set_vring_enable(peer, peer->vring_enable);
+ 
+             if (r < 0) {
++                vhost_net_stop_one(get_vhost_net(peer), dev);
+                 goto err_start;
+             }
+         }
+@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+ 
+ err_start:
+     while (--i >= 0) {
+-        peer = qemu_get_peer(ncs , i);
++        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
++                                  i : n->max_queue_pairs);
+         vhost_net_stop_one(get_vhost_net(peer), dev);
+     }
+     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
+-- 
+2.31.1
+
diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
new file mode 100644
index 0000000..747bf5f
--- /dev/null
+++ b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
@@ -0,0 +1,58 @@
+From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:16 -0700
+Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+The vhost_vdpa_one_time_request() branch in
+vhost_vdpa_set_backend_cap() incorrectly sends down
+ioctls on vhost_dev with non-zero index. This may
+end up with multiple VHOST_SET_BACKEND_FEATURES
+ioctl calls sent down on the vhost-vdpa fd that is
+shared between all these vhost_dev's.
+
+To fix it, send down ioctl only once via the first
+vhost_dev with index 0. Toggle the polarity of the
+vhost_vdpa_one_time_request() test should do the
+trick.
+
+Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Eugenio Pérez <eperezma@redhat.com>
+Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/virtio/vhost-vdpa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 8adf7c0b92..6e3dbd9e89 100644
+--- a/hw/virtio/vhost-vdpa.c
++++ b/hw/virtio/vhost-vdpa.c
+@@ -665,7 +665,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+ 
+     features &= f;
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
++    if (!vhost_vdpa_one_time_request(dev)) {
+         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+         if (r) {
+             return -EFAULT;
+-- 
+2.31.1
+
diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
new file mode 100644
index 0000000..2466557
--- /dev/null
+++ b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
@@ -0,0 +1,123 @@
+From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:17 -0700
+Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for
+ vhost_vdpa_one_time_request()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+The name vhost_vdpa_one_time_request() was confusing. No
+matter whatever it returns, its typical occurrence had
+always been at requests that only need to be applied once.
+And the name didn't suggest what it actually checks for.
+Change it to vhost_vdpa_first_dev() with polarity flipped
+for better readibility of code. That way it is able to
+reflect what the check is really about.
+
+This call is applicable to request which performs operation
+only once, before queues are set up, and usually at the beginning
+of the caller function. Document the requirement for it in place.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/virtio/vhost-vdpa.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 6e3dbd9e89..33dcaa135e 100644
+--- a/hw/virtio/vhost-vdpa.c
++++ b/hw/virtio/vhost-vdpa.c
+@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
+                                     v->iova_range.last);
+ }
+ 
+-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
++/*
++ * The use of this function is for requests that only need to be
++ * applied once. Typically such request occurs at the beginning
++ * of operation, and before setting up queues. It should not be
++ * used for request that performs operation until all queues are
++ * set, which would need to check dev->vq_index_end instead.
++ */
++static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
+ {
+     struct vhost_vdpa *v = dev->opaque;
+ 
+-    return v->index != 0;
++    return v->index == 0;
+ }
+ 
+ static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
+@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
+ 
+     vhost_vdpa_get_iova_range(v);
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
++    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                     struct vhost_memory *mem)
+ {
+-    if (vhost_vdpa_one_time_request(dev)) {
++    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
+     struct vhost_vdpa *v = dev->opaque;
+     int ret;
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
++    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+ 
+     features &= f;
+ 
+-    if (!vhost_vdpa_one_time_request(dev)) {
++    if (vhost_vdpa_first_dev(dev)) {
+         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+         if (r) {
+             return -EFAULT;
+@@ -1118,7 +1125,7 @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                      struct vhost_log *log)
+ {
+     struct vhost_vdpa *v = dev->opaque;
+-    if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
++    if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
+ 
+ static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+ {
+-    if (vhost_vdpa_one_time_request(dev)) {
++    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+-- 
+2.31.1
+
diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
new file mode 100644
index 0000000..7716cbf
--- /dev/null
+++ b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
@@ -0,0 +1,48 @@
+From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:14 -0700
+Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+... such that no memory leaks on dangling net clients in case of
+error.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ net/vhost-vdpa.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
+index 1e9fe47c03..df1e69ee72 100644
+--- a/net/vhost-vdpa.c
++++ b/net/vhost-vdpa.c
+@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+ 
+ err:
+     if (i) {
+-        qemu_del_net_client(ncs[0]);
++        for (i--; i >= 0; i--) {
++            qemu_del_net_client(ncs[i]);
++        }
+     }
+     qemu_close(vdpa_device_fd);
+ 
+-- 
+2.31.1
+
diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
new file mode 100644
index 0000000..9da7ea7
--- /dev/null
+++ b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
@@ -0,0 +1,143 @@
+From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:13 -0700
+Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for
+ vhost_vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+With MQ enabled vdpa device and non-MQ supporting guest e.g.
+booting vdpa with mq=on over OVMF of single vqp, below assert
+failure is seen:
+
+../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed.
+
+0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
+1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
+2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
+3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
+4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
+5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
+6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
+7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
+   at ../hw/virtio/virtio-pci.c:974
+8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
+9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:361
+10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
+11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
+12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
+13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
+14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
+   at ../softmmu/memory.c:492
+15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
+16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
+   at ../softmmu/memory.c:1504
+17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165
+18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
+19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
+   at ../softmmu/physmem.c:2914
+20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
+   attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
+21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
+22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
+23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
+24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
+25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
+
+The cause for the assert failure is due to that the vhost_dev index
+for the ctrl vq was not aligned with actual one in use by the guest.
+Upon multiqueue feature negotiation in virtio_net_set_multiqueue(),
+if guest doesn't support multiqueue, the guest vq layout would shrink
+to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl).
+This results in ctrl_vq taking a different vhost_dev group index than
+the default. We can map vq to the correct vhost_dev group by checking
+if MQ is supported by guest and successfully negotiated. Since the
+MQ feature is only present along with CTRL_VQ, we ensure the index
+2 is only meant for the control vq while MQ is not supported by guest.
+
+Fixes: 22288fe ("virtio-net: vhost control virtqueue support")
+Suggested-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++--
+ 1 file changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index ffb3475201..f0bb29c741 100644
+--- a/hw/net/virtio-net.c
++++ b/hw/net/virtio-net.c
+@@ -14,6 +14,7 @@
+ #include "qemu/osdep.h"
+ #include "qemu/atomic.h"
+ #include "qemu/iov.h"
++#include "qemu/log.h"
+ #include "qemu/main-loop.h"
+ #include "qemu/module.h"
+ #include "hw/virtio/virtio.h"
+@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = {
+ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+ {
+     VirtIONet *n = VIRTIO_NET(vdev);
+-    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
++    NetClientState *nc;
+     assert(n->vhost_started);
++    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
++        /* Must guard against invalid features and bogus queue index
++         * from being set by malicious guest, or penetrated through
++         * buggy migration stream.
++         */
++        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
++            qemu_log_mask(LOG_GUEST_ERROR,
++                          "%s: bogus vq index ignored\n", __func__);
++            return false;
++        }
++        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
++    } else {
++        nc = qemu_get_subqueue(n->nic, vq2q(idx));
++    }
+     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+ }
+ 
+@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                            bool mask)
+ {
+     VirtIONet *n = VIRTIO_NET(vdev);
+-    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
++    NetClientState *nc;
+     assert(n->vhost_started);
++    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
++        /* Must guard against invalid features and bogus queue index
++         * from being set by malicious guest, or penetrated through
++         * buggy migration stream.
++         */
++        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
++            qemu_log_mask(LOG_GUEST_ERROR,
++                          "%s: bogus vq index ignored\n", __func__);
++            return;
++        }
++        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
++    } else {
++        nc = qemu_get_subqueue(n->nic, vq2q(idx));
++    }
+     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
+                              vdev, idx, mask);
+ }
+-- 
+2.31.1
+
diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
new file mode 100644
index 0000000..3930cc2
--- /dev/null
+++ b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
@@ -0,0 +1,109 @@
+From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:18 -0700
+Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace
+ handler for vhost-vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+virtio_queue_host_notifier_read() tends to read pending event
+left behind on ioeventfd in the vhost_net_stop() path, and
+attempts to handle outstanding kicks from userspace vq handler.
+However, in the ctrl_vq handler, virtio_net_handle_mq() has a
+recursive call into virtio_net_set_status(), which may lead to
+segmentation fault as shown in below stack trace:
+
+0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376
+1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
+2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at ../hw/virtio/vhost.c:318
+3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336
+4  0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
+5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
+6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
+7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:423
+8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
+9  0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
+10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408
+11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
+12 0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
+13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
+14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
+15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
+   at ../../../include/hw/virtio/virtio-bus.h:35
+16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
+17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:423
+18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
+19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
+20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=<optimized out>) at ../hw/virtio/virtio.c:1945
+21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
+22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262
+23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
+24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
+25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:51
+
+For now, temporarily disable handling MQ request from the ctrl_vq
+userspace hanlder to avoid the recursive virtio_net_set_status()
+call. Some rework is needed to allow changing the number of
+queues without going through a full virtio_net_set_status cycle,
+particularly for vhost-vdpa backend.
+
+This patch will need to be reverted as soon as future patches of
+having the change of #queues handled in userspace is merged.
+
+Fixes: 402378407db ("vhost-vdpa: multiqueue support")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index f0bb29c741..099e65036d 100644
+--- a/hw/net/virtio-net.c
++++ b/hw/net/virtio-net.c
+@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+ {
+     VirtIODevice *vdev = VIRTIO_DEVICE(n);
+     uint16_t queue_pairs;
++    NetClientState *nc = qemu_get_queue(n->nic);
+ 
+     virtio_net_disable_rss(n);
+     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
+@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+         return VIRTIO_NET_ERR;
+     }
+ 
++    /* Avoid changing the number of queue_pairs for vdpa device in
++     * userspace handler. A future fix is needed to handle the mq
++     * change in userspace handler with vhost-vdpa. Let's disable
++     * the mq handling from userspace for now and only allow get
++     * done through the kernel. Ripples may be seen when falling
++     * back to userspace, but without doing it qemu process would
++     * crash on a recursive entry to virtio_net_set_status().
++     */
++    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
++        return VIRTIO_NET_ERR;
++    }
++
+     n->curr_queue_pairs = queue_pairs;
+     /* stop the backend before changing the number of queue_pairs to avoid handling a
+      * disabled queue */
+-- 
+2.31.1
+
diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
new file mode 100644
index 0000000..f6072d2
--- /dev/null
+++ b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
@@ -0,0 +1,52 @@
+From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:12 -0700
+Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only
+ when feature is negotiated
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA
+RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs)
+RH-Bugzilla: 2070804
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+
+When the control virtqueue feature is absent or not negotiated,
+vhost_net_start() still tries to set up vhost_dev and install
+vhost notifiers for the control virtqueue, which results in
+erroneous ioctl calls with incorrect queue index sending down
+to driver. Do that only when needed.
+
+Fixes: 22288fe ("virtio-net: vhost control virtqueue support")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index 1067e72b39..ffb3475201 100644
+--- a/hw/net/virtio-net.c
++++ b/hw/net/virtio-net.c
+@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+     VirtIODevice *vdev = VIRTIO_DEVICE(n);
+     NetClientState *nc = qemu_get_queue(n->nic);
+     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+-    int cvq = n->max_ncs - n->max_queue_pairs;
++    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
++              n->max_ncs - n->max_queue_pairs : 0;
+ 
+     if (!get_vhost_net(nc->peer)) {
+         return;
+-- 
+2.31.1
+
diff --git a/qemu-kvm.spec b/qemu-kvm.spec
index da9fadb..26f0c0a 100644
--- a/qemu-kvm.spec
+++ b/qemu-kvm.spec
@@ -151,7 +151,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version}                    \
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 7.0.0
-Release: 5%{?rcrel}%{?dist}%{?cc_suffix}
+Release: 6%{?rcrel}%{?dist}%{?cc_suffix}
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 # Epoch 15 used for RHEL 8
 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@@ -244,6 +244,38 @@ Patch44: kvm-migration-Fix-operator-type.patch
 Patch45: kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch
 # For bz#1708300 - RFE: qemu-nbd vs NBD_FLAG_CAN_MULTI_CONN
 Patch46: kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch
+# For bz#2031024 - Add support for fixing thread pool size [QEMU]
+Patch47: kvm-Introduce-event-loop-base-abstract-class.patch
+# For bz#2031024 - Add support for fixing thread pool size [QEMU]
+Patch48: kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch
+# For bz#2031024 - Add support for fixing thread pool size [QEMU]
+Patch49: kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch
+# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch50: kvm-qcow2-Improve-refcount-structure-rebuilding.patch
+# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch51: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
+# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch52: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
+# For bz#2072379 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch53: kvm-iotests-108-Fix-when-missing-user_allow_other.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch54: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch55: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch56: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch57: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch58: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch59: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
+# For bz#2070804 - PXE boot crash qemu when using multiqueue vDPA
+Patch60: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
+# For bz#2094270 - Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore
+Patch61: kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch
+# For bz#2086262 - [Win11][tpm]vfio_listener_region_del received unaligned region
+Patch62: kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch
 
 # Source-git patches
 
@@ -1279,6 +1311,34 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
 %endif
 
 %changelog
+* Mon Jun 13 2022 Miroslav Rezanina <mrezanin@redhat.com> - 7.0.0-6
+- kvm-Introduce-event-loop-base-abstract-class.patch [bz#2031024]
+- kvm-util-main-loop-Introduce-the-main-loop-into-QOM.patch [bz#2031024]
+- kvm-util-event-loop-base-Introduce-options-to-set-the-th.patch [bz#2031024]
+- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#2072379]
+- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#2072379]
+- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#2072379]
+- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#2072379]
+- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2070804]
+- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2070804]
+- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2070804]
+- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2070804]
+- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2070804]
+- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2070804]
+- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2070804]
+- kvm-Revert-globally-limit-the-maximum-number-of-CPUs.patch [bz#2094270]
+- kvm-vfio-common-remove-spurious-warning-on-vfio_listener.patch [bz#2086262]
+- Resolves: bz#2031024
+  (Add support for fixing thread pool size [QEMU])
+- Resolves: bz#2072379
+  (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs))
+- Resolves: bz#2070804
+  (PXE boot crash qemu when using multiqueue vDPA)
+- Resolves: bz#2094270
+  (Do not set the hard vCPU limit to the soft vCPU limit in downstream qemu-kvm anymore)
+- Resolves: bz#2086262
+  ([Win11][tpm]vfio_listener_region_del received unaligned region)
+
 * Mon May 30 2022 Miroslav Rezanina <mrezanin@redhat.com> - 7.0.0-5
 - kvm-qemu-nbd-Pass-max-connections-to-blockdev-layer.patch [bz#1708300]
 - kvm-nbd-server-Allow-MULTI_CONN-for-shared-writable-expo.patch [bz#1708300]