import qemu-kvm-6.2.0-20.module+el8.7.0+16634+93bdaf0e.1

2022-09-20 23:07:15 +00:00 · 2022-09-20 23:07:15 +00:00 · 82e468bb05
commit 82e468bb05
parent d1ab2a85b4
88 changed files with 12566 additions and 1 deletions
--- a/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch
+++ b/SOURCES/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch
@ -0,0 +1,87 @@
+From cd49a32e9c9e33efc51652b68180a07683814b4d Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 11 Jul 2022 18:11:12 -0300
+Subject: [PATCH 4/9] Add dirty-sync-missed-zero-copy migration stat
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [4/8] 56cce61cf95aafc8dafae7531b43c166084abfec
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Message-Id: <20220711211112.18951-3-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 2 ++
+ monitor/hmp-cmds.c    | 5 +++++
+ qapi/migration.json   | 7 ++++++-
+ 3 files changed, 13 insertions(+), 1 deletion(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index e100b30f00..952a26c5c2 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -1012,6 +1012,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
+     info->ram->normal_bytes = ram_counters.normal * page_size;
+     info->ram->mbps = s->mbps;
+     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+    info->ram->dirty_sync_missed_zero_copy =
+            ram_counters.dirty_sync_missed_zero_copy;
+     info->ram->postcopy_requests = ram_counters.postcopy_requests;
+     info->ram->page_size = page_size;
+     info->ram->multifd_bytes = ram_counters.multifd_bytes;
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 8c384dc1b2..f7216ab5d0 100644
+--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
+@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
+             monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n",
+                            info->ram->postcopy_bytes >> 10);
+         }
+        if (info->ram->dirty_sync_missed_zero_copy) {
+            monitor_printf(mon,
+                           "Zero-copy-send fallbacks happened: %" PRIu64 " times\n",
+                           info->ram->dirty_sync_missed_zero_copy);
+        }
+     }
+ 
+     if (info->has_disk) {
+diff --git a/qapi/migration.json b/qapi/migration.json
+index c8ec260ab0..94bc5c69db 100644
+--- a/qapi/migration.json
+++ b/qapi/migration.json
+@@ -55,6 +55,10 @@
+ # @postcopy-bytes: The number of bytes sent during the post-copy phase
+ #                  (since 7.0).
+ #
+# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could
+#                               not avoid copying dirty pages. This is between
+#                               0 and @dirty-sync-count * @multifd-channels.
+#                               (since 7.1)
+ # Since: 0.14
+ ##
+ { 'struct': 'MigrationStats',
+@@ -65,7 +69,8 @@
+            'postcopy-requests' : 'int', 'page-size' : 'int',
+            'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64',
+            'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64',
+-           'postcopy-bytes' : 'uint64' } }
+           'postcopy-bytes' : 'uint64',
+           'dirty-sync-missed-zero-copy' : 'uint64' } }
+ 
+ ##
+ # @XBZRLECacheStats:
+-- 
+2.31.1
+
--- a/SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
+++ b/SOURCES/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
@ -0,0 +1,109 @@
+From ea5299b5dde7d0b6b2f93cb646e6a24c9f105466 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Wed, 23 Mar 2022 12:33:25 +0100
+Subject: [PATCH 13/24] KVM: x86: workaround invalid CPUID[0xD,9] info on some
+ AMD processors
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [13/13] 38f147c911258e84e01336271ebd23a1c24371fc
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Some AMD processors expose the PKRU extended save state even if they do not have
+the related PKU feature in CPUID.  Worse, when they do they report a size of
+64, whereas the expected size of the PKRU extended save state is 8, therefore
+the esa->size == eax assertion does not hold.
+
+The state is already ignored by KVM_GET_SUPPORTED_CPUID because it
+was not enabled in the host XCR0.  However, QEMU kvm_cpu_xsave_init()
+runs before QEMU invokes arch_prctl() to enable dynamically-enabled
+save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save
+states that have yet to be enabled.  Therefore, kvm_cpu_xsave_init()
+needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID,
+and dies with an assertion failure.
+
+When setting up the ExtSaveArea array to match the host, ignore features that
+KVM does not report as supported.  This will cause QEMU to skip the incorrect
+CPUID leaf instead of tripping the assertion.
+
+Closes: https://gitlab.com/qemu-project/qemu/-/issues/916
+Reported-by: Daniel P. Berrangé <berrange@redhat.com>
+Analyzed-by: Yang Zhong <yang.zhong@intel.com>
+Reported-by: Peter Krempa <pkrempa@redhat.com>
+Tested-by: Daniel P. Berrangé <berrange@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 58f7db26f21c690cf9a669c314cfd7371506084a)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c         |  4 ++--
+ target/i386/cpu.h         |  2 ++
+ target/i386/kvm/kvm-cpu.c | 19 ++++++++++++-------
+ 3 files changed, 16 insertions(+), 9 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 09e08f7f38..0543b846ff 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -4980,8 +4980,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp)
+     return cpu_list;
+ }
+ 
+-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+-                                                   bool migratable_only)
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+                                            bool migratable_only)
+ {
+     FeatureWordInfo *wi = &feature_word_info[w];
+     uint64_t r = 0;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 8ab2a4042a..006b735fe4 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -604,6 +604,8 @@ typedef enum FeatureWord {
+ } FeatureWord;
+ 
+ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+uint64_t x86_cpu_get_supported_feature_word(FeatureWord w,
+                                            bool migratable_only);
+ 
+ /* cpuid_features bits */
+ #define CPUID_FP87 (1U << 0)
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index bdc967c484..74c1396a93 100644
+--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
+@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void)
+     for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) {
+         ExtSaveArea *esa = &x86_ext_save_areas[i];
+ 
+-        if (esa->size) {
+-            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+-            if (eax != 0) {
+-                assert(esa->size == eax);
+-                esa->offset = ebx;
+-                esa->ecx = ecx;
+-            }
+        if (!esa->size) {
+            continue;
+        }
+        if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits)
+            != esa->bits) {
+            continue;
+        }
+        host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+        if (eax != 0) {
+            assert(esa->size == eax);
+            esa->offset = ebx;
+            esa->ecx = ecx;
+         }
+     }
+ }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch
+++ b/SOURCES/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch
@ -0,0 +1,420 @@
+From 7eeec7c008e947bc3e1fed682791092b408852c6 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 17/37] QIOChannel: Add flags on io_writev and introduce
+ io_flush callback
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [17/26] 7bde4e79fd3f76a6cc84d9cacf50420584ddd35c
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Add flags to io_writev and introduce io_flush as optional callback to
+QIOChannelClass, allowing the implementation of zero copy writes by
+subclasses.
+
+How to use them:
+- Write data using qio_channel_writev*(...,QIO_CHANNEL_WRITE_FLAG_ZERO_COPY),
+- Wait write completion with qio_channel_flush().
+
+Notes:
+As some zero copy write implementations work asynchronously, it's
+recommended to keep the write buffer untouched until the return of
+qio_channel_flush(), to avoid the risk of sending an updated buffer
+instead of the buffer state during write.
+
+As io_flush callback is optional, if a subclass does not implement it, then:
+- io_flush will return 0 without changing anything.
+
+Also, some functions like qio_channel_writev_full_all() were adapted to
+receive a flag parameter. That allows shared code between zero copy and
+non-zero copy writev, and also an easier implementation of new flags.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Message-Id: <20220513062836.965425-3-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit b88651cb4d4fa416fdbb6afaf5b26ec8c035eaad)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ chardev/char-io.c                   |  2 +-
+ hw/remote/mpqemu-link.c             |  2 +-
+ include/io/channel.h                | 38 +++++++++++++++++++++-
+ io/channel-buffer.c                 |  1 +
+ io/channel-command.c                |  1 +
+ io/channel-file.c                   |  1 +
+ io/channel-socket.c                 |  2 ++
+ io/channel-tls.c                    |  1 +
+ io/channel-websock.c                |  1 +
+ io/channel.c                        | 49 +++++++++++++++++++++++------
+ migration/rdma.c                    |  1 +
+ scsi/pr-manager-helper.c            |  2 +-
+ tests/unit/test-io-channel-socket.c |  1 +
+ 13 files changed, 88 insertions(+), 14 deletions(-)
+
+diff --git a/chardev/char-io.c b/chardev/char-io.c
+index 8ced184160..4451128cba 100644
+--- a/chardev/char-io.c
+++ b/chardev/char-io.c
+@@ -122,7 +122,7 @@ int io_channel_send_full(QIOChannel *ioc,
+ 
+         ret = qio_channel_writev_full(
+             ioc, &iov, 1,
+-            fds, nfds, NULL);
+            fds, nfds, 0, NULL);
+         if (ret == QIO_CHANNEL_ERR_BLOCK) {
+             if (offset) {
+                 return offset;
+diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
+index 7e841820e5..e8f556bd27 100644
+--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
+@@ -69,7 +69,7 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
+     }
+ 
+     if (!qio_channel_writev_full_all(ioc, send, G_N_ELEMENTS(send),
+-                                    fds, nfds, errp)) {
+                                    fds, nfds, 0, errp)) {
+         ret = true;
+     } else {
+         trace_mpqemu_send_io_error(msg->cmd, msg->size, nfds);
+diff --git a/include/io/channel.h b/include/io/channel.h
+index 88988979f8..c680ee7480 100644
+--- a/include/io/channel.h
+++ b/include/io/channel.h
+@@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
+ 
+ #define QIO_CHANNEL_ERR_BLOCK -2
+ 
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
+ typedef enum QIOChannelFeature QIOChannelFeature;
+ 
+ enum QIOChannelFeature {
+     QIO_CHANNEL_FEATURE_FD_PASS,
+     QIO_CHANNEL_FEATURE_SHUTDOWN,
+     QIO_CHANNEL_FEATURE_LISTEN,
+    QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+ };
+ 
+ 
+@@ -104,6 +107,7 @@ struct QIOChannelClass {
+                          size_t niov,
+                          int *fds,
+                          size_t nfds,
+                         int flags,
+                          Error **errp);
+     ssize_t (*io_readv)(QIOChannel *ioc,
+                         const struct iovec *iov,
+@@ -136,6 +140,8 @@ struct QIOChannelClass {
+                                   IOHandler *io_read,
+                                   IOHandler *io_write,
+                                   void *opaque);
+    int (*io_flush)(QIOChannel *ioc,
+                    Error **errp);
+ };
+ 
+ /* General I/O handling functions */
+@@ -228,6 +234,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
+  * @niov: the length of the @iov array
+  * @fds: an array of file handles to send
+  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
+  * @errp: pointer to a NULL-initialized error object
+  *
+  * Write data to the IO channel, reading it from the
+@@ -260,6 +267,7 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
+                                 size_t niov,
+                                 int *fds,
+                                 size_t nfds,
+                                int flags,
+                                 Error **errp);
+ 
+ /**
+@@ -837,6 +845,7 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
+  * @niov: the length of the @iov array
+  * @fds: an array of file handles to send
+  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
+  * @errp: pointer to a NULL-initialized error object
+  *
+  *
+@@ -846,6 +855,14 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
+  * to be written, yielding from the current coroutine
+  * if required.
+  *
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush()
+ * before reusing the buffer.
+ *
+  * Returns: 0 if all bytes were written, or -1 on error
+  */
+ 
+@@ -853,6 +870,25 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
+                                 const struct iovec *iov,
+                                 size_t niov,
+                                 int *fds, size_t nfds,
+-                                Error **errp);
+                                int flags, Error **errp);
+
+/**
+ * qio_channel_flush:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_full() + QIO_CHANNEL_WRITE_FLAG_ZERO_COPY
+ * is sent, or return in case of any error.
+ *
+ * If not implemented, acts as a no-op, and returns 0.
+ *
+ * Returns -1 if any error is found,
+ *          1 if every send failed to use zero copy.
+ *          0 otherwise.
+ */
+
+int qio_channel_flush(QIOChannel *ioc,
+                      Error **errp);
+ 
+ #endif /* QIO_CHANNEL_H */
+diff --git a/io/channel-buffer.c b/io/channel-buffer.c
+index baa4e2b089..bf52011be2 100644
+--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
+@@ -81,6 +81,7 @@ static ssize_t qio_channel_buffer_writev(QIOChannel *ioc,
+                                          size_t niov,
+                                          int *fds,
+                                          size_t nfds,
+                                         int flags,
+                                          Error **errp)
+ {
+     QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
+diff --git a/io/channel-command.c b/io/channel-command.c
+index b2a9e27138..5ff1691bad 100644
+--- a/io/channel-command.c
+++ b/io/channel-command.c
+@@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc,
+                                           size_t niov,
+                                           int *fds,
+                                           size_t nfds,
+                                          int flags,
+                                           Error **errp)
+ {
+     QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
+diff --git a/io/channel-file.c b/io/channel-file.c
+index c4bf799a80..348a48545e 100644
+--- a/io/channel-file.c
+++ b/io/channel-file.c
+@@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc,
+                                        size_t niov,
+                                        int *fds,
+                                        size_t nfds,
+                                       int flags,
+                                        Error **errp)
+ {
+     QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index 606ec97cf7..bfbd64787e 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+                                          size_t niov,
+                                          int *fds,
+                                          size_t nfds,
+                                         int flags,
+                                          Error **errp)
+ {
+     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+@@ -620,6 +621,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+                                          size_t niov,
+                                          int *fds,
+                                          size_t nfds,
+                                         int flags,
+                                          Error **errp)
+ {
+     QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+diff --git a/io/channel-tls.c b/io/channel-tls.c
+index 2ae1b92fc0..4ce890a538 100644
+--- a/io/channel-tls.c
+++ b/io/channel-tls.c
+@@ -301,6 +301,7 @@ static ssize_t qio_channel_tls_writev(QIOChannel *ioc,
+                                       size_t niov,
+                                       int *fds,
+                                       size_t nfds,
+                                      int flags,
+                                       Error **errp)
+ {
+     QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
+diff --git a/io/channel-websock.c b/io/channel-websock.c
+index 70889bb54d..035dd6075b 100644
+--- a/io/channel-websock.c
+++ b/io/channel-websock.c
+@@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc,
+                                           size_t niov,
+                                           int *fds,
+                                           size_t nfds,
+                                          int flags,
+                                           Error **errp)
+ {
+     QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
+diff --git a/io/channel.c b/io/channel.c
+index e8b019dc36..0640941ac5 100644
+--- a/io/channel.c
+++ b/io/channel.c
+@@ -72,18 +72,32 @@ ssize_t qio_channel_writev_full(QIOChannel *ioc,
+                                 size_t niov,
+                                 int *fds,
+                                 size_t nfds,
+                                int flags,
+                                 Error **errp)
+ {
+     QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+ 
+-    if ((fds || nfds) &&
+-        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+    if (fds || nfds) {
+        if (!qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
+            error_setg_errno(errp, EINVAL,
+                             "Channel does not support file descriptor passing");
+            return -1;
+        }
+        if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+            error_setg_errno(errp, EINVAL,
+                             "Zero Copy does not support file descriptor passing");
+            return -1;
+        }
+    }
+
+    if ((flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) &&
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+         error_setg_errno(errp, EINVAL,
+-                         "Channel does not support file descriptor passing");
+                         "Requested Zero Copy feature is not available");
+         return -1;
+     }
+ 
+-    return klass->io_writev(ioc, iov, niov, fds, nfds, errp);
+    return klass->io_writev(ioc, iov, niov, fds, nfds, flags, errp);
+ }
+ 
+ 
+@@ -217,14 +231,14 @@ int qio_channel_writev_all(QIOChannel *ioc,
+                            size_t niov,
+                            Error **errp)
+ {
+-    return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+    return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, 0, errp);
+ }
+ 
+ int qio_channel_writev_full_all(QIOChannel *ioc,
+                                 const struct iovec *iov,
+                                 size_t niov,
+                                 int *fds, size_t nfds,
+-                                Error **errp)
+                                int flags, Error **errp)
+ {
+     int ret = -1;
+     struct iovec *local_iov = g_new(struct iovec, niov);
+@@ -237,8 +251,10 @@ int qio_channel_writev_full_all(QIOChannel *ioc,
+ 
+     while (nlocal_iov > 0) {
+         ssize_t len;
+-        len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
+-                                      errp);
+
+        len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds,
+                                            nfds, flags, errp);
+
+         if (len == QIO_CHANNEL_ERR_BLOCK) {
+             if (qemu_in_coroutine()) {
+                 qio_channel_yield(ioc, G_IO_OUT);
+@@ -277,7 +293,7 @@ ssize_t qio_channel_writev(QIOChannel *ioc,
+                            size_t niov,
+                            Error **errp)
+ {
+-    return qio_channel_writev_full(ioc, iov, niov, NULL, 0, errp);
+    return qio_channel_writev_full(ioc, iov, niov, NULL, 0, 0, errp);
+ }
+ 
+ 
+@@ -297,7 +313,7 @@ ssize_t qio_channel_write(QIOChannel *ioc,
+                           Error **errp)
+ {
+     struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+-    return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, errp);
+    return qio_channel_writev_full(ioc, &iov, 1, NULL, 0, 0, errp);
+ }
+ 
+ 
+@@ -473,6 +489,19 @@ off_t qio_channel_io_seek(QIOChannel *ioc,
+     return klass->io_seek(ioc, offset, whence, errp);
+ }
+ 
+int qio_channel_flush(QIOChannel *ioc,
+                                Error **errp)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (!klass->io_flush ||
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+        return 0;
+    }
+
+    return klass->io_flush(ioc, errp);
+}
+
+ 
+ static void qio_channel_restart_read(void *opaque)
+ {
+diff --git a/migration/rdma.c b/migration/rdma.c
+index f5d3bbe7e9..54acd2000e 100644
+--- a/migration/rdma.c
+++ b/migration/rdma.c
+@@ -2833,6 +2833,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc,
+                                        size_t niov,
+                                        int *fds,
+                                        size_t nfds,
+                                       int flags,
+                                        Error **errp)
+ {
+     QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
+diff --git a/scsi/pr-manager-helper.c b/scsi/pr-manager-helper.c
+index 451c7631b7..3be52a98d5 100644
+--- a/scsi/pr-manager-helper.c
+++ b/scsi/pr-manager-helper.c
+@@ -77,7 +77,7 @@ static int pr_manager_helper_write(PRManagerHelper *pr_mgr,
+         iov.iov_base = (void *)buf;
+         iov.iov_len = sz;
+         n_written = qio_channel_writev_full(QIO_CHANNEL(pr_mgr->ioc), &iov, 1,
+-                                            nfds ? &fd : NULL, nfds, errp);
+                                            nfds ? &fd : NULL, nfds, 0, errp);
+ 
+         if (n_written <= 0) {
+             assert(n_written != QIO_CHANNEL_ERR_BLOCK);
+diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
+index c49eec1f03..6713886d02 100644
+--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
+@@ -444,6 +444,7 @@ static void test_io_channel_unix_fd_pass(void)
+                             G_N_ELEMENTS(iosend),
+                             fdsend,
+                             G_N_ELEMENTS(fdsend),
+                            0,
+                             &error_abort);
+ 
+     qio_channel_readv_full(dst,
+-- 
+2.35.3
+
--- a/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch
+++ b/SOURCES/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch
@ -0,0 +1,56 @@
+From a6c4aed18a027ce8e107fdf9184e9ea43a86f843 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Thu, 4 Aug 2022 04:10:43 -0300
+Subject: [PATCH 8/9] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [8/8] 6e26ee7c9ebaedb07623313cb0678816867751dd
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+For using MSG_ZEROCOPY, there are two steps:
+1 - io_writev() the packet, which enqueues the packet for sending, and
+2 - io_flush(), which gets confirmation that all packets got correctly sent
+
+Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will
+be reported in (1), but it will fail the first time (2) happens.
+
+This happens because (2) currently checks for cmsg_level & cmsg_type
+associated with IPV4 only, before reporting any error.
+
+Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable
+support for MSG_ZEROCOPY + IPV6
+
+Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Signed-off-by: Daniel P. Berrangé <berrange@redhat.com>
+(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ io/channel-socket.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index cf0d67c51b..6010ad7017 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
+         }
+ 
+         cm = CMSG_FIRSTHDR(&msg);
+-        if (cm->cmsg_level != SOL_IP &&
+-            cm->cmsg_type != IP_RECVERR) {
+        if (cm->cmsg_level != SOL_IP   && cm->cmsg_type != IP_RECVERR &&
+            cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
+             error_setg_errno(errp, EPROTOTYPE,
+                              "Wrong cmsg in errqueue");
+             return -1;
+-- 
+2.31.1
+
--- a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch
+++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch
@ -0,0 +1,65 @@
+From 905cc8032fc63619efb3f0a8c9754b7190bcc43a Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 11 Jul 2022 18:11:11 -0300
+Subject: [PATCH 3/9] QIOChannelSocket: Fix zero-copy flush returning code 1
+ when nothing sent
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [3/8] 1ad707702fa26cd4d0fa1870c21f5f26ae93ff97
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently
+returns 1. This return code should be used only when Linux fails to use
+MSG_ZEROCOPY on a lot of sendmsg().
+
+Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY)
+was attempted.
+
+Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20220711211112.18951-2-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ io/channel-socket.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index df858da924..cf0d67c51b 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc,
+     struct cmsghdr *cm;
+     char control[CMSG_SPACE(sizeof(*serr))];
+     int received;
+-    int ret = 1;
+    int ret;
+
+    if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
+        return 0;
+    }
+ 
+     msg.msg_control = control;
+     msg.msg_controllen = sizeof(control);
+     memset(control, 0, sizeof(control));
+ 
+    ret = 1;
+
+     while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
+         received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
+         if (received < 0) {
+-- 
+2.31.1
+
--- a/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch
+++ b/SOURCES/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch
@ -0,0 +1,58 @@
+From c1fd32d93ae42fcf3c1a25f4d56e669f251087d8 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 20 Jun 2022 02:39:43 -0300
+Subject: [PATCH 25/37] QIOChannelSocket: Fix zero-copy send so socket flush
+ works
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [25/26] 3ede94f3269e21c3ace073ed1a6f24696315bcbb
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Somewhere between v6 and v7 of the zero-copy-send patchset a crucial
+part of the flushing mechanism went missing: incrementing zero_copy_queued.
+
+Without that, the flushing interface becomes a no-op, and there is no
+guarantee the buffer is really sent.
+
+This can go as bad as causing a corruption in RAM during migration.
+
+Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX")
+Reported-by: 徐闯 <xuchuangxclwt@bytedance.com>
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 4f5a09714c983a3471fd12e3c7f3196e95c650c1)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ io/channel-socket.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index 7d37b39de7..df858da924 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+                          "Unable to write to socket");
+         return -1;
+     }
+
+    if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+        sioc->zero_copy_queued++;
+    }
+
+     return ret;
+ }
+ #else /* WIN32 */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch
+++ b/SOURCES/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch
@ -0,0 +1,249 @@
+From 5fd7af93a06adaddbae719aabbaf912159f4fb28 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 18/37] QIOChannelSocket: Implement io_writev zero copy flag &
+ io_flush for CONFIG_LINUX
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [18/26] 6f65c8c879a5df57213b541d58285b65178f8547
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+For CONFIG_LINUX, implement the new zero copy flag and the optional callback
+io_flush on QIOChannelSocket, but enable it only when the MSG_ZEROCOPY
+feature is available in the host kernel, which is checked in
+qio_channel_socket_connect_sync().
+
+qio_channel_socket_flush() was implemented by counting how many times
+sendmsg(...,MSG_ZEROCOPY) was successfully called, and then reading the
+socket's error queue, in order to find how many of them finished sending.
+Flush will loop until those counters are the same, or until some error occurs.
+
+Notes on using writev() with QIO_CHANNEL_WRITE_FLAG_ZERO_COPY:
+1: Buffer
+- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
+some caution is necessary to avoid overwriting any buffer before it's sent.
+If something like this happens, a newer version of the buffer may be sent instead.
+- If this is a problem, it's recommended to call qio_channel_flush() before freeing
+or re-using the buffer.
+
+2: Locked memory
+- When using MSG_ZEROCOPY, the buffer memory will be locked after being queued, and
+unlocked after it's sent.
+- Depending on the size of each buffer, and how often it's sent, it may require
+a larger amount of locked memory than usually available to non-root user.
+- If the required amount of locked memory is not available, writev_zero_copy
+will return an error, which can abort an operation like migration.
+- Because of this, when user code wants to add zero copy as a feature, it
+requires a mechanism to disable it, so it can still be accessible to less
+privileged users.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Message-Id: <20220513062836.965425-4-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 2bc58ffc2926a4efdd03edfb5909861fefc68c3d)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ include/io/channel-socket.h |   2 +
+ io/channel-socket.c         | 116 ++++++++++++++++++++++++++++++++++--
+ 2 files changed, 114 insertions(+), 4 deletions(-)
+
+diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
+index e747e63514..513c428fe4 100644
+--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
+@@ -47,6 +47,8 @@ struct QIOChannelSocket {
+     socklen_t localAddrLen;
+     struct sockaddr_storage remoteAddr;
+     socklen_t remoteAddrLen;
+    ssize_t zero_copy_queued;
+    ssize_t zero_copy_sent;
+ };
+ 
+ 
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index bfbd64787e..38a46ba213 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -26,6 +26,14 @@
+ #include "io/channel-watch.h"
+ #include "trace.h"
+ #include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include <linux/errqueue.h>
+#include <sys/socket.h>
+
+#if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
+#define QEMU_MSG_ZEROCOPY
+#endif
+#endif
+ 
+ #define SOCKET_MAX_FDS 16
+ 
+@@ -55,6 +63,8 @@ qio_channel_socket_new(void)
+ 
+     sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
+     sioc->fd = -1;
+    sioc->zero_copy_queued = 0;
+    sioc->zero_copy_sent = 0;
+ 
+     ioc = QIO_CHANNEL(sioc);
+     qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
+@@ -154,6 +164,16 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
+         return -1;
+     }
+ 
+#ifdef QEMU_MSG_ZEROCOPY
+    int ret, v = 1;
+    ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+    if (ret == 0) {
+        /* Zero copy available on host */
+        qio_channel_set_feature(QIO_CHANNEL(ioc),
+                                QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
+    }
+#endif
+
+     return 0;
+ }
+ 
+@@ -534,6 +554,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+     char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
+     size_t fdsize = sizeof(int) * nfds;
+     struct cmsghdr *cmsg;
+    int sflags = 0;
+ 
+     memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
+ 
+@@ -558,15 +579,31 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+         memcpy(CMSG_DATA(cmsg), fds, fdsize);
+     }
+ 
+#ifdef QEMU_MSG_ZEROCOPY
+    if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+        sflags = MSG_ZEROCOPY;
+    }
+#endif
+
+  retry:
+-    ret = sendmsg(sioc->fd, &msg, 0);
+    ret = sendmsg(sioc->fd, &msg, sflags);
+     if (ret <= 0) {
+-        if (errno == EAGAIN) {
+        switch (errno) {
+        case EAGAIN:
+             return QIO_CHANNEL_ERR_BLOCK;
+-        }
+-        if (errno == EINTR) {
+        case EINTR:
+             goto retry;
+#ifdef QEMU_MSG_ZEROCOPY
+        case ENOBUFS:
+            if (sflags & MSG_ZEROCOPY) {
+                error_setg_errno(errp, errno,
+                                 "Process can't lock enough memory for using MSG_ZEROCOPY");
+                return -1;
+            }
+            break;
+#endif
+         }
+
+         error_setg_errno(errp, errno,
+                          "Unable to write to socket");
+         return -1;
+@@ -660,6 +697,74 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+ }
+ #endif /* WIN32 */
+ 
+
+#ifdef QEMU_MSG_ZEROCOPY
+static int qio_channel_socket_flush(QIOChannel *ioc,
+                                    Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    struct msghdr msg = {};
+    struct sock_extended_err *serr;
+    struct cmsghdr *cm;
+    char control[CMSG_SPACE(sizeof(*serr))];
+    int received;
+    int ret = 1;
+
+    msg.msg_control = control;
+    msg.msg_controllen = sizeof(control);
+    memset(control, 0, sizeof(control));
+
+    while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
+        received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
+        if (received < 0) {
+            switch (errno) {
+            case EAGAIN:
+                /* Nothing on errqueue, wait until something is available */
+                qio_channel_wait(ioc, G_IO_ERR);
+                continue;
+            case EINTR:
+                continue;
+            default:
+                error_setg_errno(errp, errno,
+                                 "Unable to read errqueue");
+                return -1;
+            }
+        }
+
+        cm = CMSG_FIRSTHDR(&msg);
+        if (cm->cmsg_level != SOL_IP &&
+            cm->cmsg_type != IP_RECVERR) {
+            error_setg_errno(errp, EPROTOTYPE,
+                             "Wrong cmsg in errqueue");
+            return -1;
+        }
+
+        serr = (void *) CMSG_DATA(cm);
+        if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
+            error_setg_errno(errp, serr->ee_errno,
+                             "Error on socket");
+            return -1;
+        }
+        if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
+            error_setg_errno(errp, serr->ee_origin,
+                             "Error not from zero copy");
+            return -1;
+        }
+
+        /* No errors, count successfully finished sendmsg()*/
+        sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
+
+        /* If any sendmsg() succeeded using zero copy, return 0 at the end */
+        if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
+            ret = 0;
+        }
+    }
+
+    return ret;
+}
+
+#endif /* QEMU_MSG_ZEROCOPY */
+
+ static int
+ qio_channel_socket_set_blocking(QIOChannel *ioc,
+                                 bool enabled,
+@@ -789,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
+     ioc_klass->io_set_delay = qio_channel_socket_set_delay;
+     ioc_klass->io_create_watch = qio_channel_socket_create_watch;
+     ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
+#ifdef QEMU_MSG_ZEROCOPY
+    ioc_klass->io_flush = qio_channel_socket_flush;
+#endif
+ }
+ 
+ static const TypeInfo qio_channel_socket_info = {
+-- 
+2.35.3
+
--- a/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch
+++ b/SOURCES/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch
@ -0,0 +1,82 @@
+From cbfaf86331c2b2e01a2083303b7554672bf991b7 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 20 Jun 2022 02:39:42 -0300
+Subject: [PATCH 24/37] QIOChannelSocket: Introduce assert and reduce ifdefs to
+ improve readability
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [24/26] b50e2e65307149f247155a7f7a032dc99e57718d
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were
+introduced, particularly at qio_channel_socket_writev().
+
+Rewrite some of those changes so it's easier to read.
+
+Also, introduce an assert to help detect incorrect zero-copy usage when
+it's disabled at build time.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+  dgilbert: Fixed up thinko'd g_assert_unreachable->g_assert_not_reached
+(cherry picked from commit 803ca43e4c7fcf32f9f68c118301ccd0c83ece3f)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ io/channel-socket.c | 14 +++++++++-----
+ 1 file changed, 9 insertions(+), 5 deletions(-)
+
+diff --git a/io/channel-socket.c b/io/channel-socket.c
+index 38a46ba213..7d37b39de7 100644
+--- a/io/channel-socket.c
+++ b/io/channel-socket.c
+@@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+         memcpy(CMSG_DATA(cmsg), fds, fdsize);
+     }
+ 
+-#ifdef QEMU_MSG_ZEROCOPY
+     if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+#ifdef QEMU_MSG_ZEROCOPY
+         sflags = MSG_ZEROCOPY;
+-    }
+#else
+        /*
+         * We expect QIOChannel class entry point to have
+         * blocked this code path already
+         */
+        g_assert_not_reached();
+ #endif
+    }
+ 
+  retry:
+     ret = sendmsg(sioc->fd, &msg, sflags);
+@@ -593,15 +599,13 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
+             return QIO_CHANNEL_ERR_BLOCK;
+         case EINTR:
+             goto retry;
+-#ifdef QEMU_MSG_ZEROCOPY
+         case ENOBUFS:
+-            if (sflags & MSG_ZEROCOPY) {
+            if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+                 error_setg_errno(errp, errno,
+                                  "Process can't lock enough memory for using MSG_ZEROCOPY");
+                 return -1;
+             }
+             break;
+-#endif
+         }
+ 
+         error_setg_errno(errp, errno,
+-- 
+2.35.3
+
--- a/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch
+++ b/SOURCES/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch
@ -0,0 +1,128 @@
+From 96edd15df257f1d1496397a6fac24b4316570d7e Mon Sep 17 00:00:00 2001
+From: Jon Maloy <jmaloy@redhat.com>
+Date: Thu, 14 Apr 2022 16:45:30 -0400
+Subject: [PATCH 1/3] Revert redhat: Add some devices for exporting upstream
+ machine types
+
+RH-Author: Jon Maloy <jmaloy@redhat.com>
+RH-MergeRequest: 156: Revert redhat: Add some devices for exporting upstream machine types
+RH-Commit: [1/1] f25d0da3a181136917ead82f5a5c59efe3fa445a (jmaloy/qemu-kvm)
+RH-Bugzilla: 2065043
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Thomas Huth <thuth@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2065043
+Upstream: no
+
+Manual revert of commit 70d3924521c9bfd912bcf1a1fc76f49eb377de46, since
+the directory structure looks different from rhel-av-8.4.0.z where
+this commit is taken from. Besides, x86_64-softmmu.mak looks totally
+different and should not be affected by this reversal.
+
+Signed-off-by: Jon Maloy <jmaloy@redhat.com>
+---
+ configs/devices/x86_64-softmmu/x86_64-rh-devices.mak     | 1 -
+ .../devices/x86_64-softmmu/x86_64-upstream-devices.mak   | 4 ----
+ hw/char/parallel.c                                       | 9 ---------
+ hw/i386/pc_piix.c                                        | 2 +-
+ hw/i386/pc_q35.c                                         | 2 +-
+ hw/timer/hpet.c                                          | 8 --------
+ 6 files changed, 2 insertions(+), 24 deletions(-)
+ delete mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
+
+diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+index fdbbdf9742..31ce08edab 100644
+--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak
+@@ -1,5 +1,4 @@
+ include ../rh-virtio.mak
+-include x86_64-upstream-devices.mak
+ 
+ CONFIG_AC97=y
+ CONFIG_ACPI=y
+diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
+deleted file mode 100644
+index 2cd20f54d2..0000000000
+--- a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak
+++ /dev/null
+@@ -1,4 +0,0 @@
+-# We need "isa-parallel"
+-CONFIG_PARALLEL=y
+-# We need "hpet"
+-CONFIG_HPET=y
+diff --git a/hw/char/parallel.c b/hw/char/parallel.c
+index e5f108211b..b45e67bfbb 100644
+--- a/hw/char/parallel.c
+++ b/hw/char/parallel.c
+@@ -29,7 +29,6 @@
+ #include "chardev/char-parallel.h"
+ #include "chardev/char-fe.h"
+ #include "hw/acpi/aml-build.h"
+-#include "hw/boards.h"
+ #include "hw/irq.h"
+ #include "hw/isa/isa.h"
+ #include "hw/qdev-properties.h"
+@@ -535,14 +534,6 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp)
+     int base;
+     uint8_t dummy;
+ 
+-    /* Restricted for Red Hat Enterprise Linux */
+-    MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+-    if (strstr(mc->name, "rhel")) {
+-        error_setg(errp, "Device %s is not supported with machine type %s",
+-                   object_get_typename(OBJECT(dev)), mc->name);
+-        return;
+-    }
+-
+     if (!qemu_chr_fe_backend_connected(&s->chr)) {
+         error_setg(errp, "Can't create parallel device, empty char device");
+         return;
+diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
+index ab6d03e07a..5f101c8748 100644
+--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
+@@ -966,7 +966,7 @@ static void pc_machine_rhel7_options(MachineClass *m)
+ {
+     PCMachineClass *pcmc = PC_MACHINE_CLASS(m);
+     m->family = "pc_piix_Y";
+-    m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+    m->default_machine_opts = "firmware=bios-256k.bin";
+     pcmc->default_nic_model = "e1000";
+     pcmc->pci_root_uid = 0;
+     m->default_display = "std";
+diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
+index 882fe7a68d..73b0d0d317 100644
+--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
+@@ -633,7 +633,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m)
+     pcmc->pci_root_uid = 0;
+     m->family = "pc_q35_Z";
+     m->units_per_default_bus = 1;
+-    m->default_machine_opts = "firmware=bios-256k.bin,hpet=off";
+    m->default_machine_opts = "firmware=bios-256k.bin";
+     m->default_display = "std";
+     m->no_floppy = 1;
+     m->no_parallel = 1;
+diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
+index 202e032524..9520471be2 100644
+--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
+@@ -733,14 +733,6 @@ static void hpet_realize(DeviceState *dev, Error **errp)
+     int i;
+     HPETTimer *timer;
+ 
+-    /* Restricted for Red Hat Enterprise Linux */
+-    MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
+-    if (strstr(mc->name, "rhel")) {
+-        error_setg(errp, "Device %s is not supported with machine type %s",
+-                   object_get_typename(OBJECT(dev)), mc->name);
+-        return;
+-    }
+-
+     if (!s->intcap) {
+         warn_report("Hpet's intcap not initialized");
+     }
+-- 
+2.35.1
+
--- a/SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch
+++ b/SOURCES/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch
@ -0,0 +1,78 @@
+From 6348063b91b2370cc27153fd58fd11a6681631f6 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Wed, 16 Feb 2022 11:53:53 +0100
+Subject: [PATCH 22/24] block: Make bdrv_refresh_limits() non-recursive
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive
+RH-Commit: [1/3] 1a1fe37f8d8f0344dd8639d6cc9d884d1aff9096
+RH-Bugzilla: 2072932
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+
+bdrv_refresh_limits() recurses down to the node's children.  That does
+not seem necessary: When we refresh limits on some node, and then
+recurse down and were to change one of its children's BlockLimits, then
+that would mean we noticed the changed limits by pure chance.  The fact
+that we refresh the parent's limits has nothing to do with it, so the
+reason for the change probably happened before this point in time, and
+we should have refreshed the limits then.
+
+Consequently, we should actually propagate block limits changes upwards,
+not downwards.  That is a separate and pre-existing issue, though, and
+so will not be addressed in this patch.
+
+The problem with recursing is that bdrv_refresh_limits() is not atomic.
+It begins with zeroing BDS.bl, and only then sets proper, valid limits.
+If we do not drain all nodes whose limits are refreshed, then concurrent
+I/O requests can encounter invalid request_alignment values and crash
+qemu.  Therefore, a recursing bdrv_refresh_limits() requires the whole
+subtree to be drained, which is currently not ensured by most callers.
+
+A non-recursive bdrv_refresh_limits() only requires the node in question
+to not receive I/O requests, and this is done by most callers in some
+way or another:
+- bdrv_open_driver() deals with a new node with no parents yet
+- bdrv_set_file_or_backing_noperm() acts on a drained node
+- bdrv_reopen_commit() acts only on drained nodes
+- bdrv_append() should in theory require the node to be drained; in
+  practice most callers just lock the AioContext, which should at least
+  be enough to prevent concurrent I/O requests from accessing invalid
+  limits
+
+So we can resolve the bug by making bdrv_refresh_limits() non-recursive.
+
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1879437
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Message-Id: <20220216105355.30729-2-hreitz@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 4d378bbd831bdd2f6e6adcd4ea5b77b6effaa627)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ block/io.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/block/io.c b/block/io.c
+index 4e4cb556c5..c3e7301613 100644
+--- a/block/io.c
+++ b/block/io.c
+@@ -189,10 +189,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
+     QLIST_FOREACH(c, &bs->children, next) {
+         if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
+         {
+-            bdrv_refresh_limits(c->bs, tran, errp);
+-            if (*errp) {
+-                return;
+-            }
+             bdrv_merge_limits(&bs->bl, &c->bs->bl);
+             have_limits = true;
+         }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch
+++ b/SOURCES/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch
@ -0,0 +1,97 @@
+From fe4abbda80eea7f65b6b5cc544a806fb6e064917 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
+Date: Thu, 18 Nov 2021 12:57:32 +0100
+Subject: [PATCH 2/3] hw/block/fdc: Prevent end-of-track overrun
+ (CVE-2021-3507)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jon Maloy <jmaloy@redhat.com>
+RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
+RH-Commit: [1/2] 31fa0351382b4ca5bd989b09e4d811ae73040673 (jmaloy/qemu-kvm)
+RH-Bugzilla: 1951521
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Thomas Huth <thuth@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+
+Per the 82078 datasheet, if the end-of-track (EOT byte in
+the FIFO) is more than the number of sectors per side, the
+command is terminated unsuccessfully:
+
+* 5.2.5 DATA TRANSFER TERMINATION
+
+  The 82078 supports terminal count explicitly through
+  the TC pin and implicitly through the underrun/over-
+  run and end-of-track (EOT) functions. For full sector
+  transfers, the EOT parameter can define the last
+  sector to be transferred in a single or multisector
+  transfer. If the last sector to be transferred is a par-
+  tial sector, the host can stop transferring the data in
+  mid-sector, and the 82078 will continue to complete
+  the sector as if a hardware TC was received. The
+  only difference between these implicit functions and
+  TC is that they return "abnormal termination" result
+  status. Such status indications can be ignored if they
+  were expected.
+
+* 6.1.3 READ TRACK
+
+  This command terminates when the EOT specified
+  number of sectors have been read. If the 82078
+  does not find an ID Address Mark on the diskette
+  after the second occurrence of a pulse on the
+  INDX# pin, then it sets the IC code in Status Regis-
+  ter 0 to "01" (Abnormal termination), sets the MA bit
+  in Status Register 1 to "1", and terminates the com-
+  mand.
+
+* 6.1.6 VERIFY
+
+  Refer to Table 6-6 and Table 6-7 for information
+  concerning the values of MT and EC versus SC and
+  EOT value.
+
+* Table 6-6. Result Phase Table
+
+* Table 6-7. Verify Command Result Phase Table
+
+Fix by aborting the transfer when EOT > # Sectors Per Side.
+
+Cc: qemu-stable@nongnu.org
+Cc: Hervé Poussineau <hpoussin@reactos.org>
+Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
+Reported-by: Alexander Bulekov <alxndr@bu.edu>
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
+Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Message-Id: <20211118115733.4038610-2-philmd@redhat.com>
+Reviewed-by: Hanna Reitz <hreitz@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit defac5e2fbddf8423a354ff0454283a2115e1367)
+Signed-off-by: Jon Maloy <jmaloy@redhat.com>
+---
+ hw/block/fdc.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/hw/block/fdc.c b/hw/block/fdc.c
+index 97fa6de423..755a26c114 100644
+--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
+@@ -1531,6 +1531,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
+         int tmp;
+         fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
+         tmp = (fdctrl->fifo[6] - ks + 1);
+        if (tmp < 0) {
+            FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
+            fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
+            fdctrl->fifo[3] = kt;
+            fdctrl->fifo[4] = kh;
+            fdctrl->fifo[5] = ks;
+            return;
+        }
+         if (fdctrl->fifo[0] & 0x80)
+             tmp += fdctrl->fifo[6];
+         fdctrl->data_len *= tmp;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch
+++ b/SOURCES/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch
@ -0,0 +1,68 @@
+From 1bd939d374ec2e994ff47c84e16fa3bc1323a0fd Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Thu, 18 Aug 2022 17:01:13 +0200
+Subject: [PATCH 2/2] i386: do kvm_put_msr_feature_control() first thing when
+ vCPU is reset
+
+RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
+RH-MergeRequest: 216: i386: fix 'system_reset' when the VM is in VMX root operation
+RH-Bugzilla: 2116743
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Commit: [2/2] f838a57f74487eb394794de00006d5d2b9e84344
+
+kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when
+it is in VMX root operation. Do kvm_put_msr_feature_control() before
+kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also
+seems logical to do kvm_put_msr_feature_control() before
+kvm_put_nested_state() and not after it, especially when 'real' nested
+state is set.
+
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20220818150113.479917-3-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c)
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+---
+ target/i386/kvm/kvm.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index 81d729dc40..a06221d3e5 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -4255,6 +4255,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
+ 
+     assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
+ 
+    /*
+     * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX
+     * root operation upon vCPU reset. kvm_put_msr_feature_control() should also
+     * preceed kvm_put_nested_state() when 'real' nested state is set.
+     */
+    if (level >= KVM_PUT_RESET_STATE) {
+        ret = kvm_put_msr_feature_control(x86_cpu);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+     /* must be before kvm_put_nested_state so that EFER.SVME is set */
+     ret = kvm_put_sregs(x86_cpu);
+     if (ret < 0) {
+@@ -4266,11 +4278,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
+         if (ret < 0) {
+             return ret;
+         }
+-
+-        ret = kvm_put_msr_feature_control(x86_cpu);
+-        if (ret < 0) {
+-            return ret;
+-        }
+     }
+ 
+     if (level == KVM_PUT_FULL_STATE) {
+-- 
+2.31.1
+
--- a/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch
+++ b/SOURCES/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch
@ -0,0 +1,95 @@
+From 4ad00e318f8afbee0e455cfbb6bc693c808d87f3 Mon Sep 17 00:00:00 2001
+From: Vitaly Kuznetsov <vkuznets@redhat.com>
+Date: Thu, 18 Aug 2022 17:01:12 +0200
+Subject: [PATCH 1/2] i386: reset KVM nested state upon CPU reset
+
+RH-Author: Vitaly Kuznetsov <vkuznets@redhat.com>
+RH-MergeRequest: 216: i386: fix 'system_reset' when the VM is in VMX root operation
+RH-Bugzilla: 2116743
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Commit: [1/2] 20d2dabeda74b8cd5135228980a2414e66dc64f3
+
+Make sure env->nested_state is cleaned up when a vCPU is reset. It may
+be stale after an incoming migration, in which case kvm_arch_put_registers()
+may end up failing or putting the vCPU in a weird state.
+
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Message-Id: <20220818150113.479917-2-vkuznets@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b)
+Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+---
+ target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++----------
+ 1 file changed, 27 insertions(+), 10 deletions(-)
+
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index bd439e56ad..81d729dc40 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -1615,6 +1615,30 @@ static void kvm_init_xsave(CPUX86State *env)
+            env->xsave_buf_len);
+ }
+ 
+static void kvm_init_nested_state(CPUX86State *env)
+{
+    struct kvm_vmx_nested_state_hdr *vmx_hdr;
+    uint32_t size;
+
+    if (!env->nested_state) {
+        return;
+    }
+
+    size = env->nested_state->size;
+
+    memset(env->nested_state, 0, size);
+    env->nested_state->size = size;
+
+    if (cpu_has_vmx(env)) {
+        env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
+        vmx_hdr = &env->nested_state->hdr.vmx;
+        vmx_hdr->vmxon_pa = -1ull;
+        vmx_hdr->vmcs12_pa = -1ull;
+    } else if (cpu_has_svm(env)) {
+        env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
+    }
+}
+
+ int kvm_arch_init_vcpu(CPUState *cs)
+ {
+     struct {
+@@ -2042,19 +2066,10 @@ int kvm_arch_init_vcpu(CPUState *cs)
+         assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data));
+ 
+         if (cpu_has_vmx(env) || cpu_has_svm(env)) {
+-            struct kvm_vmx_nested_state_hdr *vmx_hdr;
+-
+             env->nested_state = g_malloc0(max_nested_state_len);
+             env->nested_state->size = max_nested_state_len;
+ 
+-            if (cpu_has_vmx(env)) {
+-                env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX;
+-                vmx_hdr = &env->nested_state->hdr.vmx;
+-                vmx_hdr->vmxon_pa = -1ull;
+-                vmx_hdr->vmcs12_pa = -1ull;
+-            } else {
+-                env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM;
+-            }
+            kvm_init_nested_state(env);
+         }
+     }
+ 
+@@ -2117,6 +2132,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
+     /* enabled by default */
+     env->poll_control_msr = 1;
+ 
+    kvm_init_nested_state(env);
+
+     sev_es_set_reset_vector(CPU(cpu));
+ }
+ 
+-- 
+2.31.1
+
--- a/SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch
+++ b/SOURCES/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch
@ -0,0 +1,92 @@
+From eaade87072e903cf550dfdb8ed1480dddc6bb0e3 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Thu, 20 Jan 2022 15:22:59 +0100
+Subject: [PATCH 21/24] ide: Increment BB in-flight counter for TRIM BH
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 188: ide: Increment BB in-flight counter for TRIM BH
+RH-Commit: [1/1] 1e702e735ff63f2b8b69c20cac1b309dd085cd62
+RH-Bugzilla: 2029980
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+When we still have an AIOCB registered for DMA operations, we try to
+settle the respective operation by draining the BlockBackend associated
+with the IDE device.
+
+However, this assumes that every DMA operation is associated with an
+increment of the BlockBackend’s in-flight counter (e.g. through some
+ongoing I/O operation), so that draining the BB until its in-flight
+counter reaches 0 will settle all DMA operations.  That is not the case:
+For TRIM, the guest can issue a zero-length operation that will not
+result in any I/O operation forwarded to the BlockBackend, and also not
+increment the in-flight counter in any other way.  In such a case,
+blk_drain() will be a no-op if no other operations are in flight.
+
+It is clear that if blk_drain() is a no-op, the value of
+s->bus->dma->aiocb will not change between checking it in the `if`
+condition and asserting that it is NULL after blk_drain().
+
+The particular problem is that ide_issue_trim() creates a BH
+(ide_trim_bh_cb()) to settle the TRIM request: iocb->common.cb() is
+ide_dma_cb(), which will either create a new request, or find the
+transfer to be done and call ide_set_inactive(), which clears
+s->bus->dma->aiocb.  Therefore, the blk_drain() must wait for
+ide_trim_bh_cb() to run, which currently it will not always do.
+
+To fix this issue, we increment the BlockBackend's in-flight counter
+when the TRIM operation begins (in ide_issue_trim(), when the
+ide_trim_bh_cb() BH is created) and decrement it when ide_trim_bh_cb()
+is done.
+
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2029980
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220120142259.120189-1-hreitz@redhat.com>
+Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
+Reviewed-by: John Snow <jsnow@redhat.com>
+Tested-by: John Snow <jsnow@redhat.com>
+(cherry picked from commit 7e5cdb345f77d76cb4877fe6230c4e17a7d0d0ca)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ hw/ide/core.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/hw/ide/core.c b/hw/ide/core.c
+index e28f8aad61..15138225be 100644
+--- a/hw/ide/core.c
+++ b/hw/ide/core.c
+@@ -433,12 +433,16 @@ static const AIOCBInfo trim_aiocb_info = {
+ static void ide_trim_bh_cb(void *opaque)
+ {
+     TrimAIOCB *iocb = opaque;
+    BlockBackend *blk = iocb->s->blk;
+ 
+     iocb->common.cb(iocb->common.opaque, iocb->ret);
+ 
+     qemu_bh_delete(iocb->bh);
+     iocb->bh = NULL;
+     qemu_aio_unref(iocb);
+
+    /* Paired with an increment in ide_issue_trim() */
+    blk_dec_in_flight(blk);
+ }
+ 
+ static void ide_issue_trim_cb(void *opaque, int ret)
+@@ -508,6 +512,9 @@ BlockAIOCB *ide_issue_trim(
+     IDEState *s = opaque;
+     TrimAIOCB *iocb;
+ 
+    /* Paired with a decrement in ide_trim_bh_cb() */
+    blk_inc_in_flight(s->blk);
+
+     iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque);
+     iocb->s = s;
+     iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch
+++ b/SOURCES/kvm-iotests-108-Fix-when-missing-user_allow_other.patch
@ -0,0 +1,52 @@
+From 676e19198916d7631ba1367646dd08dc72079f88 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Thu, 21 Apr 2022 16:24:35 +0200
+Subject: [PATCH 6/6] iotests/108: Fix when missing user_allow_other
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding
+RH-Commit: [4/4] 36b70b5378ae7c8084b9e847706f00003abe9c11
+RH-Bugzilla: 1519071
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+
+FUSE exports' allow-other option defaults to "auto", which means that it
+will try passing allow_other as a mount option, and fall back to not
+using it when an error occurs.  We make no effort to hide fusermount's
+error message (because it would be difficult, and because users might
+want to know about the fallback occurring), and so when allow_other does
+not work (primarily when /etc/fuse.conf does not contain
+user_allow_other), this error message will appear and break the
+reference output.
+
+We do not need allow_other here, though, so we can just pass
+allow-other=off to fix that.
+
+Reported-by: Markus Armbruster <armbru@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220421142435.569600-1-hreitz@redhat.com>
+Tested-by: Markus Armbruster <armbru@redhat.com>
+Tested-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit 348a0740afc5b313599533eb69bbb2b95d2f1bba)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ tests/qemu-iotests/108 | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
+index 23abbeaff0..775ff08eca 100755
+--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
+@@ -326,7 +326,7 @@ else
+ 
+     $QSD \
+         --blockdev file,node-name=export-node,filename="$TEST_IMG" \
+-        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
+        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off,allow-other=off \
+         --pidfile "$TEST_DIR/qsd.pid" \
+         &
+ 
+-- 
+2.27.0
+
--- a/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
+++ b/SOURCES/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
@ -0,0 +1,445 @@
+From d638552d76db0db9e2b6ae90a35f0b451b0cbaf8 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:51 +0200
+Subject: [PATCH 4/6] iotests/108: Test new refcount rebuild algorithm
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding
+RH-Commit: [2/4] 2aa8c383f0c88c414f10ade8bd2e8af07c35f35b
+RH-Bugzilla: 1519071
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+
+One clear problem with how qcow2's refcount structure rebuild algorithm
+used to be before "qcow2: Improve refcount structure rebuilding" was
+that it is prone to failure for qcow2 images on block devices: There is
+generally unused space after the actual image, and if that exceeds what
+one refblock covers, the old algorithm would invariably write the
+reftable past the block device's end, which cannot work.  The new
+algorithm does not have this problem.
+
+Test it with three tests:
+(1) Create an image with more empty space at the end than what one
+    refblock covers, see whether rebuilding the refcount structures
+    results in a change in the image file length.  (It should not.)
+
+(2) Leave precisely enough space somewhere at the beginning of the image
+    for the new reftable (and the refblock for that place), see whether
+    the new algorithm puts the reftable there.  (It should.)
+
+(3) Test the original problem: Create (something like) a block device
+    with a fixed size, then create a qcow2 image in there, write some
+    data, and then have qemu-img check rebuild the refcount structures.
+    Before HEAD^, the reftable would have been written past the image
+    file end, i.e. outside of what the block device provides, which
+    cannot work.  HEAD^ should have fixed that.
+    ("Something like a block device" means a loop device if we can use
+    one ("sudo -n losetup" works), or a FUSE block export with
+    growable=false otherwise.)
+
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-3-hreitz@redhat.com>
+(cherry picked from commit 9ffd6d646d1d5ee9087a8cbf0b7d2f96c5656162)
+
+Conflicts:
+- 108: The downstream qemu-storage-daemon does not support --daemonize,
+  so this switch has been replaced by a loop waiting for the PID file to
+  appear
+
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ tests/qemu-iotests/108     | 263 ++++++++++++++++++++++++++++++++++++-
+ tests/qemu-iotests/108.out |  81 ++++++++++++
+ 2 files changed, 343 insertions(+), 1 deletion(-)
+
+diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
+index 8eaef0b8bf..23abbeaff0 100755
+--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
+@@ -30,13 +30,20 @@ status=1	# failure is the default!
+ 
+ _cleanup()
+ {
+-	_cleanup_test_img
+    _cleanup_test_img
+    if [ -f "$TEST_DIR/qsd.pid" ]; then
+        qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+        kill -KILL "$qsd_pid"
+        fusermount -u "$TEST_DIR/fuse-export" &>/dev/null
+    fi
+    rm -f "$TEST_DIR/fuse-export"
+ }
+ trap "_cleanup; exit \$status" 0 1 2 3 15
+ 
+ # get standard environment, filters and checks
+ . ./common.rc
+ . ./common.filter
+. ./common.qemu
+ 
+ # This tests qcow2-specific low-level functionality
+ _supported_fmt qcow2
+@@ -47,6 +54,22 @@ _supported_os Linux
+ # files
+ _unsupported_imgopts 'refcount_bits=\([^1]\|.\([^6]\|$\)\)' data_file
+ 
+# This test either needs sudo -n losetup or FUSE exports to work
+if sudo -n losetup &>/dev/null; then
+    loopdev=true
+else
+    loopdev=false
+
+    # QSD --export fuse will either yield "Parameter 'id' is missing"
+    # or "Invalid parameter 'fuse'", depending on whether there is
+    # FUSE support or not.
+    error=$($QSD --export fuse 2>&1)
+    if [[ $error = *"'fuse'"* ]]; then
+        _notrun 'Passwordless sudo for losetup or FUSE support required, but' \
+                'neither is available'
+    fi
+fi
+
+ echo
+ echo '=== Repairing an image without any refcount table ==='
+ echo
+@@ -138,6 +161,244 @@ _make_test_img 64M
+ poke_file "$TEST_IMG" $((0x10008)) "\xff\xff\xff\xff\xff\xff\x00\x00"
+ _check_test_img -r all
+ 
+echo
+echo '=== Check rebuilt reftable location ==='
+
+# In an earlier version of the refcount rebuild algorithm, the
+# reftable was generally placed at the image end (unless something was
+# allocated in the area covered by the refblock right before the image
+# file end, then we would try to place the reftable in that refblock).
+# This was later changed so the reftable would be placed in the
+# earliest possible location.  Test this.
+
+echo
+echo '--- Does the image size increase? ---'
+echo
+
+# First test: Just create some image, write some data to it, and
+# resize it so there is free space at the end of the image (enough
+# that it spans at least one full refblock, which for cluster_size=512
+# images, spans 128k).  With the old algorithm, the reftable would
+# have then been placed at the end of the image file, but with the new
+# one, it will be put in that free space.
+# We want to check whether the size of the image file increases due to
+# rebuilding the refcount structures (it should not).
+
+_make_test_img -o 'cluster_size=512' 1M
+# Write something
+$QEMU_IO -c 'write 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Add free space
+file_len=$(stat -c '%s' "$TEST_IMG")
+truncate -s $((file_len + 256 * 1024)) "$TEST_IMG"
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+# Check whether rebuilding the refcount structures increases the image
+# file size
+file_len=$(stat -c '%s' "$TEST_IMG")
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+post_repair_file_len=$(stat -c '%s' "$TEST_IMG")
+
+if [[ $file_len -eq $post_repair_file_len ]]; then
+    echo 'OK: Image size did not change'
+else
+    echo 'ERROR: Image size differs' \
+        "($file_len before, $post_repair_file_len after)"
+fi
+
+echo
+echo '--- Will the reftable occupy a hole specifically left for it?  ---'
+echo
+
+# Note: With cluster_size=512, every refblock covers 128k.
+# The reftable covers 8M per reftable cluster.
+
+# Create an image that requires two reftable clusters (just because
+# this is more interesting than a single-clustered reftable).
+_make_test_img -o 'cluster_size=512' 9M
+$QEMU_IO -c 'write 0 8M' "$TEST_IMG" | _filter_qemu_io
+
+# Writing 8M will have resized the reftable.  Unfortunately, doing so
+# will leave holes in the file, so we need to fill them up so we can
+# be sure the whole file is allocated.  Do that by writing
+# consecutively smaller chunks starting from 8 MB, until the file
+# length increases even with a chunk size of 512.  Then we must have
+# filled all holes.
+ofs=$((8 * 1024 * 1024))
+block_len=$((16 * 1024))
+while [[ $block_len -ge 512 ]]; do
+    file_len=$(stat -c '%s' "$TEST_IMG")
+    while [[ $(stat -c '%s' "$TEST_IMG") -eq $file_len ]]; do
+        # Do not include this in the reference output, it does not
+        # really matter which qemu-io calls we do here exactly
+        $QEMU_IO -c "write $ofs $block_len" "$TEST_IMG" >/dev/null
+        ofs=$((ofs + block_len))
+    done
+    block_len=$((block_len / 2))
+done
+
+# Fill up to 9M (do not include this in the reference output either,
+# $ofs is random for all we know)
+$QEMU_IO -c "write $ofs $((9 * 1024 * 1024 - ofs))" "$TEST_IMG" >/dev/null
+
+# Make space as follows:
+# - For the first refblock: Right at the beginning of the image (this
+#   refblock is placed in the first place possible),
+# - For the reftable somewhere soon afterwards, still near the
+#   beginning of the image (i.e. covered by the first refblock); the
+#   reftable too is placed in the first place possible, but only after
+#   all refblocks have been placed.
+# No space is needed for the other refblocks, because no refblock is
+# put before the space it covers.  In this test case, we do not mind
+# if they are placed at the image file's end.
+
+# Before we make that space, we have to find out the host offset of
+# the area that belonged to the two data clusters at guest offset 4k,
+# because we expect the reftable to be placed there, and we will have
+# to verify that it is.
+
+l1_offset=$(peek_file_be "$TEST_IMG" 40 8)
+l2_offset=$(peek_file_be "$TEST_IMG" $l1_offset 8)
+l2_offset=$((l2_offset & 0x00fffffffffffe00))
+data_4k_offset=$(peek_file_be "$TEST_IMG" \
+                 $((l2_offset + 4096 / 512 * 8)) 8)
+data_4k_offset=$((data_4k_offset & 0x00fffffffffffe00))
+
+$QEMU_IO -c "discard 0 512" -c "discard 4k 1k" "$TEST_IMG" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+rb_offset=$(peek_file_be "$TEST_IMG" $rt_offset 8)
+poke_file "$TEST_IMG" $rb_offset "\x00\x00"
+
+echo
+# The only leaks there can be are the old refcount structures that are
+# leaked during rebuilding, no need to clutter the output with them
+_check_test_img -r all | grep -v '^Repairing cluster.*refcount=1 reference=0'
+echo
+
+# Check whether the reftable was put where we expected
+rt_offset=$(peek_file_be "$TEST_IMG" 48 8)
+if [[ $rt_offset -eq $data_4k_offset ]]; then
+    echo 'OK: Reftable is where we expect it'
+else
+    echo "ERROR: Reftable is at $rt_offset, but was expected at $data_4k_offset"
+fi
+
+echo
+echo '--- Rebuilding refcount structures on block devices ---'
+echo
+
+# A block device cannot really grow, at least not during qemu-img
+# check.  As mentioned in the above cases, rebuilding the refcount
+# structure may lead to new refcount structures being written after
+# the end of the image, and in the past that happened even if there
+# was more than sufficient space in the image.  Such post-EOF writes
+# will not work on block devices, so test that the new algorithm
+# avoids it.
+
+# If we have passwordless sudo and losetup, we can use those to create
+# a block device.  Otherwise, we can resort to qemu's FUSE export to
+# create a file that isn't growable, which effectively tests the same
+# thing.
+
+_cleanup_test_img
+truncate -s $((64 * 1024 * 1024)) "$TEST_IMG"
+
+if $loopdev; then
+    export_mp=$(sudo -n losetup --show -f "$TEST_IMG")
+    export_mp_driver=host_device
+    sudo -n chmod go+rw "$export_mp"
+else
+    # Create non-growable FUSE export that is a bit like an empty
+    # block device
+    export_mp="$TEST_DIR/fuse-export"
+    export_mp_driver=file
+    touch "$export_mp"
+
+    $QSD \
+        --blockdev file,node-name=export-node,filename="$TEST_IMG" \
+        --export fuse,id=fuse-export,node-name=export-node,mountpoint="$export_mp",writable=on,growable=off \
+        --pidfile "$TEST_DIR/qsd.pid" \
+        &
+
+    while [ ! -f "$TEST_DIR/qsd.pid" ]; do
+        sleep 0.1
+    done
+fi
+
+# Now create a qcow2 image on the device -- unfortunately, qemu-img
+# create force-creates the file, so we have to resort to the
+# blockdev-create job.
+_launch_qemu \
+    --blockdev $export_mp_driver,node-name=file,filename="$export_mp"
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "qmp_capabilities" }' \
+    'return'
+
+# Small cluster size again, so the image needs multiple refblocks
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "blockdev-create",
+       "arguments": {
+           "job-id": "create",
+           "options": {
+               "driver": "qcow2",
+               "file": "file",
+               "size": '$((64 * 1024 * 1024))',
+               "cluster-size": 512
+           } } }' \
+    '"concluded"'
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "job-dismiss", "arguments": { "id": "create" } }' \
+    'return'
+
+_send_qemu_cmd \
+    $QEMU_HANDLE \
+    '{ "execute": "quit" }' \
+    'return'
+
+wait=y _cleanup_qemu
+echo
+
+# Write some data
+$QEMU_IO -c 'write 0 64k' "$export_mp" | _filter_qemu_io
+
+# Corrupt the image by saying the image header was not allocated
+rt_offset=$(peek_file_be "$export_mp" 48 8)
+rb_offset=$(peek_file_be "$export_mp" $rt_offset 8)
+poke_file "$export_mp" $rb_offset "\x00\x00"
+
+# Repairing such a simple case should just work
+# (We used to put the reftable at the end of the image file, which can
+# never work for non-growable devices.)
+echo
+TEST_IMG="$export_mp" _check_test_img -r all \
+    | grep -v '^Repairing cluster.*refcount=1 reference=0'
+
+if $loopdev; then
+    sudo -n losetup -d "$export_mp"
+else
+    qsd_pid=$(cat "$TEST_DIR/qsd.pid")
+    kill -TERM "$qsd_pid"
+    # Wait for the QSD to exit, i.e. for its PID file to disappear
+    while [ -f "$TEST_DIR/qsd.pid" ]; do
+        true
+    done
+fi
+
+ # success, all done
+ echo '*** done'
+ rm -f $seq.full
+diff --git a/tests/qemu-iotests/108.out b/tests/qemu-iotests/108.out
+index 75bab8dc84..b5401d788d 100644
+--- a/tests/qemu-iotests/108.out
+++ b/tests/qemu-iotests/108.out
+@@ -105,6 +105,87 @@ The following inconsistencies were found and repaired:
+     0 leaked clusters
+     1 corruptions
+ 
+Double checking the fixed image now...
+No errors were found on the image.
+
+=== Check rebuilt reftable location ===
+
+--- Does the image size increase? ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Image size did not change
+
+--- Will the reftable occupy a hole specifically left for it?  ---
+
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=9437184
+wrote 8388608/8388608 bytes at offset 0
+8 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 512/512 bytes at offset 0
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+discard 1024/1024 bytes at offset 4096
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+Double checking the fixed image now...
+No errors were found on the image.
+
+OK: Reftable is where we expect it
+
+--- Rebuilding refcount structures on block devices ---
+
+{ "execute": "qmp_capabilities" }
+{"return": {}}
+{ "execute": "blockdev-create",
+       "arguments": {
+           "job-id": "create",
+           "options": {
+               "driver": "IMGFMT",
+               "file": "file",
+               "size": 67108864,
+               "cluster-size": 512
+           } } }
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "created", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "running", "id": "create"}}
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "waiting", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "pending", "id": "create"}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "create"}}
+{ "execute": "job-dismiss", "arguments": { "id": "create" } }
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "create"}}
+{"return": {}}
+{ "execute": "quit" }
+{"return": {}}
+{"timestamp": {"seconds":  TIMESTAMP, "microseconds":  TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
+
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+ERROR cluster 0 refcount=0 reference=1
+Rebuilding refcount structure
+The following inconsistencies were found and repaired:
+
+    0 leaked clusters
+    1 corruptions
+
+ Double checking the fixed image now...
+ No errors were found on the image.
+ *** done
+-- 
+2.27.0
+
--- a/SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch
+++ b/SOURCES/kvm-iotests-Allow-using-QMP-with-the-QSD.patch
@ -0,0 +1,99 @@
+From 12f596b66d577eb92f154fadf734d058dd0756d6 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Wed, 16 Feb 2022 11:53:54 +0100
+Subject: [PATCH 23/24] iotests: Allow using QMP with the QSD
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive
+RH-Commit: [2/3] 55bee4690a2e02d3be9f2bd68f2d244d0a36743b
+RH-Bugzilla: 2072932
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+
+Add a parameter to optionally open a QMP connection when creating a
+QemuStorageDaemon instance.
+
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220216105355.30729-3-hreitz@redhat.com>
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit ec88eed8d14088b36a3495710368b8d1a3c33420)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ tests/qemu-iotests/iotests.py | 32 +++++++++++++++++++++++++++++++-
+ 1 file changed, 31 insertions(+), 1 deletion(-)
+
+diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
+index a51b5ce8cd..2ef493755c 100644
+--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
+@@ -38,6 +38,7 @@
+ 
+ from qemu.machine import qtest
+ from qemu.qmp import QMPMessage
+from qemu.aqmp.legacy import QEMUMonitorProtocol
+ 
+ # Use this logger for logging messages directly from the iotests module
+ logger = logging.getLogger('qemu.iotests')
+@@ -315,14 +316,30 @@ def cmd(self, cmd):
+ 
+ 
+ class QemuStorageDaemon:
+-    def __init__(self, *args: str, instance_id: str = 'a'):
+    _qmp: Optional[QEMUMonitorProtocol] = None
+    _qmpsock: Optional[str] = None
+    # Python < 3.8 would complain if this type were not a string literal
+    # (importing `annotations` from `__future__` would work; but not on <= 3.6)
+    _p: 'Optional[subprocess.Popen[bytes]]' = None
+
+    def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False):
+         assert '--pidfile' not in args
+         self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
+         all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+ 
+        if qmp:
+            self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock')
+            all_args += ['--chardev',
+                         f'socket,id=qmp-sock,path={self._qmpsock}',
+                         '--monitor', 'qmp-sock']
+
+            self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True)
+
+         # Cannot use with here, we want the subprocess to stay around
+         # pylint: disable=consider-using-with
+         self._p = subprocess.Popen(all_args)
+        if self._qmp is not None:
+            self._qmp.accept()
+         while not os.path.exists(self.pidfile):
+             if self._p.poll() is not None:
+                 cmd = ' '.join(all_args)
+@@ -337,11 +354,24 @@ def __init__(self, *args: str, instance_id: str = 'a'):
+ 
+         assert self._pid == self._p.pid
+ 
+    def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
+            -> QMPMessage:
+        assert self._qmp is not None
+        return self._qmp.cmd(cmd, args)
+
+     def stop(self, kill_signal=15):
+         self._p.send_signal(kill_signal)
+         self._p.wait()
+         self._p = None
+ 
+        if self._qmp:
+            self._qmp.close()
+
+        if self._qmpsock is not None:
+            try:
+                os.remove(self._qmpsock)
+            except OSError:
+                pass
+         try:
+             os.remove(self.pidfile)
+         except OSError:
+-- 
+2.35.3
+
--- a/SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch
+++ b/SOURCES/kvm-iotests-graph-changes-while-io-New-test.patch
@ -0,0 +1,153 @@
+From 27042ff7aca4366c50e8ed66b47487d46774d16a Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Wed, 16 Feb 2022 11:53:55 +0100
+Subject: [PATCH 24/24] iotests/graph-changes-while-io: New test
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive
+RH-Commit: [3/3] b9dffe09bef6cf9b2f0aad69b327ea1df92e847a
+RH-Bugzilla: 2072932
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+
+Test the following scenario:
+1. Some block node (null-co) attached to a user (here: NBD server) that
+   performs I/O and keeps the node in an I/O thread
+2. Repeatedly run blockdev-add/blockdev-del to add/remove an overlay
+   to/from that node
+
+Each blockdev-add triggers bdrv_refresh_limits(), and because
+blockdev-add runs in the main thread, it does not stop the I/O requests.
+I/O can thus happen while the limits are refreshed, and when such a
+request sees a temporarily invalid block limit (e.g. alignment is 0),
+this may easily crash qemu (or the storage daemon in this case).
+
+The block layer needs to ensure that I/O requests to a node are paused
+while that node's BlockLimits are refreshed.
+
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Message-Id: <20220216105355.30729-4-hreitz@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 971bea8089531af56b1bbd9ce62e756bdf006711)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ .../qemu-iotests/tests/graph-changes-while-io | 91 +++++++++++++++++++
+ .../tests/graph-changes-while-io.out          |  5 +
+ 2 files changed, 96 insertions(+)
+ create mode 100755 tests/qemu-iotests/tests/graph-changes-while-io
+ create mode 100644 tests/qemu-iotests/tests/graph-changes-while-io.out
+
+diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io
+new file mode 100755
+index 0000000000..567e8cf21e
+--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io
+@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+# group: rw
+#
+# Test graph changes while I/O is happening
+#
+# Copyright (C) 2022 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+from threading import Thread
+import iotests
+from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
+        QemuStorageDaemon
+
+
+top = os.path.join(iotests.test_dir, 'top.img')
+nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+
+
+def do_qemu_img_bench() -> None:
+    """
+    Do some I/O requests on `nbd_sock`.
+    """
+    assert qemu_img('bench', '-f', 'raw', '-c', '2000000',
+                    f'nbd+unix:///node0?socket={nbd_sock}') == 0
+
+
+class TestGraphChangesWhileIO(QMPTestCase):
+    def setUp(self) -> None:
+        # Create an overlay that can be added at runtime on top of the
+        # null-co block node that will receive I/O
+        assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://',
+                               top) == 0
+
+        # QSD instance with a null-co block node in an I/O thread,
+        # exported over NBD (on `nbd_sock`, export name "node0")
+        self.qsd = QemuStorageDaemon(
+            '--object', 'iothread,id=iothread0',
+            '--blockdev', 'null-co,node-name=node0,read-zeroes=true',
+            '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}',
+            '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' +
+                        'fixed-iothread=true,writable=true',
+            qmp=True
+        )
+
+    def tearDown(self) -> None:
+        self.qsd.stop()
+
+    def test_blockdev_add_while_io(self) -> None:
+        # Run qemu-img bench in the background
+        bench_thr = Thread(target=do_qemu_img_bench)
+        bench_thr.start()
+
+        # While qemu-img bench is running, repeatedly add and remove an
+        # overlay to/from node0
+        while bench_thr.is_alive():
+            result = self.qsd.qmp('blockdev-add', {
+                'driver': imgfmt,
+                'node-name': 'overlay',
+                'backing': 'node0',
+                'file': {
+                    'driver': 'file',
+                    'filename': top
+                }
+            })
+            self.assert_qmp(result, 'return', {})
+
+            result = self.qsd.qmp('blockdev-del', {
+                'node-name': 'overlay'
+            })
+            self.assert_qmp(result, 'return', {})
+
+        bench_thr.join()
+
+if __name__ == '__main__':
+    # Format must support raw backing files
+    iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'],
+                 supported_protocols=['file'])
+diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out
+new file mode 100644
+index 0000000000..ae1213e6f8
+--- /dev/null
+++ b/tests/qemu-iotests/tests/graph-changes-while-io.out
+@@ -0,0 +1,5 @@
+.
+----------------------------------------------------------------------
+Ran 1 tests
+
+OK
+-- 
+2.35.3
+
--- a/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch
+++ b/SOURCES/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch
@ -0,0 +1,49 @@
+From 99d33621440fd30e0da2974dafb0cd372334305a Mon Sep 17 00:00:00 2001
+From: Stefan Hajnoczi <stefanha@redhat.com>
+Date: Thu, 9 Jun 2022 17:47:12 +0100
+Subject: [PATCH 2/2] linux-aio: explain why max batch is checked in
+ laio_io_unplug()
+
+RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
+RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug()
+RH-Commit: [2/2] 8617870ed70e3a57269f06eeb242d0fab79a66fb
+RH-Bugzilla: 2105410
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+
+It may not be obvious why laio_io_unplug() checks max batch. I discussed
+this with Stefano and have added a comment summarizing the reason.
+
+Cc: Stefano Garzarella <sgarzare@redhat.com>
+Cc: Kevin Wolf <kwolf@redhat.com>
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-id: 20220609164712.1539045-3-stefanha@redhat.com
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit 99b969fbe105117f5af6060d3afef40ca39cc9c1)
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+---
+ block/linux-aio.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/block/linux-aio.c b/block/linux-aio.c
+index 77f17ad596..85650c4222 100644
+--- a/block/linux-aio.c
+++ b/block/linux-aio.c
+@@ -362,6 +362,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
+     assert(s->io_q.plugged);
+     s->io_q.plugged--;
+ 
+    /*
+     * Why max batch checking is performed here:
+     * Another BDS may have queued requests with a higher dev_max_batch and
+     * therefore in_queue could now exceed our dev_max_batch. Re-check the max
+     * batch so we can honor our device's dev_max_batch.
+     */
+     if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
+         (!s->io_q.plugged &&
+          !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
+-- 
+2.35.3
+
--- a/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch
+++ b/SOURCES/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch
@ -0,0 +1,56 @@
+From 0fbb0c87628bef2cb4d1b7748d67020dde50cdef Mon Sep 17 00:00:00 2001
+From: Stefan Hajnoczi <stefanha@redhat.com>
+Date: Thu, 9 Jun 2022 17:47:11 +0100
+Subject: [PATCH 1/2] linux-aio: fix unbalanced plugged counter in
+ laio_io_unplug()
+
+RH-Author: Stefan Hajnoczi <stefanha@redhat.com>
+RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug()
+RH-Commit: [1/2] f518df755090289905898a36922992288688e338
+RH-Bugzilla: 2105410
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+RH-Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+
+Every laio_io_plug() call has a matching laio_io_unplug() call. There is
+a plugged counter that tracks the number of levels of plugging and
+allows for nesting.
+
+The plugged counter must reflect the balance between laio_io_plug() and
+laio_io_unplug() calls accurately. Otherwise I/O stalls occur since
+io_submit(2) calls are skipped while plugged.
+
+Reported-by: Nikolay Tenev <nt@storpool.com>
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-id: 20220609164712.1539045-2-stefanha@redhat.com
+Cc: Stefano Garzarella <sgarzare@redhat.com>
+Fixes: 68d7946648 ("linux-aio: add `dev_max_batch` parameter to laio_io_unplug()")
+[Stefano Garzarella suggested adding a Fixes tag.
+--Stefan]
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+(cherry picked from commit f387cac5af030a58ac5a0dacf64cab5e5a4fe5c7)
+Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
+---
+ block/linux-aio.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/linux-aio.c b/block/linux-aio.c
+index f53ae72e21..77f17ad596 100644
+--- a/block/linux-aio.c
+++ b/block/linux-aio.c
+@@ -360,8 +360,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s,
+                     uint64_t dev_max_batch)
+ {
+     assert(s->io_q.plugged);
+    s->io_q.plugged--;
+
+     if (s->io_q.in_queue >= laio_max_batch(s, dev_max_batch) ||
+-        (--s->io_q.plugged == 0 &&
+        (!s->io_q.plugged &&
+          !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) {
+         ioq_submit(s);
+     }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch
+++ b/SOURCES/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch
--- a/SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch
+++ b/SOURCES/kvm-linux-headers-include-missing-changes-from-5.17.patch
@ -0,0 +1,58 @@
+From aa6181d87e2b4ef1a70be002881908d2df5548a9 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Tue, 22 Feb 2022 17:58:11 +0100
+Subject: [PATCH 04/24] linux-headers: include missing changes from 5.17
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [4/13] 2ed7cbc07e63d85cda916ef44d1e82b1fba7fdf4
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 1ea5208febcc068449b63282d72bb719ab67a466)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ linux-headers/asm-x86/kvm.h | 3 +++
+ linux-headers/linux/kvm.h   | 4 ++++
+ 2 files changed, 7 insertions(+)
+
+diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
+index 2da3316bb5..bf6e96011d 100644
+--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
+@@ -452,6 +452,9 @@ struct kvm_sync_regs {
+ 
+ #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE	0x00000001
+ 
+/* attributes for system fd (group 0) */
+#define KVM_X86_XCOMP_GUEST_SUPP	0
+
+ struct kvm_vmx_nested_state_data {
+ 	__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ 	__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
+index 00af3bc333..d232feaae9 100644
+--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
+@@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
+ #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
+ #define KVM_CAP_VM_GPA_BITS 207
+ #define KVM_CAP_XSAVE2 208
+#define KVM_CAP_SYS_ATTRIBUTES 209
+ 
+ #ifdef KVM_CAP_IRQ_ROUTING
+ 
+@@ -2047,4 +2048,7 @@ struct kvm_stats_desc {
+ 
+ #define KVM_GET_STATS_FD  _IO(KVMIO,  0xce)
+ 
+/* Available with KVM_CAP_XSAVE2 */
+#define KVM_GET_XSAVE2		  _IOR(KVMIO,  0xcf, struct kvm_xsave)
+
+ #endif /* __LINUX_KVM_H */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch
+++ b/SOURCES/kvm-linux-headers-update-to-5.16-rc1.patch
@ -0,0 +1,725 @@
+From 64808db4a14867ad774b5e7535972a886e20a156 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 11 Nov 2021 12:06:01 +0100
+Subject: [PATCH 02/24] linux-headers: update to 5.16-rc1
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [2/13] 4af2f4942db029b81890e3862793fb54b62791cc
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Acked-by: Cornelia Huck <cohuck@redhat.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-Id: <20211111110604.207376-3-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 43709a0ca3b09e952bde3f38112f1d7fbf7c65b1)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ include/standard-headers/drm/drm_fourcc.h     | 121 +++++++++++++++++-
+ include/standard-headers/linux/ethtool.h      |  31 +++++
+ include/standard-headers/linux/fuse.h         |  10 +-
+ include/standard-headers/linux/pci_regs.h     |   6 +
+ include/standard-headers/linux/virtio_gpu.h   |  18 ++-
+ include/standard-headers/linux/virtio_ids.h   |  24 ++++
+ include/standard-headers/linux/virtio_vsock.h |   3 +-
+ linux-headers/asm-arm64/unistd.h              |   1 +
+ linux-headers/asm-generic/unistd.h            |  22 +++-
+ linux-headers/asm-mips/unistd_n32.h           |   1 +
+ linux-headers/asm-mips/unistd_n64.h           |   1 +
+ linux-headers/asm-mips/unistd_o32.h           |   1 +
+ linux-headers/asm-powerpc/unistd_32.h         |   1 +
+ linux-headers/asm-powerpc/unistd_64.h         |   1 +
+ linux-headers/asm-s390/unistd_32.h            |   1 +
+ linux-headers/asm-s390/unistd_64.h            |   1 +
+ linux-headers/asm-x86/kvm.h                   |   5 +
+ linux-headers/asm-x86/unistd_32.h             |   3 +
+ linux-headers/asm-x86/unistd_64.h             |   3 +
+ linux-headers/asm-x86/unistd_x32.h            |   3 +
+ linux-headers/linux/kvm.h                     |  40 +++++-
+ 21 files changed, 276 insertions(+), 21 deletions(-)
+
+diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
+index 352b51fd0a..2c025cb4fe 100644
+--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
+@@ -103,6 +103,12 @@ extern "C" {
+ /* 8 bpp Red */
+ #define DRM_FORMAT_R8		fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
+ 
+/* 10 bpp Red */
+#define DRM_FORMAT_R10		fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
+
+/* 12 bpp Red */
+#define DRM_FORMAT_R12		fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
+
+ /* 16 bpp Red */
+ #define DRM_FORMAT_R16		fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
+ 
+@@ -372,6 +378,12 @@ extern "C" {
+ 
+ #define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
+ 
+#define fourcc_mod_get_vendor(modifier) \
+	(((modifier) >> 56) & 0xff)
+
+#define fourcc_mod_is_vendor(modifier, vendor) \
+	(fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor)
+
+ #define fourcc_mod_code(vendor, val) \
+ 	((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL))
+ 
+@@ -899,9 +911,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
+ 
+ /*
+  * The top 4 bits (out of the 56 bits alloted for specifying vendor specific
+- * modifiers) denote the category for modifiers. Currently we have only two
+- * categories of modifiers ie AFBC and MISC. We can have a maximum of sixteen
+- * different categories.
+ * modifiers) denote the category for modifiers. Currently we have three
+ * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of
+ * sixteen different categories.
+  */
+ #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \
+ 	fourcc_mod_code(ARM, ((uint64_t)(__type) << 52) | ((__val) & 0x000fffffffffffffULL))
+@@ -1016,6 +1028,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier)
+  */
+ #define AFBC_FORMAT_MOD_USM	(1ULL << 12)
+ 
+/*
+ * Arm Fixed-Rate Compression (AFRC) modifiers
+ *
+ * AFRC is a proprietary fixed rate image compression protocol and format,
+ * designed to provide guaranteed bandwidth and memory footprint
+ * reductions in graphics and media use-cases.
+ *
+ * AFRC buffers consist of one or more planes, with the same components
+ * and meaning as an uncompressed buffer using the same pixel format.
+ *
+ * Within each plane, the pixel/luma/chroma values are grouped into
+ * "coding unit" blocks which are individually compressed to a
+ * fixed size (in bytes). All coding units within a given plane of a buffer
+ * store the same number of values, and have the same compressed size.
+ *
+ * The coding unit size is configurable, allowing different rates of compression.
+ *
+ * The start of each AFRC buffer plane must be aligned to an alignment granule which
+ * depends on the coding unit size.
+ *
+ * Coding Unit Size   Plane Alignment
+ * ----------------   ---------------
+ * 16 bytes           1024 bytes
+ * 24 bytes           512  bytes
+ * 32 bytes           2048 bytes
+ *
+ * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned
+ * to a multiple of the paging tile dimensions.
+ * The dimensions of each paging tile depend on whether the buffer is optimised for
+ * scanline (SCAN layout) or rotated (ROT layout) access.
+ *
+ * Layout   Paging Tile Width   Paging Tile Height
+ * ------   -----------------   ------------------
+ * SCAN     16 coding units     4 coding units
+ * ROT      8  coding units     8 coding units
+ *
+ * The dimensions of each coding unit depend on the number of components
+ * in the compressed plane and whether the buffer is optimised for
+ * scanline (SCAN layout) or rotated (ROT layout) access.
+ *
+ * Number of Components in Plane   Layout      Coding Unit Width   Coding Unit Height
+ * -----------------------------   ---------   -----------------   ------------------
+ * 1                               SCAN        16 samples          4 samples
+ * Example: 16x4 luma samples in a 'Y' plane
+ *          16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 1                               ROT         8 samples           8 samples
+ * Example: 8x8 luma samples in a 'Y' plane
+ *          8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 2                               DONT CARE   8 samples           4 samples
+ * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer
+ * -----------------------------   ---------   -----------------   ------------------
+ * 3                               DONT CARE   4 samples           4 samples
+ * Example: 4x4 pixels in an RGB buffer without alpha
+ * -----------------------------   ---------   -----------------   ------------------
+ * 4                               DONT CARE   4 samples           4 samples
+ * Example: 4x4 pixels in an RGB buffer with alpha
+ */
+
+#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02
+
+#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \
+	DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode)
+
+/*
+ * AFRC coding unit size modifier.
+ *
+ * Indicates the number of bytes used to store each compressed coding unit for
+ * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance
+ * is the same for both Cb and Cr, which may be stored in separate planes.
+ *
+ * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store
+ * each compressed coding unit in the first plane of the buffer. For RGBA buffers
+ * this is the only plane, while for semi-planar and fully-planar YUV buffers,
+ * this corresponds to the luma plane.
+ *
+ * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store
+ * each compressed coding unit in the second and third planes in the buffer.
+ * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s).
+ *
+ * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified
+ * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero.
+ * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and
+ * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified.
+ */
+#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf
+#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL)
+#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL)
+#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL)
+
+#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size)
+#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4)
+
+/*
+ * AFRC scanline memory layout.
+ *
+ * Indicates if the buffer uses the scanline-optimised layout
+ * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout.
+ * The memory layout is the same for all planes.
+ */
+#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8)
+
+ /*
+  * Arm 16x16 Block U-Interleaved modifier
+  *
+diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
+index 053d3fafdf..688eb8dc39 100644
+--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
+@@ -603,6 +603,7 @@ enum ethtool_link_ext_state {
+ 	ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE,
+ 	ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED,
+ 	ETHTOOL_LINK_EXT_STATE_OVERHEAT,
+	ETHTOOL_LINK_EXT_STATE_MODULE,
+ };
+ 
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
+@@ -639,6 +640,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch {
+ enum ethtool_link_ext_substate_bad_signal_integrity {
+ 	ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
+ 	ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
+	ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST,
+	ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS,
+ };
+ 
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
+@@ -647,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue {
+ 	ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
+ };
+ 
+/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */
+enum ethtool_link_ext_substate_module {
+	ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1,
+};
+
+ #define ETH_GSTRING_LEN		32
+ 
+ /**
+@@ -704,6 +712,29 @@ enum ethtool_stringset {
+ 	ETH_SS_COUNT
+ };
+ 
+/**
+ * enum ethtool_module_power_mode_policy - plug-in module power mode policy
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host
+ *	to high power mode when the first port using it is put administratively
+ *	up and to low power mode when the last port using it is put
+ *	administratively down.
+ */
+enum ethtool_module_power_mode_policy {
+	ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1,
+	ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO,
+};
+
+/**
+ * enum ethtool_module_power_mode - plug-in module power mode
+ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode.
+ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode.
+ */
+enum ethtool_module_power_mode {
+	ETHTOOL_MODULE_POWER_MODE_LOW = 1,
+	ETHTOOL_MODULE_POWER_MODE_HIGH,
+};
+
+ /**
+  * struct ethtool_gstrings - string set for data tagging
+  * @cmd: Command number = %ETHTOOL_GSTRINGS
+diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
+index cce105bfba..23ea31708b 100644
+--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
+@@ -181,6 +181,9 @@
+  *  - add FUSE_OPEN_KILL_SUIDGID
+  *  - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT
+  *  - add FUSE_SETXATTR_ACL_KILL_SGID
+ *
+ *  7.34
+ *  - add FUSE_SYNCFS
+  */
+ 
+ #ifndef _LINUX_FUSE_H
+@@ -212,7 +215,7 @@
+ #define FUSE_KERNEL_VERSION 7
+ 
+ /** Minor version number of this interface */
+-#define FUSE_KERNEL_MINOR_VERSION 33
+#define FUSE_KERNEL_MINOR_VERSION 34
+ 
+ /** The node ID of the root inode */
+ #define FUSE_ROOT_ID 1
+@@ -505,6 +508,7 @@ enum fuse_opcode {
+ 	FUSE_COPY_FILE_RANGE	= 47,
+ 	FUSE_SETUPMAPPING	= 48,
+ 	FUSE_REMOVEMAPPING	= 49,
+	FUSE_SYNCFS		= 50,
+ 
+ 	/* CUSE specific operations */
+ 	CUSE_INIT		= 4096,
+@@ -967,4 +971,8 @@ struct fuse_removemapping_one {
+ #define FUSE_REMOVEMAPPING_MAX_ENTRY   \
+ 		(PAGE_SIZE / sizeof(struct fuse_removemapping_one))
+ 
+struct fuse_syncfs_in {
+	uint64_t	padding;
+};
+
+ #endif /* _LINUX_FUSE_H */
+diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h
+index e709ae8235..ff6ccbc6ef 100644
+--- a/include/standard-headers/linux/pci_regs.h
+++ b/include/standard-headers/linux/pci_regs.h
+@@ -504,6 +504,12 @@
+ #define  PCI_EXP_DEVCTL_URRE	0x0008	/* Unsupported Request Reporting En. */
+ #define  PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */
+ #define  PCI_EXP_DEVCTL_PAYLOAD	0x00e0	/* Max_Payload_Size */
+#define  PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */
+#define  PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */
+ #define  PCI_EXP_DEVCTL_EXT_TAG	0x0100	/* Extended Tag Field Enable */
+ #define  PCI_EXP_DEVCTL_PHANTOM	0x0200	/* Phantom Functions Enable */
+ #define  PCI_EXP_DEVCTL_AUX_PME	0x0400	/* Auxiliary Power PM Enable */
+diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h
+index 1357e4774e..2da48d3d4c 100644
+--- a/include/standard-headers/linux/virtio_gpu.h
+++ b/include/standard-headers/linux/virtio_gpu.h
+@@ -59,6 +59,11 @@
+  * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB
+  */
+ #define VIRTIO_GPU_F_RESOURCE_BLOB       3
+/*
+ * VIRTIO_GPU_CMD_CREATE_CONTEXT with
+ * context_init and multiple timelines
+ */
+#define VIRTIO_GPU_F_CONTEXT_INIT        4
+ 
+ enum virtio_gpu_ctrl_type {
+ 	VIRTIO_GPU_UNDEFINED = 0,
+@@ -122,14 +127,20 @@ enum virtio_gpu_shm_id {
+ 	VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1
+ };
+ 
+-#define VIRTIO_GPU_FLAG_FENCE (1 << 0)
+#define VIRTIO_GPU_FLAG_FENCE         (1 << 0)
+/*
+ * If the following flag is set, then ring_idx contains the index
+ * of the command ring that needs to used when creating the fence
+ */
+#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1)
+ 
+ struct virtio_gpu_ctrl_hdr {
+ 	uint32_t type;
+ 	uint32_t flags;
+ 	uint64_t fence_id;
+ 	uint32_t ctx_id;
+-	uint32_t padding;
+	uint8_t ring_idx;
+	uint8_t padding[3];
+ };
+ 
+ /* data passed in the cursor vq */
+@@ -269,10 +280,11 @@ struct virtio_gpu_resource_create_3d {
+ };
+ 
+ /* VIRTIO_GPU_CMD_CTX_CREATE */
+#define VIRTIO_GPU_CONTEXT_INIT_CAPSET_ID_MASK 0x000000ff
+ struct virtio_gpu_ctx_create {
+ 	struct virtio_gpu_ctrl_hdr hdr;
+ 	uint32_t nlen;
+-	uint32_t padding;
+	uint32_t context_init;
+ 	char debug_name[64];
+ };
+ 
+diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h
+index 4fe842c3a3..80d76b75bc 100644
+--- a/include/standard-headers/linux/virtio_ids.h
+++ b/include/standard-headers/linux/virtio_ids.h
+@@ -54,7 +54,31 @@
+ #define VIRTIO_ID_SOUND			25 /* virtio sound */
+ #define VIRTIO_ID_FS			26 /* virtio filesystem */
+ #define VIRTIO_ID_PMEM			27 /* virtio pmem */
+#define VIRTIO_ID_RPMB			28 /* virtio rpmb */
+ #define VIRTIO_ID_MAC80211_HWSIM	29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER		30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER		31 /* virtio video decoder */
+#define VIRTIO_ID_SCMI			32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD		33 /* virtio nitro secure module*/
+#define VIRTIO_ID_I2C_ADAPTER		34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG		35 /* virtio watchdog */
+#define VIRTIO_ID_CAN			36 /* virtio can */
+#define VIRTIO_ID_DMABUF		37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV		38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY		39 /* virtio audio policy */
+ #define VIRTIO_ID_BT			40 /* virtio bluetooth */
+#define VIRTIO_ID_GPIO			41 /* virtio gpio */
+
+/*
+ * Virtio Transitional IDs
+ */
+
+#define VIRTIO_TRANS_ID_NET		1000 /* transitional virtio net */
+#define VIRTIO_TRANS_ID_BLOCK		1001 /* transitional virtio block */
+#define VIRTIO_TRANS_ID_BALLOON		1002 /* transitional virtio balloon */
+#define VIRTIO_TRANS_ID_CONSOLE		1003 /* transitional virtio console */
+#define VIRTIO_TRANS_ID_SCSI		1004 /* transitional virtio SCSI */
+#define VIRTIO_TRANS_ID_RNG		1005 /* transitional virtio rng */
+#define VIRTIO_TRANS_ID_9P		1009 /* transitional virtio 9p console */
+ 
+ #endif /* _LINUX_VIRTIO_IDS_H */
+diff --git a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h
+index 3a23488e42..467e751b17 100644
+--- a/include/standard-headers/linux/virtio_vsock.h
+++ b/include/standard-headers/linux/virtio_vsock.h
+@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown {
+ 
+ /* VIRTIO_VSOCK_OP_RW flags values */
+ enum virtio_vsock_rw {
+-	VIRTIO_VSOCK_SEQ_EOR = 1,
+	VIRTIO_VSOCK_SEQ_EOM = 1,
+	VIRTIO_VSOCK_SEQ_EOR = 2,
+ };
+ 
+ #endif /* _LINUX_VIRTIO_VSOCK_H */
+diff --git a/linux-headers/asm-arm64/unistd.h b/linux-headers/asm-arm64/unistd.h
+index f83a70e07d..ce2ee8f1e3 100644
+--- a/linux-headers/asm-arm64/unistd.h
+++ b/linux-headers/asm-arm64/unistd.h
+@@ -20,5 +20,6 @@
+ #define __ARCH_WANT_SET_GET_RLIMIT
+ #define __ARCH_WANT_TIME32_SYSCALLS
+ #define __ARCH_WANT_SYS_CLONE3
+#define __ARCH_WANT_MEMFD_SECRET
+ 
+ #include <asm-generic/unistd.h>
+diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h
+index f211961ce1..4557a8b608 100644
+--- a/linux-headers/asm-generic/unistd.h
+++ b/linux-headers/asm-generic/unistd.h
+@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise)
+ #define __NR_remap_file_pages 234
+ __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+ #define __NR_mbind 235
+-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+__SYSCALL(__NR_mbind, sys_mbind)
+ #define __NR_get_mempolicy 236
+-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy)
+ #define __NR_set_mempolicy 237
+-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy)
+ #define __NR_migrate_pages 238
+-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
+ #define __NR_move_pages 239
+-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+__SYSCALL(__NR_move_pages, sys_move_pages)
+ #endif
+ 
+ #define __NR_rt_tgsigqueueinfo 240
+@@ -873,8 +873,18 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+ #define __NR_landlock_restrict_self 446
+ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
+ 
+#ifdef __ARCH_WANT_MEMFD_SECRET
+#define __NR_memfd_secret 447
+__SYSCALL(__NR_memfd_secret, sys_memfd_secret)
+#endif
+#define __NR_process_mrelease 448
+__SYSCALL(__NR_process_mrelease, sys_process_mrelease)
+
+#define __NR_futex_waitv 449
+__SYSCALL(__NR_futex_waitv, sys_futex_waitv)
+
+ #undef __NR_syscalls
+-#define __NR_syscalls 447
+#define __NR_syscalls 450
+ 
+ /*
+  * 32 bit systems traditionally used different
+diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h
+index 09cd297698..4b3e7ad1ec 100644
+--- a/linux-headers/asm-mips/unistd_n32.h
+++ b/linux-headers/asm-mips/unistd_n32.h
+@@ -376,5 +376,6 @@
+ #define __NR_landlock_create_ruleset (__NR_Linux + 444)
+ #define __NR_landlock_add_rule (__NR_Linux + 445)
+ #define __NR_landlock_restrict_self (__NR_Linux + 446)
+#define __NR_process_mrelease (__NR_Linux + 448)
+ 
+ #endif /* _ASM_UNISTD_N32_H */
+diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h
+index 780e0cead6..488d9298d9 100644
+--- a/linux-headers/asm-mips/unistd_n64.h
+++ b/linux-headers/asm-mips/unistd_n64.h
+@@ -352,5 +352,6 @@
+ #define __NR_landlock_create_ruleset (__NR_Linux + 444)
+ #define __NR_landlock_add_rule (__NR_Linux + 445)
+ #define __NR_landlock_restrict_self (__NR_Linux + 446)
+#define __NR_process_mrelease (__NR_Linux + 448)
+ 
+ #endif /* _ASM_UNISTD_N64_H */
+diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h
+index 06a2b3b55e..f47399870a 100644
+--- a/linux-headers/asm-mips/unistd_o32.h
+++ b/linux-headers/asm-mips/unistd_o32.h
+@@ -422,5 +422,6 @@
+ #define __NR_landlock_create_ruleset (__NR_Linux + 444)
+ #define __NR_landlock_add_rule (__NR_Linux + 445)
+ #define __NR_landlock_restrict_self (__NR_Linux + 446)
+#define __NR_process_mrelease (__NR_Linux + 448)
+ 
+ #endif /* _ASM_UNISTD_O32_H */
+diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h
+index cd5a8a41b2..11d54696dc 100644
+--- a/linux-headers/asm-powerpc/unistd_32.h
+++ b/linux-headers/asm-powerpc/unistd_32.h
+@@ -429,6 +429,7 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_process_mrelease 448
+ 
+ 
+ #endif /* _ASM_UNISTD_32_H */
+diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h
+index 8458effa8d..cf740bab13 100644
+--- a/linux-headers/asm-powerpc/unistd_64.h
+++ b/linux-headers/asm-powerpc/unistd_64.h
+@@ -401,6 +401,7 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_process_mrelease 448
+ 
+ 
+ #endif /* _ASM_UNISTD_64_H */
+diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h
+index 0c3cd299e4..8f97d98128 100644
+--- a/linux-headers/asm-s390/unistd_32.h
+++ b/linux-headers/asm-s390/unistd_32.h
+@@ -419,5 +419,6 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_process_mrelease 448
+ 
+ #endif /* _ASM_S390_UNISTD_32_H */
+diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h
+index 8dfc08b5e6..021ffc30e6 100644
+--- a/linux-headers/asm-s390/unistd_64.h
+++ b/linux-headers/asm-s390/unistd_64.h
+@@ -367,5 +367,6 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_process_mrelease 448
+ 
+ #endif /* _ASM_S390_UNISTD_64_H */
+diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h
+index a6c327f8ad..5a776a08f7 100644
+--- a/linux-headers/asm-x86/kvm.h
+++ b/linux-headers/asm-x86/kvm.h
+@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch {
+ #define KVM_GUESTDBG_USE_HW_BP		0x00020000
+ #define KVM_GUESTDBG_INJECT_DB		0x00040000
+ #define KVM_GUESTDBG_INJECT_BP		0x00080000
+#define KVM_GUESTDBG_BLOCKIRQ		0x00100000
+ 
+ /* for KVM_SET_GUEST_DEBUG */
+ struct kvm_guest_debug_arch {
+@@ -503,4 +504,8 @@ struct kvm_pmu_event_filter {
+ #define KVM_PMU_EVENT_ALLOW 0
+ #define KVM_PMU_EVENT_DENY 1
+ 
+/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
+#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
+#define   KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
+
+ #endif /* _ASM_X86_KVM_H */
+diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h
+index 66e96c0c68..9c9ffe312b 100644
+--- a/linux-headers/asm-x86/unistd_32.h
+++ b/linux-headers/asm-x86/unistd_32.h
+@@ -437,6 +437,9 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_memfd_secret 447
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+ 
+ 
+ #endif /* _ASM_UNISTD_32_H */
+diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h
+index b8ff6f14ee..084f1eef9c 100644
+--- a/linux-headers/asm-x86/unistd_64.h
+++ b/linux-headers/asm-x86/unistd_64.h
+@@ -359,6 +359,9 @@
+ #define __NR_landlock_create_ruleset 444
+ #define __NR_landlock_add_rule 445
+ #define __NR_landlock_restrict_self 446
+#define __NR_memfd_secret 447
+#define __NR_process_mrelease 448
+#define __NR_futex_waitv 449
+ 
+ 
+ #endif /* _ASM_UNISTD_64_H */
+diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h
+index 06a1097c15..a2441affc2 100644
+--- a/linux-headers/asm-x86/unistd_x32.h
+++ b/linux-headers/asm-x86/unistd_x32.h
+@@ -312,6 +312,9 @@
+ #define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444)
+ #define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445)
+ #define __NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446)
+#define __NR_memfd_secret (__X32_SYSCALL_BIT + 447)
+#define __NR_process_mrelease (__X32_SYSCALL_BIT + 448)
+#define __NR_futex_waitv (__X32_SYSCALL_BIT + 449)
+ #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512)
+ #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513)
+ #define __NR_ioctl (__X32_SYSCALL_BIT + 514)
+diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
+index bcaf66cc4d..02c5e7b7bb 100644
+--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
+@@ -269,6 +269,7 @@ struct kvm_xen_exit {
+ #define KVM_EXIT_AP_RESET_HOLD    32
+ #define KVM_EXIT_X86_BUS_LOCK     33
+ #define KVM_EXIT_XEN              34
+#define KVM_EXIT_RISCV_SBI        35
+ 
+ /* For KVM_EXIT_INTERNAL_ERROR */
+ /* Emulate instruction failed. */
+@@ -397,13 +398,23 @@ struct kvm_run {
+ 		 * "ndata" is correct, that new fields are enumerated in "flags",
+ 		 * and that each flag enumerates fields that are 64-bit aligned
+ 		 * and sized (so that ndata+internal.data[] is valid/accurate).
+		 *
+		 * Space beyond the defined fields may be used to store arbitrary
+		 * debug information relating to the emulation failure. It is
+		 * accounted for in "ndata" but the format is unspecified and is
+		 * not represented in "flags". Any such information is *not* ABI!
+ 		 */
+ 		struct {
+ 			__u32 suberror;
+ 			__u32 ndata;
+ 			__u64 flags;
+-			__u8  insn_size;
+-			__u8  insn_bytes[15];
+			union {
+				struct {
+					__u8  insn_size;
+					__u8  insn_bytes[15];
+				};
+			};
+			/* Arbitrary debug data may follow. */
+ 		} emulation_failure;
+ 		/* KVM_EXIT_OSI */
+ 		struct {
+@@ -469,6 +480,13 @@ struct kvm_run {
+ 		} msr;
+ 		/* KVM_EXIT_XEN */
+ 		struct kvm_xen_exit xen;
+		/* KVM_EXIT_RISCV_SBI */
+		struct {
+			unsigned long extension_id;
+			unsigned long function_id;
+			unsigned long args[6];
+			unsigned long ret[2];
+		} riscv_sbi;
+ 		/* Fix the size of the union. */
+ 		char padding[256];
+ 	};
+@@ -1223,11 +1241,16 @@ struct kvm_irqfd {
+ 
+ /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags.  */
+ #define KVM_CLOCK_TSC_STABLE		2
+#define KVM_CLOCK_REALTIME		(1 << 2)
+#define KVM_CLOCK_HOST_TSC		(1 << 3)
+ 
+ struct kvm_clock_data {
+ 	__u64 clock;
+ 	__u32 flags;
+-	__u32 pad[9];
+	__u32 pad0;
+	__u64 realtime;
+	__u64 host_tsc;
+	__u32 pad[4];
+ };
+ 
+ /* For KVM_CAP_SW_TLB */
+@@ -1965,7 +1988,9 @@ struct kvm_stats_header {
+ #define KVM_STATS_TYPE_CUMULATIVE	(0x0 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_INSTANT		(0x1 << KVM_STATS_TYPE_SHIFT)
+ #define KVM_STATS_TYPE_PEAK		(0x2 << KVM_STATS_TYPE_SHIFT)
+-#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_PEAK
+#define KVM_STATS_TYPE_LINEAR_HIST	(0x3 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_LOG_HIST		(0x4 << KVM_STATS_TYPE_SHIFT)
+#define KVM_STATS_TYPE_MAX		KVM_STATS_TYPE_LOG_HIST
+ 
+ #define KVM_STATS_UNIT_SHIFT		4
+ #define KVM_STATS_UNIT_MASK		(0xF << KVM_STATS_UNIT_SHIFT)
+@@ -1988,8 +2013,9 @@ struct kvm_stats_header {
+  * @size: The number of data items for this stats.
+  *        Every data item is of type __u64.
+  * @offset: The offset of the stats to the start of stat structure in
+- *          struture kvm or kvm_vcpu.
+- * @unused: Unused field for future usage. Always 0 for now.
+ *          structure kvm or kvm_vcpu.
+ * @bucket_size: A parameter value used for histogram stats. It is only used
+ *		for linear histogram stats, specifying the size of the bucket;
+  * @name: The name string for the stats. Its size is indicated by the
+  *        &kvm_stats_header->name_size.
+  */
+@@ -1998,7 +2024,7 @@ struct kvm_stats_desc {
+ 	__s16 exponent;
+ 	__u16 size;
+ 	__u32 offset;
+-	__u32 unused;
+	__u32 bucket_size;
+ 	char name[];
+ };
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch
+++ b/SOURCES/kvm-migration-Add-migrate_use_tls-helper.patch
@ -0,0 +1,106 @@
+From a7c6bc008fe006f005d5c15d3f883572ad5defc5 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 20/37] migration: Add migrate_use_tls() helper
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [20/26] 02afc2e60f1abbf6db45d83e54a18b66dad52426
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+A lot of places check parameters.tls_creds in order to evaluate if TLS is
+in use, and sometimes call migrate_get_current() just for that test.
+
+Add new helper function migrate_use_tls() in order to simplify testing
+for TLS usage.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Message-Id: <20220513062836.965425-6-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit d2fafb6a6814a8998607d0baf691265032996a0f)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/channel.c   | 3 +--
+ migration/migration.c | 9 +++++++++
+ migration/migration.h | 1 +
+ migration/multifd.c   | 5 +----
+ 4 files changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/migration/channel.c b/migration/channel.c
+index c4fc000a1a..086b5c0d8b 100644
+--- a/migration/channel.c
+++ b/migration/channel.c
+@@ -38,8 +38,7 @@ void migration_channel_process_incoming(QIOChannel *ioc)
+     trace_migration_set_incoming_channel(
+         ioc, object_get_typename(OBJECT(ioc)));
+ 
+-    if (s->parameters.tls_creds &&
+-        *s->parameters.tls_creds &&
+    if (migrate_use_tls() &&
+         !object_dynamic_cast(OBJECT(ioc),
+                              TYPE_QIO_CHANNEL_TLS)) {
+         migration_tls_channel_process_incoming(s, ioc, &local_err);
+diff --git a/migration/migration.c b/migration/migration.c
+index b0fc3f68bd..8e28f2ee41 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -2568,6 +2568,15 @@ bool migrate_use_zero_copy_send(void)
+ }
+ #endif
+ 
+int migrate_use_tls(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.tls_creds && *s->parameters.tls_creds;
+}
+
+ int migrate_use_xbzrle(void)
+ {
+     MigrationState *s;
+diff --git a/migration/migration.h b/migration/migration.h
+index 908098939f..9396b7e90a 100644
+--- a/migration/migration.h
+++ b/migration/migration.h
+@@ -344,6 +344,7 @@ bool migrate_use_zero_copy_send(void);
+ #else
+ #define migrate_use_zero_copy_send() (false)
+ #endif
+int migrate_use_tls(void);
+ int migrate_use_xbzrle(void);
+ uint64_t migrate_xbzrle_cache_size(void);
+ bool migrate_colo_enabled(void);
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 3725226400..e53811f04a 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -789,14 +789,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p,
+                                     QIOChannel *ioc,
+                                     Error *error)
+ {
+-    MigrationState *s = migrate_get_current();
+-
+     trace_multifd_set_outgoing_channel(
+         ioc, object_get_typename(OBJECT(ioc)), p->tls_hostname, error);
+ 
+     if (!error) {
+-        if (s->parameters.tls_creds &&
+-            *s->parameters.tls_creds &&
+        if (migrate_use_tls() &&
+             !object_dynamic_cast(OBJECT(ioc),
+                                  TYPE_QIO_CHANNEL_TLS)) {
+             multifd_tls_channel_connect(p, ioc, &error);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch
+++ b/SOURCES/kvm-migration-Add-migration_incoming_transport_cleanup.patch
@ -0,0 +1,102 @@
+From 02eab793d82cd3c82d31f1e1f34d16fcc30caf0e Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Tue, 1 Mar 2022 16:39:14 +0800
+Subject: [PATCH 27/37] migration: Add migration_incoming_transport_cleanup()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Peter Xu <peterx@redhat.com>
+RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times
+RH-Commit: [1/2] 57b2a9a165ee7cb2d01519bd54eb8dc4185815e0
+RH-Bugzilla: 2097652
+RH-Acked-by: Leonardo Brás <leobras@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+
+Add a helper to cleanup the transport listener.
+
+When doing so, we should also nullify the cleanup hook and the data, so that
+it is safe to call it multiple times.
+
+Move the socket_address_list cleanup along with it, because that list is a
+mirror of the listener channels and exists only for the purpose of
+query-migrate.  Hence anyone who wants to clean up the listener transport
+will always want to clean up the socket list too.
+
+No functional change intended.
+
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20220301083925.33483-15-peterx@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit e031149c78489413038e934eec9f54ac699cf322)
+Signed-off-by: Peter Xu <peterx@redhat.com>
+---
+ migration/migration.c | 22 ++++++++++++++--------
+ migration/migration.h |  1 +
+ 2 files changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index c8aa55d2fe..b787a36789 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -263,6 +263,19 @@ MigrationIncomingState *migration_incoming_get_current(void)
+     return current_incoming;
+ }
+ 
+void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
+{
+    if (mis->socket_address_list) {
+        qapi_free_SocketAddressList(mis->socket_address_list);
+        mis->socket_address_list = NULL;
+    }
+
+    if (mis->transport_cleanup) {
+        mis->transport_cleanup(mis->transport_data);
+        mis->transport_data = mis->transport_cleanup = NULL;
+    }
+}
+
+ void migration_incoming_state_destroy(void)
+ {
+     struct MigrationIncomingState *mis = migration_incoming_get_current();
+@@ -283,10 +296,8 @@ void migration_incoming_state_destroy(void)
+         g_array_free(mis->postcopy_remote_fds, TRUE);
+         mis->postcopy_remote_fds = NULL;
+     }
+-    if (mis->transport_cleanup) {
+-        mis->transport_cleanup(mis->transport_data);
+-    }
+ 
+    migration_incoming_transport_cleanup(mis);
+     qemu_event_reset(&mis->main_thread_load_event);
+ 
+     if (mis->page_requested) {
+@@ -294,11 +305,6 @@ void migration_incoming_state_destroy(void)
+         mis->page_requested = NULL;
+     }
+ 
+-    if (mis->socket_address_list) {
+-        qapi_free_SocketAddressList(mis->socket_address_list);
+-        mis->socket_address_list = NULL;
+-    }
+-
+     yank_unregister_instance(MIGRATION_YANK_INSTANCE);
+ }
+ 
+diff --git a/migration/migration.h b/migration/migration.h
+index 9396b7e90a..243898e3be 100644
+--- a/migration/migration.h
+++ b/migration/migration.h
+@@ -130,6 +130,7 @@ struct MigrationIncomingState {
+ 
+ MigrationIncomingState *migration_incoming_get_current(void);
+ void migration_incoming_state_destroy(void);
+void migration_incoming_transport_cleanup(MigrationIncomingState *mis);
+ /*
+  * Functions to work with blocktime context
+  */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch
+++ b/SOURCES/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch
@ -0,0 +1,250 @@
+From 2a84bf822cae38f67458043cd379a22e0fd22485 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 19/37] migration: Add zero-copy-send parameter for QMP/HMP for
+ Linux
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [19/26] 44ec703088cad75fd6e504958527e81d3261c9df
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Add a property that allows zero-copy migration of memory pages
+on the sending side, and also include a helper function
+migrate_use_zero_copy_send() to check if it's enabled.
+
+No code is introduced to actually do the migration, but it allows
+future implementations to enable/disable this feature.
+
+On non-Linux builds this parameter is compiled-out.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Message-Id: <20220513062836.965425-5-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit abb6295b3ace5d17c3a65936913fc346616dbf14)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 32 ++++++++++++++++++++++++++++++++
+ migration/migration.h |  5 +++++
+ migration/socket.c    | 11 +++++++++--
+ monitor/hmp-cmds.c    |  6 ++++++
+ qapi/migration.json   | 24 ++++++++++++++++++++++++
+ 5 files changed, 76 insertions(+), 2 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 8a13294da6..b0fc3f68bd 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -888,6 +888,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
+     params->multifd_zlib_level = s->parameters.multifd_zlib_level;
+     params->has_multifd_zstd_level = true;
+     params->multifd_zstd_level = s->parameters.multifd_zstd_level;
+#ifdef CONFIG_LINUX
+    params->has_zero_copy_send = true;
+    params->zero_copy_send = s->parameters.zero_copy_send;
+#endif
+     params->has_xbzrle_cache_size = true;
+     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
+     params->has_max_postcopy_bandwidth = true;
+@@ -1541,6 +1545,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
+     if (params->has_multifd_compression) {
+         dest->multifd_compression = params->multifd_compression;
+     }
+#ifdef CONFIG_LINUX
+    if (params->has_zero_copy_send) {
+        dest->zero_copy_send = params->zero_copy_send;
+    }
+#endif
+     if (params->has_xbzrle_cache_size) {
+         dest->xbzrle_cache_size = params->xbzrle_cache_size;
+     }
+@@ -1653,6 +1662,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
+     if (params->has_multifd_compression) {
+         s->parameters.multifd_compression = params->multifd_compression;
+     }
+#ifdef CONFIG_LINUX
+    if (params->has_zero_copy_send) {
+        s->parameters.zero_copy_send = params->zero_copy_send;
+    }
+#endif
+     if (params->has_xbzrle_cache_size) {
+         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
+         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
+@@ -2543,6 +2557,17 @@ int migrate_multifd_zstd_level(void)
+     return s->parameters.multifd_zstd_level;
+ }
+ 
+#ifdef CONFIG_LINUX
+bool migrate_use_zero_copy_send(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.zero_copy_send;
+}
+#endif
+
+ int migrate_use_xbzrle(void)
+ {
+     MigrationState *s;
+@@ -4193,6 +4218,10 @@ static Property migration_properties[] = {
+     DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
+                       parameters.multifd_zstd_level,
+                       DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
+#ifdef CONFIG_LINUX
+    DEFINE_PROP_BOOL("zero_copy_send", MigrationState,
+                      parameters.zero_copy_send, false),
+#endif
+     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
+                       parameters.xbzrle_cache_size,
+                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
+@@ -4290,6 +4319,9 @@ static void migration_instance_init(Object *obj)
+     params->has_multifd_compression = true;
+     params->has_multifd_zlib_level = true;
+     params->has_multifd_zstd_level = true;
+#ifdef CONFIG_LINUX
+    params->has_zero_copy_send = true;
+#endif
+     params->has_xbzrle_cache_size = true;
+     params->has_max_postcopy_bandwidth = true;
+     params->has_max_cpu_throttle = true;
+diff --git a/migration/migration.h b/migration/migration.h
+index d016cedd9d..908098939f 100644
+--- a/migration/migration.h
+++ b/migration/migration.h
+@@ -339,6 +339,11 @@ MultiFDCompression migrate_multifd_compression(void);
+ int migrate_multifd_zlib_level(void);
+ int migrate_multifd_zstd_level(void);
+ 
+#ifdef CONFIG_LINUX
+bool migrate_use_zero_copy_send(void);
+#else
+#define migrate_use_zero_copy_send() (false)
+#endif
+ int migrate_use_xbzrle(void);
+ uint64_t migrate_xbzrle_cache_size(void);
+ bool migrate_colo_enabled(void);
+diff --git a/migration/socket.c b/migration/socket.c
+index 05705a32d8..3754d8f72c 100644
+--- a/migration/socket.c
+++ b/migration/socket.c
+@@ -74,9 +74,16 @@ static void socket_outgoing_migration(QIOTask *task,
+ 
+     if (qio_task_propagate_error(task, &err)) {
+         trace_migration_socket_outgoing_error(error_get_pretty(err));
+-    } else {
+-        trace_migration_socket_outgoing_connected(data->hostname);
+           goto out;
+     }
+
+    trace_migration_socket_outgoing_connected(data->hostname);
+
+    if (migrate_use_zero_copy_send()) {
+        error_setg(&err, "Zero copy send not available in migration");
+    }
+
+out:
+     migration_channel_connect(data->s, sioc, data->hostname, err);
+     object_unref(OBJECT(sioc));
+ }
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 2669156b28..e02da5008b 100644
+--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
+@@ -1297,6 +1297,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
+         p->has_multifd_zstd_level = true;
+         visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
+         break;
+#ifdef CONFIG_LINUX
+    case MIGRATION_PARAMETER_ZERO_COPY_SEND:
+        p->has_zero_copy_send = true;
+        visit_type_bool(v, param, &p->zero_copy_send, &err);
+        break;
+#endif
+     case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
+         p->has_xbzrle_cache_size = true;
+         if (!visit_type_size(v, param, &cache_size, &err)) {
+diff --git a/qapi/migration.json b/qapi/migration.json
+index bbfd48cf0b..59b5c5780b 100644
+--- a/qapi/migration.json
+++ b/qapi/migration.json
+@@ -730,6 +730,13 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#                  When true, enables a zero-copy mechanism for sending
+#                  memory pages, if host supports it.
+#                  Requires that QEMU be permitted to use locked memory
+#                  for guest RAM pages.
+#                  Defaults to false. (Since 7.1)
+#
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+ #                        aliases may for example be the corresponding names on the
+@@ -769,6 +776,7 @@
+            'xbzrle-cache-size', 'max-postcopy-bandwidth',
+            'max-cpu-throttle', 'multifd-compression',
+            'multifd-zlib-level' ,'multifd-zstd-level',
+           { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
+            'block-bitmap-mapping' ] }
+ 
+ ##
+@@ -895,6 +903,13 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#                  When true, enables a zero-copy mechanism for sending
+#                  memory pages, if host supports it.
+#                  Requires that QEMU be permitted to use locked memory
+#                  for guest RAM pages.
+#                  Defaults to false. (Since 7.1)
+#
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+ #                        aliases may for example be the corresponding names on the
+@@ -949,6 +964,7 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
+             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ 
+ ##
+@@ -1095,6 +1111,13 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#                  When true, enables a zero-copy mechanism for sending
+#                  memory pages, if host supports it.
+#                  Requires that QEMU be permitted to use locked memory
+#                  for guest RAM pages.
+#                  Defaults to false. (Since 7.1)
+#
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+ #                        aliases may for example be the corresponding names on the
+@@ -1147,6 +1170,7 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
+             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ 
+ ##
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch
+++ b/SOURCES/kvm-migration-All-this-fields-are-unsigned.patch
@ -0,0 +1,329 @@
+From b21f18afceba8231c78d29e66f58516e12c28d22 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 10/37] migration: All this fields are unsigned
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [10/26] 2c3ee27aae334db3b283ab7ef580f58e396e569d
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+So printing them as %d is wrong.  Note that the channel id is a
+uint8_t, but I changed it anyway for consistency.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+(cherry picked from commit 04e114049406dbb69fc9043c795ddd28fdba31a6)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 20 ++++++++++----------
+ migration/multifd-zstd.c | 24 ++++++++++++------------
+ migration/multifd.c      | 16 ++++++++--------
+ migration/trace-events   | 26 +++++++++++++-------------
+ 4 files changed, 43 insertions(+), 43 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index a1950a4588..a987e4a26c 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -52,7 +52,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
+     zs->opaque = Z_NULL;
+     if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) {
+         g_free(z);
+-        error_setg(errp, "multifd %d: deflate init failed", p->id);
+        error_setg(errp, "multifd %u: deflate init failed", p->id);
+         return -1;
+     }
+     /* We will never have more than page_count pages */
+@@ -62,7 +62,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
+     if (!z->zbuff) {
+         deflateEnd(&z->zs);
+         g_free(z);
+-        error_setg(errp, "multifd %d: out of memory for zbuff", p->id);
+        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
+         return -1;
+     }
+     p->data = z;
+@@ -134,12 +134,12 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+             ret = deflate(zs, flush);
+         } while (ret == Z_OK && zs->avail_in && zs->avail_out);
+         if (ret == Z_OK && zs->avail_in) {
+-            error_setg(errp, "multifd %d: deflate failed to compress all input",
+            error_setg(errp, "multifd %u: deflate failed to compress all input",
+                        p->id);
+             return -1;
+         }
+         if (ret != Z_OK) {
+-            error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK",
+            error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK",
+                        p->id, ret);
+             return -1;
+         }
+@@ -193,7 +193,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp)
+     zs->avail_in = 0;
+     zs->next_in = Z_NULL;
+     if (inflateInit(zs) != Z_OK) {
+-        error_setg(errp, "multifd %d: inflate init failed", p->id);
+        error_setg(errp, "multifd %u: inflate init failed", p->id);
+         return -1;
+     }
+     /* We will never have more than page_count pages */
+@@ -203,7 +203,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp)
+     z->zbuff = g_try_malloc(z->zbuff_len);
+     if (!z->zbuff) {
+         inflateEnd(zs);
+-        error_setg(errp, "multifd %d: out of memory for zbuff", p->id);
+        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
+         return -1;
+     }
+     return 0;
+@@ -252,7 +252,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+     int i;
+ 
+     if (flags != MULTIFD_FLAG_ZLIB) {
+-        error_setg(errp, "multifd %d: flags received %x flags expected %x",
+        error_setg(errp, "multifd %u: flags received %x flags expected %x",
+                    p->id, flags, MULTIFD_FLAG_ZLIB);
+         return -1;
+     }
+@@ -289,19 +289,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+         } while (ret == Z_OK && zs->avail_in
+                              && (zs->total_out - start) < page_size);
+         if (ret == Z_OK && (zs->total_out - start) < page_size) {
+-            error_setg(errp, "multifd %d: inflate generated too few output",
+            error_setg(errp, "multifd %u: inflate generated too few output",
+                        p->id);
+             return -1;
+         }
+         if (ret != Z_OK) {
+-            error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK",
+            error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK",
+                        p->id, ret);
+             return -1;
+         }
+     }
+     out_size = zs->total_out - out_size;
+     if (out_size != expected_size) {
+-        error_setg(errp, "multifd %d: packet size received %d size expected %d",
+        error_setg(errp, "multifd %u: packet size received %u size expected %u",
+                    p->id, out_size, expected_size);
+         return -1;
+     }
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index d9ed42622b..2185a83eac 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -56,7 +56,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
+     z->zcs = ZSTD_createCStream();
+     if (!z->zcs) {
+         g_free(z);
+-        error_setg(errp, "multifd %d: zstd createCStream failed", p->id);
+        error_setg(errp, "multifd %u: zstd createCStream failed", p->id);
+         return -1;
+     }
+ 
+@@ -64,7 +64,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
+     if (ZSTD_isError(res)) {
+         ZSTD_freeCStream(z->zcs);
+         g_free(z);
+-        error_setg(errp, "multifd %d: initCStream failed with error %s",
+        error_setg(errp, "multifd %u: initCStream failed with error %s",
+                    p->id, ZSTD_getErrorName(res));
+         return -1;
+     }
+@@ -75,7 +75,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
+     if (!z->zbuff) {
+         ZSTD_freeCStream(z->zcs);
+         g_free(z);
+-        error_setg(errp, "multifd %d: out of memory for zbuff", p->id);
+        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
+         return -1;
+     }
+     return 0;
+@@ -146,12 +146,12 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+         } while (ret > 0 && (z->in.size - z->in.pos > 0)
+                          && (z->out.size - z->out.pos > 0));
+         if (ret > 0 && (z->in.size - z->in.pos > 0)) {
+-            error_setg(errp, "multifd %d: compressStream buffer too small",
+            error_setg(errp, "multifd %u: compressStream buffer too small",
+                        p->id);
+             return -1;
+         }
+         if (ZSTD_isError(ret)) {
+-            error_setg(errp, "multifd %d: compressStream error %s",
+            error_setg(errp, "multifd %u: compressStream error %s",
+                        p->id, ZSTD_getErrorName(ret));
+             return -1;
+         }
+@@ -201,7 +201,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp)
+     z->zds = ZSTD_createDStream();
+     if (!z->zds) {
+         g_free(z);
+-        error_setg(errp, "multifd %d: zstd createDStream failed", p->id);
+        error_setg(errp, "multifd %u: zstd createDStream failed", p->id);
+         return -1;
+     }
+ 
+@@ -209,7 +209,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp)
+     if (ZSTD_isError(ret)) {
+         ZSTD_freeDStream(z->zds);
+         g_free(z);
+-        error_setg(errp, "multifd %d: initDStream failed with error %s",
+        error_setg(errp, "multifd %u: initDStream failed with error %s",
+                    p->id, ZSTD_getErrorName(ret));
+         return -1;
+     }
+@@ -222,7 +222,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp)
+     if (!z->zbuff) {
+         ZSTD_freeDStream(z->zds);
+         g_free(z);
+-        error_setg(errp, "multifd %d: out of memory for zbuff", p->id);
+        error_setg(errp, "multifd %u: out of memory for zbuff", p->id);
+         return -1;
+     }
+     return 0;
+@@ -270,7 +270,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+     int i;
+ 
+     if (flags != MULTIFD_FLAG_ZSTD) {
+-        error_setg(errp, "multifd %d: flags received %x flags expected %x",
+        error_setg(errp, "multifd %u: flags received %x flags expected %x",
+                    p->id, flags, MULTIFD_FLAG_ZSTD);
+         return -1;
+     }
+@@ -302,19 +302,19 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+         } while (ret > 0 && (z->in.size - z->in.pos > 0)
+                          && (z->out.pos < page_size));
+         if (ret > 0 && (z->out.pos < page_size)) {
+-            error_setg(errp, "multifd %d: decompressStream buffer too small",
+            error_setg(errp, "multifd %u: decompressStream buffer too small",
+                        p->id);
+             return -1;
+         }
+         if (ZSTD_isError(ret)) {
+-            error_setg(errp, "multifd %d: decompressStream returned %s",
+            error_setg(errp, "multifd %u: decompressStream returned %s",
+                        p->id, ZSTD_getErrorName(ret));
+             return ret;
+         }
+         out_size += z->out.pos;
+     }
+     if (out_size != expected_size) {
+-        error_setg(errp, "multifd %d: packet size received %d size expected %d",
+        error_setg(errp, "multifd %u: packet size received %u size expected %u",
+                    p->id, out_size, expected_size);
+         return -1;
+     }
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 0533da154a..d0d19470f9 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -148,7 +148,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+ 
+     if (flags != MULTIFD_FLAG_NOCOMP) {
+-        error_setg(errp, "multifd %d: flags received %x flags expected %x",
+        error_setg(errp, "multifd %u: flags received %x flags expected %x",
+                    p->id, flags, MULTIFD_FLAG_NOCOMP);
+         return -1;
+     }
+@@ -212,8 +212,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
+     }
+ 
+     if (msg.version != MULTIFD_VERSION) {
+-        error_setg(errp, "multifd: received packet version %d "
+-                   "expected %d", msg.version, MULTIFD_VERSION);
+        error_setg(errp, "multifd: received packet version %u "
+                   "expected %u", msg.version, MULTIFD_VERSION);
+         return -1;
+     }
+ 
+@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
+     }
+ 
+     if (msg.id > migrate_multifd_channels()) {
+-        error_setg(errp, "multifd: received channel version %d "
+-                   "expected %d", msg.version, MULTIFD_VERSION);
+        error_setg(errp, "multifd: received channel version %u "
+                   "expected %u", msg.version, MULTIFD_VERSION);
+         return -1;
+     }
+ 
+@@ -303,7 +303,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+     packet->version = be32_to_cpu(packet->version);
+     if (packet->version != MULTIFD_VERSION) {
+         error_setg(errp, "multifd: received packet "
+-                   "version %d and expected version %d",
+                   "version %u and expected version %u",
+                    packet->version, MULTIFD_VERSION);
+         return -1;
+     }
+@@ -317,7 +317,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+      */
+     if (packet->pages_alloc > pages_max * 100) {
+         error_setg(errp, "multifd: received packet "
+-                   "with size %d and expected a maximum size of %d",
+                   "with size %u and expected a maximum size of %u",
+                    packet->pages_alloc, pages_max * 100) ;
+         return -1;
+     }
+@@ -333,7 +333,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+     p->pages->num = be32_to_cpu(packet->pages_used);
+     if (p->pages->num > packet->pages_alloc) {
+         error_setg(errp, "multifd: received packet "
+-                   "with %d pages and expected maximum pages are %d",
+                   "with %u pages and expected maximum pages are %u",
+                    p->pages->num, packet->pages_alloc) ;
+         return -1;
+     }
+diff --git a/migration/trace-events b/migration/trace-events
+index b48d873b8a..5172cb3b3d 100644
+--- a/migration/trace-events
+++ b/migration/trace-events
+@@ -115,23 +115,23 @@ ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void *
+ ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu"
+ 
+ # multifd.c
+-multifd_new_send_channel_async(uint8_t id) "channel %d"
+-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d"
+-multifd_recv_new_channel(uint8_t id) "channel %d"
+multifd_new_send_channel_async(uint8_t id) "channel %u"
+multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u"
+multifd_recv_new_channel(uint8_t id) "channel %u"
+ multifd_recv_sync_main(long packet_num) "packet num %ld"
+-multifd_recv_sync_main_signal(uint8_t id) "channel %d"
+-multifd_recv_sync_main_wait(uint8_t id) "channel %d"
+multifd_recv_sync_main_signal(uint8_t id) "channel %u"
+multifd_recv_sync_main_wait(uint8_t id) "channel %u"
+ multifd_recv_terminate_threads(bool error) "error %d"
+-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64
+-multifd_recv_thread_start(uint8_t id) "%d"
+-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d"
+-multifd_send_error(uint8_t id) "channel %d"
+multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64
+multifd_recv_thread_start(uint8_t id) "%u"
+multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u"
+multifd_send_error(uint8_t id) "channel %u"
+ multifd_send_sync_main(long packet_num) "packet num %ld"
+-multifd_send_sync_main_signal(uint8_t id) "channel %d"
+-multifd_send_sync_main_wait(uint8_t id) "channel %d"
+multifd_send_sync_main_signal(uint8_t id) "channel %u"
+multifd_send_sync_main_wait(uint8_t id) "channel %u"
+ multifd_send_terminate_threads(bool error) "error %d"
+-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %"  PRIu64
+-multifd_send_thread_start(uint8_t id) "%d"
+multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %"  PRIu64
+multifd_send_thread_start(uint8_t id) "%u"
+ multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
+ multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"
+ multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p"
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch
+++ b/SOURCES/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch
@ -0,0 +1,98 @@
+From f5be3d8a5944679c1239b974e0f910f1afe4f532 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Thu, 31 Mar 2022 11:08:45 -0400
+Subject: [PATCH 28/37] migration: Allow migrate-recover to run multiple times
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Peter Xu <peterx@redhat.com>
+RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times
+RH-Commit: [2/2] a2e6b02007a06c9c7f5237289095811c7d7ca1f1
+RH-Bugzilla: 2097652
+RH-Acked-by: Leonardo Brás <leobras@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+
+Previously migration didn't have an easy way to clean up the listening
+transport, so migrate recovery could only be executed once.  That was done
+with a trick flag in postcopy_recover_triggered.
+
+Now the facility is already there.
+
+Drop postcopy_recover_triggered and instead allow a new migrate-recover to
+release the previous listener transport.
+
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20220331150857.74406-8-peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 08401c0426bc1a5ce4609afd1cda5dd39abbf9fa)
+Signed-off-by: Peter Xu <peterx@redhat.com>
+---
+ migration/migration.c | 13 ++-----------
+ migration/migration.h |  1 -
+ migration/savevm.c    |  3 ---
+ 3 files changed, 2 insertions(+), 15 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index b787a36789..616c3ff32e 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -2158,11 +2158,8 @@ void qmp_migrate_recover(const char *uri, Error **errp)
+         return;
+     }
+ 
+-    if (qatomic_cmpxchg(&mis->postcopy_recover_triggered,
+-                       false, true) == true) {
+-        error_setg(errp, "Migrate recovery is triggered already");
+-        return;
+-    }
+    /* If there's an existing transport, release it */
+    migration_incoming_transport_cleanup(mis);
+ 
+     /*
+      * Note that this call will never start a real migration; it will
+@@ -2170,12 +2167,6 @@ void qmp_migrate_recover(const char *uri, Error **errp)
+      * to continue using that newly established channel.
+      */
+     qemu_start_incoming_migration(uri, errp);
+-
+-    /* Safe to dereference with the assert above */
+-    if (*errp) {
+-        /* Reset the flag so user could still retry */
+-        qatomic_set(&mis->postcopy_recover_triggered, false);
+-    }
+ }
+ 
+ void qmp_migrate_pause(Error **errp)
+diff --git a/migration/migration.h b/migration/migration.h
+index 243898e3be..0ae2133326 100644
+--- a/migration/migration.h
+++ b/migration/migration.h
+@@ -103,7 +103,6 @@ struct MigrationIncomingState {
+     struct PostcopyBlocktimeContext *blocktime_ctx;
+ 
+     /* notify PAUSED postcopy incoming migrations to try to continue */
+-    bool postcopy_recover_triggered;
+     QemuSemaphore postcopy_pause_sem_dst;
+     QemuSemaphore postcopy_pause_sem_fault;
+ 
+diff --git a/migration/savevm.c b/migration/savevm.c
+index 0bef031acb..b8382aaa64 100644
+--- a/migration/savevm.c
+++ b/migration/savevm.c
+@@ -2568,9 +2568,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis)
+ 
+     assert(migrate_postcopy_ram());
+ 
+-    /* Clear the triggered bit to allow one recovery */
+-    mis->postcopy_recover_triggered = false;
+-
+     /*
+      * Unregister yank with either from/to src would work, since ioc behind it
+      * is the same
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch
+++ b/SOURCES/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch
@ -0,0 +1,93 @@
+From 097f72427f4f5da4fdcdbeee52aea0c1f67d54dc Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Tue, 19 Jul 2022 09:23:45 -0300
+Subject: [PATCH 6/9] migration: Avoid false-positive on non-supported
+ scenarios for zero-copy-send
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [6/8] f23195f3ab4f6eba0463f38e5971ccaccdac2cfd
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Migration with zero-copy-send currently has its limitations, as it can't
+be used with TLS or any kind of compression. In such scenarios, it should
+output errors during parameter / capability setting.
+
+But currently there are some ways of setting these unsupported scenarios
+without printing the error message:
+
+1) For 'compression' capability, it works by enabling it together with
+zero-copy-send. This happens because the validity test for zero-copy uses
+the helper function migrate_use_compression(), which checks for compression
+presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS].
+
+The point here is: the validity test happens before the capability gets
+enabled. If all of them get enabled together, this test will not return
+error.
+
+In order to fix that, replace migrate_use_compression() by directly testing
+the cap_list parameter of migrate_caps_check().
+
+2) For features enabled by parameters such as TLS & 'multifd_compression',
+there was also a possibility of setting non-supported scenarios: setting
+zero-copy-send first, then setting the unsupported parameter.
+
+In order to fix that, also add a check for parameters conflicting with
+zero-copy-send on migrate_params_check().
+
+3) XBZRLE is also a compression capability, so it makes sense to also add
+it to the list of capabilities which are not supported with zero-copy-send.
+
+Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability")
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Message-Id: <20220719122345.253713-1-leobras@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 952a26c5c2..35b3197eff 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -1260,7 +1260,9 @@ static bool migrate_caps_check(bool *cap_list,
+ #ifdef CONFIG_LINUX
+     if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
+         (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
+-         migrate_use_compression() ||
+         cap_list[MIGRATION_CAPABILITY_COMPRESS] ||
+         cap_list[MIGRATION_CAPABILITY_XBZRLE] ||
+         migrate_multifd_compression() ||
+          migrate_use_tls())) {
+         error_setg(errp,
+                    "Zero copy only available for non-compressed non-TLS multifd migration");
+@@ -1497,6 +1499,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
+         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
+         return false;
+     }
+
+#ifdef CONFIG_LINUX
+    if (migrate_use_zero_copy_send() &&
+        ((params->has_multifd_compression && params->multifd_compression) ||
+         (params->has_tls_creds && params->tls_creds && *params->tls_creds))) {
+        error_setg(errp,
+                   "Zero copy only available for non-compressed non-TLS multifd migration");
+        return false;
+    }
+#endif
+
+     return true;
+ }
+ 
+-- 
+2.31.1
+
--- a/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch
+++ b/SOURCES/kvm-migration-Change-zero_copy_send-from-migration-param.patch
@ -0,0 +1,289 @@
+From 70108ff9ffe77062116e47670c0e0c2396529f88 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 20 Jun 2022 02:39:45 -0300
+Subject: [PATCH 26/37] migration: Change zero_copy_send from migration
+ parameter to migration capability
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [26/26] ea61e6cbdbe47611bd22d18988e1c4c4e8357cc3
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+When originally implemented, zero_copy_send was designed as a Migration
+parameter.
+
+But taking into account how it is supposed to work, and the difference
+between a capability and a parameter, it only makes sense
+that zero-copy-send would work better as a capability.
+
+Taking into account how recently the change got merged, it was decided
+that it's still time to make it right, and convert zero_copy_send into
+a Migration capability.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Acked-by: Markus Armbruster <armbru@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+  dgilbert: always define the capability, even on non-Linux but error if
+set; avoids build problems with the capability
+(cherry picked from commit 1abaec9a1b2c23f7aa94709a422128d9e42c3e0b)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 58 +++++++++++++++++++------------------------
+ monitor/hmp-cmds.c    |  6 -----
+ qapi/migration.json   | 33 +++++++-----------------
+ 3 files changed, 34 insertions(+), 63 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 5357efd348..c8aa55d2fe 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -162,7 +162,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
+     MIGRATION_CAPABILITY_COMPRESS,
+     MIGRATION_CAPABILITY_XBZRLE,
+     MIGRATION_CAPABILITY_X_COLO,
+-    MIGRATION_CAPABILITY_VALIDATE_UUID);
+    MIGRATION_CAPABILITY_VALIDATE_UUID,
+    MIGRATION_CAPABILITY_ZERO_COPY_SEND);
+ 
+ bool migrate_pre_2_2;
+ 
+@@ -888,10 +889,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
+     params->multifd_zlib_level = s->parameters.multifd_zlib_level;
+     params->has_multifd_zstd_level = true;
+     params->multifd_zstd_level = s->parameters.multifd_zstd_level;
+-#ifdef CONFIG_LINUX
+-    params->has_zero_copy_send = true;
+-    params->zero_copy_send = s->parameters.zero_copy_send;
+-#endif
+     params->has_xbzrle_cache_size = true;
+     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
+     params->has_max_postcopy_bandwidth = true;
+@@ -1249,6 +1246,24 @@ static bool migrate_caps_check(bool *cap_list,
+         }
+     }
+ 
+#ifdef CONFIG_LINUX
+    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] &&
+        (!cap_list[MIGRATION_CAPABILITY_MULTIFD] ||
+         migrate_use_compression() ||
+         migrate_use_tls())) {
+        error_setg(errp,
+                   "Zero copy only available for non-compressed non-TLS multifd migration");
+        return false;
+    }
+#else
+    if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND]) {
+        error_setg(errp,
+                   "Zero copy currently only available on Linux");
+        return false;
+    }
+#endif
+
+
+     /* incoming side only */
+     if (runstate_check(RUN_STATE_INMIGRATE) &&
+         !migrate_multifd_is_allowed() &&
+@@ -1471,16 +1486,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
+         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
+         return false;
+     }
+-#ifdef CONFIG_LINUX
+-    if (params->zero_copy_send &&
+-        (!migrate_use_multifd() ||
+-         params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
+-         (params->tls_creds && *params->tls_creds))) {
+-        error_setg(errp,
+-                   "Zero copy only available for non-compressed non-TLS multifd migration");
+-        return false;
+-    }
+-#endif
+     return true;
+ }
+ 
+@@ -1554,11 +1559,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
+     if (params->has_multifd_compression) {
+         dest->multifd_compression = params->multifd_compression;
+     }
+-#ifdef CONFIG_LINUX
+-    if (params->has_zero_copy_send) {
+-        dest->zero_copy_send = params->zero_copy_send;
+-    }
+-#endif
+     if (params->has_xbzrle_cache_size) {
+         dest->xbzrle_cache_size = params->xbzrle_cache_size;
+     }
+@@ -1671,11 +1671,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
+     if (params->has_multifd_compression) {
+         s->parameters.multifd_compression = params->multifd_compression;
+     }
+-#ifdef CONFIG_LINUX
+-    if (params->has_zero_copy_send) {
+-        s->parameters.zero_copy_send = params->zero_copy_send;
+-    }
+-#endif
+     if (params->has_xbzrle_cache_size) {
+         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
+         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
+@@ -2573,7 +2568,7 @@ bool migrate_use_zero_copy_send(void)
+ 
+     s = migrate_get_current();
+ 
+-    return s->parameters.zero_copy_send;
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
+ }
+ #endif
+ 
+@@ -4236,10 +4231,6 @@ static Property migration_properties[] = {
+     DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
+                       parameters.multifd_zstd_level,
+                       DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
+-#ifdef CONFIG_LINUX
+-    DEFINE_PROP_BOOL("zero_copy_send", MigrationState,
+-                      parameters.zero_copy_send, false),
+-#endif
+     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
+                       parameters.xbzrle_cache_size,
+                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
+@@ -4277,6 +4268,10 @@ static Property migration_properties[] = {
+     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
+     DEFINE_PROP_MIG_CAP("x-background-snapshot",
+             MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),
+#ifdef CONFIG_LINUX
+    DEFINE_PROP_MIG_CAP("x-zero-copy-send",
+            MIGRATION_CAPABILITY_ZERO_COPY_SEND),
+#endif
+ 
+     DEFINE_PROP_END_OF_LIST(),
+ };
+@@ -4337,9 +4332,6 @@ static void migration_instance_init(Object *obj)
+     params->has_multifd_compression = true;
+     params->has_multifd_zlib_level = true;
+     params->has_multifd_zstd_level = true;
+-#ifdef CONFIG_LINUX
+-    params->has_zero_copy_send = true;
+-#endif
+     params->has_xbzrle_cache_size = true;
+     params->has_max_postcopy_bandwidth = true;
+     params->has_max_cpu_throttle = true;
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index e02da5008b..2669156b28 100644
+--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
+@@ -1297,12 +1297,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict)
+         p->has_multifd_zstd_level = true;
+         visit_type_uint8(v, param, &p->multifd_zstd_level, &err);
+         break;
+-#ifdef CONFIG_LINUX
+-    case MIGRATION_PARAMETER_ZERO_COPY_SEND:
+-        p->has_zero_copy_send = true;
+-        visit_type_bool(v, param, &p->zero_copy_send, &err);
+-        break;
+-#endif
+     case MIGRATION_PARAMETER_XBZRLE_CACHE_SIZE:
+         p->has_xbzrle_cache_size = true;
+         if (!visit_type_size(v, param, &cache_size, &err)) {
+diff --git a/qapi/migration.json b/qapi/migration.json
+index 59b5c5780b..fe70a0c4b2 100644
+--- a/qapi/migration.json
+++ b/qapi/migration.json
+@@ -452,6 +452,13 @@
+ #                       procedure starts. The VM RAM is saved with running VM.
+ #                       (since 6.0)
+ #
+# @zero-copy-send: Controls behavior on sending memory pages on migration.
+#                  When true, enables a zero-copy mechanism for sending
+#                  memory pages, if host supports it.
+#                  Requires that QEMU be permitted to use locked memory
+#                  for guest RAM pages.
+#                  (since 7.1)
+#
+ # Features:
+ # @unstable: Members @x-colo and @x-ignore-shared are experimental.
+ #
+@@ -465,7 +472,8 @@
+            'block', 'return-path', 'pause-before-switchover', 'multifd',
+            'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
+            { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] },
+-           'validate-uuid', 'background-snapshot'] }
+           'validate-uuid', 'background-snapshot',
+           'zero-copy-send'] }
+ 
+ ##
+ # @MigrationCapabilityStatus:
+@@ -730,12 +738,6 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+-# @zero-copy-send: Controls behavior on sending memory pages on migration.
+-#                  When true, enables a zero-copy mechanism for sending
+-#                  memory pages, if host supports it.
+-#                  Requires that QEMU be permitted to use locked memory
+-#                  for guest RAM pages.
+-#                  Defaults to false. (Since 7.1)
+ #
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+@@ -776,7 +778,6 @@
+            'xbzrle-cache-size', 'max-postcopy-bandwidth',
+            'max-cpu-throttle', 'multifd-compression',
+            'multifd-zlib-level' ,'multifd-zstd-level',
+-           { 'name': 'zero-copy-send', 'if' : 'CONFIG_LINUX'},
+            'block-bitmap-mapping' ] }
+ 
+ ##
+@@ -903,13 +904,6 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+-# @zero-copy-send: Controls behavior on sending memory pages on migration.
+-#                  When true, enables a zero-copy mechanism for sending
+-#                  memory pages, if host supports it.
+-#                  Requires that QEMU be permitted to use locked memory
+-#                  for guest RAM pages.
+-#                  Defaults to false. (Since 7.1)
+-#
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+ #                        aliases may for example be the corresponding names on the
+@@ -964,7 +958,6 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+-            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
+             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ 
+ ##
+@@ -1111,13 +1104,6 @@
+ #                      will consume more CPU.
+ #                      Defaults to 1. (Since 5.0)
+ #
+-# @zero-copy-send: Controls behavior on sending memory pages on migration.
+-#                  When true, enables a zero-copy mechanism for sending
+-#                  memory pages, if host supports it.
+-#                  Requires that QEMU be permitted to use locked memory
+-#                  for guest RAM pages.
+-#                  Defaults to false. (Since 7.1)
+-#
+ # @block-bitmap-mapping: Maps block nodes and bitmaps on them to
+ #                        aliases for the purpose of dirty bitmap migration.  Such
+ #                        aliases may for example be the corresponding names on the
+@@ -1170,7 +1156,6 @@
+             '*multifd-compression': 'MultiFDCompression',
+             '*multifd-zlib-level': 'uint8',
+             '*multifd-zstd-level': 'uint8',
+-            '*zero-copy-send': { 'type': 'bool', 'if': 'CONFIG_LINUX' },
+             '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } }
+ 
+ ##
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch
+++ b/SOURCES/kvm-migration-Introduce-ram_transferred_add.patch
@ -0,0 +1,122 @@
+From 030b54f5a2b2c8976370c962e9847af4746ac2c2 Mon Sep 17 00:00:00 2001
+From: David Edmondson <david.edmondson@oracle.com>
+Date: Tue, 21 Dec 2021 09:34:40 +0000
+Subject: [PATCH 1/9] migration: Introduce ram_transferred_add()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [1/8] a6545760b0de13d533f6164be0545a6720bb42c7
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Replace direct manipulation of ram_counters.transferred with a
+function.
+
+Signed-off-by: David Edmondson <david.edmondson@oracle.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+(cherry picked from commit 4c2d0f6dca24f3396ab0718ad3f9f53cc53004df)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/ram.c | 23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/migration/ram.c b/migration/ram.c
+index 3e208efca7..3e82c4ff46 100644
+--- a/migration/ram.c
+++ b/migration/ram.c
+@@ -391,6 +391,11 @@ uint64_t ram_bytes_remaining(void)
+ 
+ MigrationStats ram_counters;
+ 
+static void ram_transferred_add(uint64_t bytes)
+{
+    ram_counters.transferred += bytes;
+}
+
+ /* used by the search for pages to send */
+ struct PageSearchStatus {
+     /* Current block being searched */
+@@ -772,7 +777,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
+      * RAM_SAVE_FLAG_CONTINUE.
+      */
+     xbzrle_counters.bytes += bytes_xbzrle - 8;
+-    ram_counters.transferred += bytes_xbzrle;
+    ram_transferred_add(bytes_xbzrle);
+ 
+     return 1;
+ }
+@@ -1203,7 +1208,7 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
+ 
+     if (len) {
+         ram_counters.duplicate++;
+-        ram_counters.transferred += len;
+        ram_transferred_add(len);
+         return 1;
+     }
+     return -1;
+@@ -1239,7 +1244,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
+     }
+ 
+     if (bytes_xmit) {
+-        ram_counters.transferred += bytes_xmit;
+        ram_transferred_add(bytes_xmit);
+         *pages = 1;
+     }
+ 
+@@ -1270,8 +1275,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
+ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
+                             uint8_t *buf, bool async)
+ {
+-    ram_counters.transferred += save_page_header(rs, rs->f, block,
+-                                                 offset | RAM_SAVE_FLAG_PAGE);
+    ram_transferred_add(save_page_header(rs, rs->f, block,
+                                         offset | RAM_SAVE_FLAG_PAGE));
+     if (async) {
+         qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
+                               migrate_release_ram() &
+@@ -1279,7 +1284,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
+     } else {
+         qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
+     }
+-    ram_counters.transferred += TARGET_PAGE_SIZE;
+    ram_transferred_add(TARGET_PAGE_SIZE);
+     ram_counters.normal++;
+     return 1;
+ }
+@@ -1378,7 +1383,7 @@ exit:
+ static void
+ update_compress_thread_counts(const CompressParam *param, int bytes_xmit)
+ {
+-    ram_counters.transferred += bytes_xmit;
+    ram_transferred_add(bytes_xmit);
+ 
+     if (param->zero_page) {
+         ram_counters.duplicate++;
+@@ -2303,7 +2308,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero)
+         ram_counters.duplicate += pages;
+     } else {
+         ram_counters.normal += pages;
+-        ram_counters.transferred += size;
+        ram_transferred_add(size);
+         qemu_update_position(f, size);
+     }
+ }
+@@ -3147,7 +3152,7 @@ out:
+ 
+         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+         qemu_fflush(f);
+-        ram_counters.transferred += 8;
+        ram_transferred_add(8);
+ 
+         ret = qemu_file_get_error(f);
+     }
+-- 
+2.31.1
+
--- a/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch
+++ b/SOURCES/kvm-migration-Never-call-twice-qemu_target_page_size.patch
@ -0,0 +1,116 @@
+From 6a9a5a2809cbbe2982df156722b88efeec998e3d Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:22 -0300
+Subject: [PATCH 01/37] migration: Never call twice qemu_target_page_size()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [1/26] 809ca84dec80bafc1959df8c9e57f482ee752a97
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 144fa06b3431e806057ce1438338395b35a3e544)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 7 ++++---
+ migration/multifd.c   | 7 ++++---
+ migration/savevm.c    | 5 +++--
+ 3 files changed, 11 insertions(+), 8 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index a87ff01b81..8a13294da6 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -992,6 +992,8 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s)
+ 
+ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
+ {
+    size_t page_size = qemu_target_page_size();
+
+     info->has_ram = true;
+     info->ram = g_malloc0(sizeof(*info->ram));
+     info->ram->transferred = ram_counters.transferred;
+@@ -1000,12 +1002,11 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
+     /* legacy value.  It is not used anymore */
+     info->ram->skipped = 0;
+     info->ram->normal = ram_counters.normal;
+-    info->ram->normal_bytes = ram_counters.normal *
+-        qemu_target_page_size();
+    info->ram->normal_bytes = ram_counters.normal * page_size;
+     info->ram->mbps = s->mbps;
+     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
+     info->ram->postcopy_requests = ram_counters.postcopy_requests;
+-    info->ram->page_size = qemu_target_page_size();
+    info->ram->page_size = page_size;
+     info->ram->multifd_bytes = ram_counters.multifd_bytes;
+     info->ram->pages_per_second = s->pages_per_second;
+ 
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 7c9deb1921..8125d0015c 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -289,7 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
+ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+ {
+     MultiFDPacket_t *packet = p->packet;
+-    uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size();
+    size_t page_size = qemu_target_page_size();
+    uint32_t pages_max = MULTIFD_PACKET_SIZE / page_size;
+     RAMBlock *block;
+     int i;
+ 
+@@ -358,14 +359,14 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+     for (i = 0; i < p->pages->used; i++) {
+         uint64_t offset = be64_to_cpu(packet->offset[i]);
+ 
+-        if (offset > (block->used_length - qemu_target_page_size())) {
+        if (offset > (block->used_length - page_size)) {
+             error_setg(errp, "multifd: offset too long %" PRIu64
+                        " (max " RAM_ADDR_FMT ")",
+                        offset, block->used_length);
+             return -1;
+         }
+         p->pages->iov[i].iov_base = block->host + offset;
+-        p->pages->iov[i].iov_len = qemu_target_page_size();
+        p->pages->iov[i].iov_len = page_size;
+     }
+ 
+     return 0;
+diff --git a/migration/savevm.c b/migration/savevm.c
+index d59e976d50..0bef031acb 100644
+--- a/migration/savevm.c
+++ b/migration/savevm.c
+@@ -1685,6 +1685,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
+ {
+     PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE);
+     uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps;
+    size_t page_size = qemu_target_page_size();
+     Error *local_err = NULL;
+ 
+     trace_loadvm_postcopy_handle_advise();
+@@ -1741,13 +1742,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis,
+     }
+ 
+     remote_tps = qemu_get_be64(mis->from_src_file);
+-    if (remote_tps != qemu_target_page_size()) {
+    if (remote_tps != page_size) {
+         /*
+          * Again, some differences could be dealt with, but for now keep it
+          * simple.
+          */
+         error_report("Postcopy needs matching target page sizes (s=%d d=%zd)",
+-                     (int)remote_tps, qemu_target_page_size());
+                     (int)remote_tps, page_size);
+         return -1;
+     }
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch
+++ b/SOURCES/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch
@ -0,0 +1,122 @@
+From 82637509cc9197ad9d1e1b286a608bf0da04b7b3 Mon Sep 17 00:00:00 2001
+From: David Edmondson <david.edmondson@oracle.com>
+Date: Tue, 21 Dec 2021 09:34:41 +0000
+Subject: [PATCH 2/9] migration: Tally pre-copy, downtime and post-copy bytes
+ independently
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [2/8] 7d1bf37a3d93da88da6525d70fc1fce1abb92b83
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Provide information on the number of bytes copied in the pre-copy,
+downtime and post-copy phases of migration.
+
+Signed-off-by: David Edmondson <david.edmondson@oracle.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Juan Quintela <quintela@redhat.com>
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+(cherry picked from commit ae6806688016711bb9ec7541266d76ab511c5e3b)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c |  3 +++
+ migration/ram.c       |  7 +++++++
+ monitor/hmp-cmds.c    | 12 ++++++++++++
+ qapi/migration.json   | 13 ++++++++++++-
+ 4 files changed, 34 insertions(+), 1 deletion(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 616c3ff32e..e100b30f00 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -1016,6 +1016,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s)
+     info->ram->page_size = page_size;
+     info->ram->multifd_bytes = ram_counters.multifd_bytes;
+     info->ram->pages_per_second = s->pages_per_second;
+    info->ram->precopy_bytes = ram_counters.precopy_bytes;
+    info->ram->downtime_bytes = ram_counters.downtime_bytes;
+    info->ram->postcopy_bytes = ram_counters.postcopy_bytes;
+ 
+     if (migrate_use_xbzrle()) {
+         info->has_xbzrle_cache = true;
+diff --git a/migration/ram.c b/migration/ram.c
+index 3e82c4ff46..e7173da217 100644
+--- a/migration/ram.c
+++ b/migration/ram.c
+@@ -393,6 +393,13 @@ MigrationStats ram_counters;
+ 
+ static void ram_transferred_add(uint64_t bytes)
+ {
+    if (runstate_is_running()) {
+        ram_counters.precopy_bytes += bytes;
+    } else if (migration_in_postcopy()) {
+        ram_counters.postcopy_bytes += bytes;
+    } else {
+        ram_counters.downtime_bytes += bytes;
+    }
+     ram_counters.transferred += bytes;
+ }
+ 
+diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
+index 2669156b28..8c384dc1b2 100644
+--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
+@@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
+             monitor_printf(mon, "postcopy request count: %" PRIu64 "\n",
+                            info->ram->postcopy_requests);
+         }
+        if (info->ram->precopy_bytes) {
+            monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n",
+                           info->ram->precopy_bytes >> 10);
+        }
+        if (info->ram->downtime_bytes) {
+            monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n",
+                           info->ram->downtime_bytes >> 10);
+        }
+        if (info->ram->postcopy_bytes) {
+            monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n",
+                           info->ram->postcopy_bytes >> 10);
+        }
+     }
+ 
+     if (info->has_disk) {
+diff --git a/qapi/migration.json b/qapi/migration.json
+index fe70a0c4b2..c8ec260ab0 100644
+--- a/qapi/migration.json
+++ b/qapi/migration.json
+@@ -46,6 +46,15 @@
+ # @pages-per-second: the number of memory pages transferred per second
+ #                    (Since 4.0)
+ #
+# @precopy-bytes: The number of bytes sent in the pre-copy phase
+#                 (since 7.0).
+#
+# @downtime-bytes: The number of bytes sent while the guest is paused
+#                  (since 7.0).
+#
+# @postcopy-bytes: The number of bytes sent during the post-copy phase
+#                  (since 7.0).
+#
+ # Since: 0.14
+ ##
+ { 'struct': 'MigrationStats',
+@@ -54,7 +63,9 @@
+            'normal-bytes': 'int', 'dirty-pages-rate' : 'int',
+            'mbps' : 'number', 'dirty-sync-count' : 'int',
+            'postcopy-requests' : 'int', 'page-size' : 'int',
+-           'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } }
+           'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64',
+           'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64',
+           'postcopy-bytes' : 'uint64' } }
+ 
+ ##
+ # @XBZRLECacheStats:
+-- 
+2.31.1
+
--- a/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch
+++ b/SOURCES/kvm-migration-add-remaining-params-has_-true-in-migratio.patch
@ -0,0 +1,62 @@
+From 8aecb49fdd771c5819fccc9e750b2e9cd4e94b58 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 25 Jul 2022 22:02:35 -0300
+Subject: [PATCH 7/9] migration: add remaining params->has_* = true in
+ migration_instance_init()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [7/8] fb622e5b88e14eb859d4903d9c088ba6ca63fc81
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Some of the params->has_* = true assignments are missing in
+migration_instance_init(). This causes migrate_params_check() to skip
+some tests, allowing some unsupported scenarios.
+
+Fix this by adding all missing params->has_* = true in
+migration_instance_init().
+
+Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters")
+Fixes: 1d58872a91 ("migration: do not wait for free thread")
+Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter")
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Message-Id: <20220726010235.342927-1-leobras@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 35b3197eff..51e6726dac 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -4334,6 +4334,7 @@ static void migration_instance_init(Object *obj)
+     /* Set has_* up only for parameter checks */
+     params->has_compress_level = true;
+     params->has_compress_threads = true;
+    params->has_compress_wait_thread = true;
+     params->has_decompress_threads = true;
+     params->has_throttle_trigger_threshold = true;
+     params->has_cpu_throttle_initial = true;
+@@ -4354,6 +4355,9 @@ static void migration_instance_init(Object *obj)
+     params->has_announce_max = true;
+     params->has_announce_rounds = true;
+     params->has_announce_step = true;
+    params->has_tls_creds = true;
+    params->has_tls_hostname = true;
+    params->has_tls_authz = true;
+ 
+     qemu_sem_init(&ms->postcopy_pause_sem, 0);
+     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
+-- 
+2.31.1
+
--- a/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch
+++ b/SOURCES/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch
@ -0,0 +1,83 @@
+From 2516a21205e67078cb735e9fd47ba50156c166b7 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Mon, 11 Jul 2022 18:11:13 -0300
+Subject: [PATCH 5/9] migration/multifd: Report to user when zerocopy not
+ working
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 201: Zero-copy-send fixes + improvements
+RH-Commit: [5/8] 0b2e23b7f8ae72936e11369cd44ba474ef3b9e8c
+RH-Bugzilla: 2110203
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+
+Some errors, like the lack of Scatter-Gather support by the network
+interface (NETIF_F_SG), may cause sendmsg(...,MSG_ZEROCOPY) to fail to use
+zero-copy, which causes it to fall back to the default copying mechanism.
+
+After each full dirty-bitmap scan there should be a zero-copy flush
+happening, which checks each of the previous calls to
+sendmsg(...,MSG_ZEROCOPY) for errors. If all of them failed to use
+zero-copy, then increment the dirty_sync_missed_zero_copy migration stat
+to let the user know about it.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20220711211112.18951-4-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 2 ++
+ migration/ram.c     | 5 +++++
+ migration/ram.h     | 2 ++
+ 3 files changed, 9 insertions(+)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 90ab4c4346..7c16523e6b 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -631,6 +631,8 @@ int multifd_send_sync_main(QEMUFile *f)
+             if (ret < 0) {
+                 error_report_err(err);
+                 return -1;
+            } else if (ret == 1) {
+                dirty_sync_missed_zero_copy();
+             }
+         }
+     }
+diff --git a/migration/ram.c b/migration/ram.c
+index e7173da217..93cdb456ac 100644
+--- a/migration/ram.c
+++ b/migration/ram.c
+@@ -403,6 +403,11 @@ static void ram_transferred_add(uint64_t bytes)
+     ram_counters.transferred += bytes;
+ }
+ 
+void dirty_sync_missed_zero_copy(void)
+{
+    ram_counters.dirty_sync_missed_zero_copy++;
+}
+
+ /* used by the search for pages to send */
+ struct PageSearchStatus {
+     /* Current block being searched */
+diff --git a/migration/ram.h b/migration/ram.h
+index c515396a9a..69c3ccb26a 100644
+--- a/migration/ram.h
+++ b/migration/ram.h
+@@ -88,4 +88,6 @@ void ram_write_tracking_prepare(void);
+ int ram_write_tracking_start(void);
+ void ram_write_tracking_stop(void);
+ 
+void dirty_sync_missed_zero_copy(void);
+
+ #endif
+-- 
+2.31.1
+
--- a/SOURCES/kvm-multifd-Add-missing-documentation.patch
+++ b/SOURCES/kvm-multifd-Add-missing-documentation.patch
@ -0,0 +1,82 @@
+From 3b567f762cbd8d4ffaf717b0baba9cf9fe9614c2 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 03/37] multifd: Add missing documentation
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [3/26] 924fca4305ebd8669955d456fc1c515f509e6026
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 18ede636bc29fd8bda628fe3e5c593f8c1b734f4)
+(fixed typo in commit message)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 2 ++
+ migration/multifd-zstd.c | 2 ++
+ migration/multifd.c      | 1 +
+ 3 files changed, 5 insertions(+)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index ab4ba75d75..f403d2f031 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -74,6 +74,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp)
+  * Close the channel and return memory.
+  *
+  * @p: Params for the channel that we are using
+ * @errp: pointer to an error
+  */
+ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
+ {
+@@ -96,6 +97,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
+  *
+  * @p: Params for the channel that we are using
+  * @used: number of pages used
+ * @errp: pointer to an error
+  */
+ static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+ {
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 693bddf8c9..8d657f8860 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -86,6 +86,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp)
+  * Close the channel and return memory.
+  *
+  * @p: Params for the channel that we are using
+ * @errp: pointer to an error
+  */
+ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
+ {
+@@ -109,6 +110,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
+  *
+  * @p: Params for the channel that we are using
+  * @used: number of pages used
+ * @errp: pointer to an error
+  */
+ static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+ {
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 8ea86d81dc..cdeffdc4c5 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -66,6 +66,7 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp)
+  * For no compression this function does nothing.
+  *
+  * @p: Params for the channel that we are using
+ * @errp: pointer to an error
+  */
+ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
+ {
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch
+++ b/SOURCES/kvm-multifd-Fill-offset-and-block-for-reception.patch
@ -0,0 +1,50 @@
+From 8c1edb1889ff44506f35fa185d6569b0dd9d7260 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 07/37] multifd: Fill offset and block for reception
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [7/26] 51a9e6b76af956d63fc735172211d9bf6f0f6f80
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+We were using the iov directly, but we will need this info in the
+following patch.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 01102a2ef6c97acc5cc8a2c3bb62b7665a20f51f)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 55d99a8232..0533da154a 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -354,6 +354,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+         return -1;
+     }
+ 
+    p->pages->block = block;
+     for (i = 0; i < p->pages->num; i++) {
+         uint64_t offset = be64_to_cpu(packet->offset[i]);
+ 
+@@ -363,6 +364,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+                        offset, block->used_length);
+             return -1;
+         }
+        p->pages->offset[i] = offset;
+         p->pages->iov[i].iov_base = block->host + offset;
+         p->pages->iov[i].iov_len = page_size;
+     }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch
+++ b/SOURCES/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch
@ -0,0 +1,182 @@
+From 7a7e2191f1ac4114380248cbd3c6ab7425250747 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 23/37] multifd: Implement zero copy write in multifd migration
+ (multifd-zero-copy)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [23/26] 904ce3909cfef62dd84cc7d3c6a3482e7e6f28e9
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Implement zero copy send on nocomp_send_write(), by making use of QIOChannel
+writev + flags & flush interface.
+
+Change multifd_send_sync_main() so flush_zero_copy() can be called
+after each iteration in order to make sure all dirty pages are sent before
+a new iteration is started. It will also flush at the beginning and at the
+end of migration.
+
+Also make it return -1 if flush_zero_copy() fails, in order to cancel
+the migration process, and avoid resuming the guest in the target host
+without receiving all current RAM.
+
+This will work fine on RAM migration because the RAM pages are not usually freed,
+and there is no problem in changing the pages' content between writev_zero_copy() and
+the actual sending of the buffer, because this change will dirty the page and
+cause it to be re-sent on a later iteration anyway.
+
+A lot of locked memory may be needed in order to use multifd migration
+with zero-copy enabled, so disabling the feature may be necessary for
+low-privileged users trying to perform multifd migrations.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Message-Id: <20220513062836.965425-9-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 5b1d9bab2da4fca3a3caee97c430e5709cb32b7b)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/migration.c | 11 ++++++++++-
+ migration/multifd.c   | 37 +++++++++++++++++++++++++++++++++++--
+ migration/multifd.h   |  2 ++
+ migration/socket.c    |  5 +++--
+ 4 files changed, 50 insertions(+), 5 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 8e28f2ee41..5357efd348 100644
+--- a/migration/migration.c
+++ b/migration/migration.c
+@@ -1471,7 +1471,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
+         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
+         return false;
+     }
+-
+#ifdef CONFIG_LINUX
+    if (params->zero_copy_send &&
+        (!migrate_use_multifd() ||
+         params->multifd_compression != MULTIFD_COMPRESSION_NONE ||
+         (params->tls_creds && *params->tls_creds))) {
+        error_setg(errp,
+                   "Zero copy only available for non-compressed non-TLS multifd migration");
+        return false;
+    }
+#endif
+     return true;
+ }
+ 
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 193f70cdba..90ab4c4346 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -576,6 +576,7 @@ void multifd_save_cleanup(void)
+ int multifd_send_sync_main(QEMUFile *f)
+ {
+     int i;
+    bool flush_zero_copy;
+ 
+     if (!migrate_use_multifd()) {
+         return 0;
+@@ -586,6 +587,20 @@ int multifd_send_sync_main(QEMUFile *f)
+             return -1;
+         }
+     }
+
+    /*
+     * When using zero-copy, it's necessary to flush the pages before any of
+     * the pages can be sent again, so we'll make sure the new version of the
+     * pages will always arrive _later_ than the old pages.
+     *
+     * Currently we achieve this by flushing the zero-page requested writes
+     * per ram iteration, but in the future we could potentially optimize it
+     * to be less frequent, e.g. only after we finished one whole scanning of
+     * all the dirty bitmaps.
+     */
+
+    flush_zero_copy = migrate_use_zero_copy_send();
+
+     for (i = 0; i < migrate_multifd_channels(); i++) {
+         MultiFDSendParams *p = &multifd_send_state->params[i];
+ 
+@@ -607,6 +622,17 @@ int multifd_send_sync_main(QEMUFile *f)
+         ram_counters.transferred += p->packet_len;
+         qemu_mutex_unlock(&p->mutex);
+         qemu_sem_post(&p->sem);
+
+        if (flush_zero_copy && p->c) {
+            int ret;
+            Error *err = NULL;
+
+            ret = qio_channel_flush(p->c, &err);
+            if (ret < 0) {
+                error_report_err(err);
+                return -1;
+            }
+        }
+     }
+     for (i = 0; i < migrate_multifd_channels(); i++) {
+         MultiFDSendParams *p = &multifd_send_state->params[i];
+@@ -691,8 +717,8 @@ static void *multifd_send_thread(void *opaque)
+                 p->iov[0].iov_base = p->packet;
+             }
+ 
+-            ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
+-                                         &local_err);
+            ret = qio_channel_writev_full_all(p->c, p->iov, p->iovs_num, NULL,
+                                              0, p->write_flags, &local_err);
+             if (ret != 0) {
+                 break;
+             }
+@@ -933,6 +959,13 @@ int multifd_save_setup(Error **errp)
+         /* We need one extra place for the packet header */
+         p->iov = g_new0(struct iovec, page_count + 1);
+         p->normal = g_new0(ram_addr_t, page_count);
+
+        if (migrate_use_zero_copy_send()) {
+            p->write_flags = QIO_CHANNEL_WRITE_FLAG_ZERO_COPY;
+        } else {
+            p->write_flags = 0;
+        }
+
+         socket_send_channel_create(multifd_new_send_channel_async, p);
+     }
+ 
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 92de878155..11d5e273e6 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -95,6 +95,8 @@ typedef struct {
+     uint32_t packet_len;
+     /* pointer to the packet */
+     MultiFDPacket_t *packet;
+    /* multifd flags for sending ram */
+    int write_flags;
+     /* multifd flags for each packet */
+     uint32_t flags;
+     /* size of the next packet that contains pages */
+diff --git a/migration/socket.c b/migration/socket.c
+index 3754d8f72c..4fd5e85f50 100644
+--- a/migration/socket.c
+++ b/migration/socket.c
+@@ -79,8 +79,9 @@ static void socket_outgoing_migration(QIOTask *task,
+ 
+     trace_migration_socket_outgoing_connected(data->hostname);
+ 
+-    if (migrate_use_zero_copy_send()) {
+-        error_setg(&err, "Zero copy send not available in migration");
+    if (migrate_use_zero_copy_send() &&
+        !qio_channel_has_feature(sioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+        error_setg(&err, "Zero copy send feature not detected in host kernel");
+     }
+ 
+ out:
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch
+++ b/SOURCES/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch
@ -0,0 +1,98 @@
+From 75cd92cb7cff055f46163e64d66ba3f685f9ac04 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 09/37] multifd: Make zlib compression method not use iovs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [9/26] d33dd62b833d50fee989a195aebcc8d5e7d43181
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit a5ed22948873b50fcf1415d1ce15c71d61a9388d)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 17 +++++++++--------
+ 1 file changed, 9 insertions(+), 8 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index 330fc021c5..a1950a4588 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -13,6 +13,7 @@
+ #include "qemu/osdep.h"
+ #include <zlib.h>
+ #include "qemu/rcu.h"
+#include "exec/ramblock.h"
+ #include "exec/target_page.h"
+ #include "qapi/error.h"
+ #include "migration.h"
+@@ -100,8 +101,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
+  */
+ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+-    struct iovec *iov = p->pages->iov;
+     struct zlib_data *z = p->data;
+    size_t page_size = qemu_target_page_size();
+     z_stream *zs = &z->zs;
+     uint32_t out_size = 0;
+     int ret;
+@@ -115,8 +116,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+             flush = Z_SYNC_FLUSH;
+         }
+ 
+-        zs->avail_in = iov[i].iov_len;
+-        zs->next_in = iov[i].iov_base;
+        zs->avail_in = page_size;
+        zs->next_in = p->pages->block->host + p->pages->offset[i];
+ 
+         zs->avail_out = available;
+         zs->next_out = z->zbuff + out_size;
+@@ -240,6 +241,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p)
+ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     struct zlib_data *z = p->data;
+    size_t page_size = qemu_target_page_size();
+     z_stream *zs = &z->zs;
+     uint32_t in_size = p->next_packet_size;
+     /* we measure the change of total_out */
+@@ -264,7 +266,6 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+     zs->next_in = z->zbuff;
+ 
+     for (i = 0; i < p->pages->num; i++) {
+-        struct iovec *iov = &p->pages->iov[i];
+         int flush = Z_NO_FLUSH;
+         unsigned long start = zs->total_out;
+ 
+@@ -272,8 +273,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+             flush = Z_SYNC_FLUSH;
+         }
+ 
+-        zs->avail_out = iov->iov_len;
+-        zs->next_out = iov->iov_base;
+        zs->avail_out = page_size;
+        zs->next_out = p->pages->block->host + p->pages->offset[i];
+ 
+         /*
+          * Welcome to inflate semantics
+@@ -286,8 +287,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+         do {
+             ret = inflate(zs, flush);
+         } while (ret == Z_OK && zs->avail_in
+-                             && (zs->total_out - start) < iov->iov_len);
+-        if (ret == Z_OK && (zs->total_out - start) < iov->iov_len) {
+                             && (zs->total_out - start) < page_size);
+        if (ret == Z_OK && (zs->total_out - start) < page_size) {
+             error_setg(errp, "multifd %d: inflate generated too few output",
+                        p->id);
+             return -1;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch
+++ b/SOURCES/kvm-multifd-Make-zlib-use-iov-s.patch
@ -0,0 +1,53 @@
+From 1cdab9cadef1ed84ec34651a1edbffa36c1e67d0 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 12/37] multifd: Make zlib use iov's
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [12/26] 58630452e14802e71a9eadb17cfe4964ebf8e091
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 48a4a44c1cde382c6b8e7792d01fe7d9b0a59c69)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index a987e4a26c..96475e096e 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -145,6 +145,9 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+         }
+         out_size += available - zs->avail_out;
+     }
+    p->iov[p->iovs_num].iov_base = z->zbuff;
+    p->iov[p->iovs_num].iov_len = out_size;
+    p->iovs_num++;
+     p->next_packet_size = out_size;
+     p->flags |= MULTIFD_FLAG_ZLIB;
+ 
+@@ -164,10 +167,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+  */
+ static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+ {
+-    struct zlib_data *z = p->data;
+-
+-    return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size,
+-                                 errp);
+    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+ }
+ 
+ /**
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch
+++ b/SOURCES/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch
@ -0,0 +1,94 @@
+From ab6262bd4829e3bd6437fe32737209df2af2d141 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 08/37] multifd: Make zstd compression method not use iovs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [8/26] 010579fa73b5a4c6fd631dc9fbaf6f974974bc99
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit f5ff548774c22b34a0c0e2fef85f1be11160d774)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zstd.c | 20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index f0d1105792..d9ed42622b 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -13,6 +13,7 @@
+ #include "qemu/osdep.h"
+ #include <zstd.h>
+ #include "qemu/rcu.h"
+#include "exec/ramblock.h"
+ #include "exec/target_page.h"
+ #include "qapi/error.h"
+ #include "migration.h"
+@@ -113,8 +114,8 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
+  */
+ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+-    struct iovec *iov = p->pages->iov;
+     struct zstd_data *z = p->data;
+    size_t page_size = qemu_target_page_size();
+     int ret;
+     uint32_t i;
+ 
+@@ -128,8 +129,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+         if (i == p->pages->num - 1) {
+             flush = ZSTD_e_flush;
+         }
+-        z->in.src = iov[i].iov_base;
+-        z->in.size = iov[i].iov_len;
+        z->in.src = p->pages->block->host + p->pages->offset[i];
+        z->in.size = page_size;
+         z->in.pos = 0;
+ 
+         /*
+@@ -261,7 +262,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     uint32_t in_size = p->next_packet_size;
+     uint32_t out_size = 0;
+-    uint32_t expected_size = p->pages->num * qemu_target_page_size();
+    size_t page_size = qemu_target_page_size();
+    uint32_t expected_size = p->pages->num * page_size;
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+     struct zstd_data *z = p->data;
+     int ret;
+@@ -283,10 +285,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+     z->in.pos = 0;
+ 
+     for (i = 0; i < p->pages->num; i++) {
+-        struct iovec *iov = &p->pages->iov[i];
+-
+-        z->out.dst = iov->iov_base;
+-        z->out.size = iov->iov_len;
+        z->out.dst = p->pages->block->host + p->pages->offset[i];
+        z->out.size = page_size;
+         z->out.pos = 0;
+ 
+         /*
+@@ -300,8 +300,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+         do {
+             ret = ZSTD_decompressStream(z->zds, &z->out, &z->in);
+         } while (ret > 0 && (z->in.size - z->in.pos > 0)
+-                         && (z->out.pos < iov->iov_len));
+-        if (ret > 0 && (z->out.pos < iov->iov_len)) {
+                         && (z->out.pos < page_size));
+        if (ret > 0 && (z->out.pos < page_size)) {
+             error_setg(errp, "multifd %d: decompressStream buffer too small",
+                        p->id);
+             return -1;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch
+++ b/SOURCES/kvm-multifd-Make-zstd-use-iov-s.patch
@ -0,0 +1,53 @@
+From bac5ce0b4d3552d6056045f201b4e50dd6204b31 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 13/37] multifd: Make zstd use iov's
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [13/26] 4d7036fb32efdf088d23737b9710e6ad1a4654aa
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 0a818b89eb8eaf79ae651405907d8110a0935cfd)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zstd.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 2185a83eac..4e60cdbc54 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -156,6 +156,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+             return -1;
+         }
+     }
+    p->iov[p->iovs_num].iov_base = z->zbuff;
+    p->iov[p->iovs_num].iov_len = z->out.pos;
+    p->iovs_num++;
+     p->next_packet_size = z->out.pos;
+     p->flags |= MULTIFD_FLAG_ZSTD;
+ 
+@@ -175,10 +178,7 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+  */
+ static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+ {
+-    struct zstd_data *z = p->data;
+-
+-    return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size,
+-                                 errp);
+    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+ }
+ 
+ /**
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch
+++ b/SOURCES/kvm-multifd-Move-iov-from-pages-to-params.patch
@ -0,0 +1,190 @@
+From 1181a9cbcaf37a82aa7bf117ef209f554b8c4a71 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 11/37] multifd: Move iov from pages to params
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [11/26] 24dff3ef68cf3327811242193502319ed3e3940a
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+This will allow us to reduce the number of system calls on the next patch.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+(cherry picked from commit 226468ba3dea950ab4bb0b729878dde25812da1c)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 34 ++++++++++++++++++++++++----------
+ migration/multifd.h |  8 ++++++--
+ 2 files changed, 30 insertions(+), 12 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index d0d19470f9..5004f394aa 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -86,7 +86,16 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
+  */
+ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+-    p->next_packet_size = p->pages->num * qemu_target_page_size();
+    MultiFDPages_t *pages = p->pages;
+    size_t page_size = qemu_target_page_size();
+
+    for (int i = 0; i < p->pages->num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
+        p->iov[p->iovs_num].iov_len = page_size;
+        p->iovs_num++;
+    }
+
+    p->next_packet_size = p->pages->num * page_size;
+     p->flags |= MULTIFD_FLAG_NOCOMP;
+     return 0;
+ }
+@@ -104,7 +113,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
+  */
+ static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+ {
+-    return qio_channel_writev_all(p->c, p->pages->iov, used, errp);
+    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+ }
+ 
+ /**
+@@ -146,13 +155,18 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p)
+ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+    size_t page_size = qemu_target_page_size();
+ 
+     if (flags != MULTIFD_FLAG_NOCOMP) {
+         error_setg(errp, "multifd %u: flags received %x flags expected %x",
+                    p->id, flags, MULTIFD_FLAG_NOCOMP);
+         return -1;
+     }
+-    return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp);
+    for (int i = 0; i < p->pages->num; i++) {
+        p->iov[i].iov_base = p->pages->block->host + p->pages->offset[i];
+        p->iov[i].iov_len = page_size;
+    }
+    return qio_channel_readv_all(p->c, p->iov, p->pages->num, errp);
+ }
+ 
+ static MultiFDMethods multifd_nocomp_ops = {
+@@ -242,7 +256,6 @@ static MultiFDPages_t *multifd_pages_init(size_t size)
+     MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
+ 
+     pages->allocated = size;
+-    pages->iov = g_new0(struct iovec, size);
+     pages->offset = g_new0(ram_addr_t, size);
+ 
+     return pages;
+@@ -254,8 +267,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
+     pages->allocated = 0;
+     pages->packet_num = 0;
+     pages->block = NULL;
+-    g_free(pages->iov);
+-    pages->iov = NULL;
+     g_free(pages->offset);
+     pages->offset = NULL;
+     g_free(pages);
+@@ -365,8 +376,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+             return -1;
+         }
+         p->pages->offset[i] = offset;
+-        p->pages->iov[i].iov_base = block->host + offset;
+-        p->pages->iov[i].iov_len = page_size;
+     }
+ 
+     return 0;
+@@ -470,8 +479,6 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+ 
+     if (pages->block == block) {
+         pages->offset[pages->num] = offset;
+-        pages->iov[pages->num].iov_base = block->host + offset;
+-        pages->iov[pages->num].iov_len = qemu_target_page_size();
+         pages->num++;
+ 
+         if (pages->num < pages->allocated) {
+@@ -564,6 +571,8 @@ void multifd_save_cleanup(void)
+         p->packet_len = 0;
+         g_free(p->packet);
+         p->packet = NULL;
+        g_free(p->iov);
+        p->iov = NULL;
+         multifd_send_state->ops->send_cleanup(p, &local_err);
+         if (local_err) {
+             migrate_set_error(migrate_get_current(), local_err);
+@@ -651,6 +660,7 @@ static void *multifd_send_thread(void *opaque)
+             uint32_t used = p->pages->num;
+             uint64_t packet_num = p->packet_num;
+             uint32_t flags = p->flags;
+            p->iovs_num = 0;
+ 
+             if (used) {
+                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
+@@ -919,6 +929,7 @@ int multifd_save_setup(Error **errp)
+         p->packet->version = cpu_to_be32(MULTIFD_VERSION);
+         p->name = g_strdup_printf("multifdsend_%d", i);
+         p->tls_hostname = g_strdup(s->hostname);
+        p->iov = g_new0(struct iovec, page_count);
+         socket_send_channel_create(multifd_new_send_channel_async, p);
+     }
+ 
+@@ -1018,6 +1029,8 @@ int multifd_load_cleanup(Error **errp)
+         p->packet_len = 0;
+         g_free(p->packet);
+         p->packet = NULL;
+        g_free(p->iov);
+        p->iov = NULL;
+         multifd_recv_state->ops->recv_cleanup(p);
+     }
+     qemu_sem_destroy(&multifd_recv_state->sem_sync);
+@@ -1158,6 +1171,7 @@ int multifd_load_setup(Error **errp)
+                       + sizeof(uint64_t) * page_count;
+         p->packet = g_malloc0(p->packet_len);
+         p->name = g_strdup_printf("multifdrecv_%d", i);
+        p->iov = g_new0(struct iovec, page_count);
+     }
+ 
+     for (i = 0; i < thread_count; i++) {
+diff --git a/migration/multifd.h b/migration/multifd.h
+index e57adc783b..c3f18af364 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -62,8 +62,6 @@ typedef struct {
+     uint64_t packet_num;
+     /* offset of each page */
+     ram_addr_t *offset;
+-    /* pointer to each page */
+-    struct iovec *iov;
+     RAMBlock *block;
+ } MultiFDPages_t;
+ 
+@@ -110,6 +108,10 @@ typedef struct {
+     uint64_t num_pages;
+     /* syncs main thread and channels */
+     QemuSemaphore sem_sync;
+    /* buffers to send */
+    struct iovec *iov;
+    /* number of iovs used */
+    uint32_t iovs_num;
+     /* used for compression methods */
+     void *data;
+ }  MultiFDSendParams;
+@@ -149,6 +151,8 @@ typedef struct {
+     uint64_t num_pages;
+     /* syncs main thread and channels */
+     QemuSemaphore sem_sync;
+    /* buffers to recv */
+    struct iovec *iov;
+     /* used for de-compression methods */
+     void *data;
+ } MultiFDRecvParams;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Remove-send_write-method.patch
+++ b/SOURCES/kvm-multifd-Remove-send_write-method.patch
@ -0,0 +1,160 @@
+From 2952487c7e5ed14796fbffae0b964a35790d6850 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 14/37] multifd: Remove send_write() method
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [14/26] 5fa59ffa09099fbc6da84e9a192ca71af52cc98f
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Everything now uses iov's.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 468fcb5dd0c965e1af0da9efab09b1462631da18)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 17 -----------------
+ migration/multifd-zstd.c | 17 -----------------
+ migration/multifd.c      | 20 ++------------------
+ migration/multifd.h      |  2 --
+ 4 files changed, 2 insertions(+), 54 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index 96475e096e..8ed29b9633 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -154,22 +154,6 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+     return 0;
+ }
+ 
+-/**
+- * zlib_send_write: do the actual write of the data
+- *
+- * Do the actual write of the comprresed buffer.
+- *
+- * Returns 0 for success or -1 for error
+- *
+- * @p: Params for the channel that we are using
+- * @used: number of pages used
+- * @errp: pointer to an error
+- */
+-static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+-{
+-    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+-}
+-
+ /**
+  * zlib_recv_setup: setup receive side
+  *
+@@ -312,7 +296,6 @@ static MultiFDMethods multifd_zlib_ops = {
+     .send_setup = zlib_send_setup,
+     .send_cleanup = zlib_send_cleanup,
+     .send_prepare = zlib_send_prepare,
+-    .send_write = zlib_send_write,
+     .recv_setup = zlib_recv_setup,
+     .recv_cleanup = zlib_recv_cleanup,
+     .recv_pages = zlib_recv_pages
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 4e60cdbc54..25e1f517b5 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -165,22 +165,6 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+     return 0;
+ }
+ 
+-/**
+- * zstd_send_write: do the actual write of the data
+- *
+- * Do the actual write of the comprresed buffer.
+- *
+- * Returns 0 for success or -1 for error
+- *
+- * @p: Params for the channel that we are using
+- * @used: number of pages used
+- * @errp: pointer to an error
+- */
+-static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+-{
+-    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+-}
+-
+ /**
+  * zstd_recv_setup: setup receive side
+  *
+@@ -325,7 +309,6 @@ static MultiFDMethods multifd_zstd_ops = {
+     .send_setup = zstd_send_setup,
+     .send_cleanup = zstd_send_cleanup,
+     .send_prepare = zstd_send_prepare,
+-    .send_write = zstd_send_write,
+     .recv_setup = zstd_recv_setup,
+     .recv_cleanup = zstd_recv_cleanup,
+     .recv_pages = zstd_recv_pages
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 5004f394aa..1e1551d78b 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -100,22 +100,6 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
+     return 0;
+ }
+ 
+-/**
+- * nocomp_send_write: do the actual write of the data
+- *
+- * For no compression we just have to write the data.
+- *
+- * Returns 0 for success or -1 for error
+- *
+- * @p: Params for the channel that we are using
+- * @used: number of pages used
+- * @errp: pointer to an error
+- */
+-static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
+-{
+-    return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp);
+-}
+-
+ /**
+  * nocomp_recv_setup: setup receive side
+  *
+@@ -173,7 +157,6 @@ static MultiFDMethods multifd_nocomp_ops = {
+     .send_setup = nocomp_send_setup,
+     .send_cleanup = nocomp_send_cleanup,
+     .send_prepare = nocomp_send_prepare,
+-    .send_write = nocomp_send_write,
+     .recv_setup = nocomp_recv_setup,
+     .recv_cleanup = nocomp_recv_cleanup,
+     .recv_pages = nocomp_recv_pages
+@@ -687,7 +670,8 @@ static void *multifd_send_thread(void *opaque)
+             }
+ 
+             if (used) {
+-                ret = multifd_send_state->ops->send_write(p, used, &local_err);
+                ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
+                                             &local_err);
+                 if (ret != 0) {
+                     break;
+                 }
+diff --git a/migration/multifd.h b/migration/multifd.h
+index c3f18af364..7496f951a7 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -164,8 +164,6 @@ typedef struct {
+     void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
+     /* Prepare the send packet */
+     int (*send_prepare)(MultiFDSendParams *p, Error **errp);
+-    /* Write the send packet */
+-    int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp);
+     /* Setup for receiving side */
+     int (*recv_setup)(MultiFDRecvParams *p, Error **errp);
+     /* Cleanup for receiving side */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Rename-used-field-to-num.patch
+++ b/SOURCES/kvm-multifd-Rename-used-field-to-num.patch
@ -0,0 +1,177 @@
+From 003ef20d11b33a7139fae6fbcf170188a07afc43 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:22 -0300
+Subject: [PATCH 02/37] multifd: Rename used field to num
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [2/26] 952283197ef89be4d61c7690bb6c3194e5c67217
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+We will need to split it later into zero_num (number of zero pages) and
+normal_num (number of normal pages).  This name is better.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 90a3d2f9d5f729147b2827c177932603ae6e2d55)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 38 +++++++++++++++++++-------------------
+ migration/multifd.h |  2 +-
+ 2 files changed, 20 insertions(+), 20 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 8125d0015c..8ea86d81dc 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -252,7 +252,7 @@ static MultiFDPages_t *multifd_pages_init(size_t size)
+ 
+ static void multifd_pages_clear(MultiFDPages_t *pages)
+ {
+-    pages->used = 0;
+    pages->num = 0;
+     pages->allocated = 0;
+     pages->packet_num = 0;
+     pages->block = NULL;
+@@ -270,7 +270,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
+ 
+     packet->flags = cpu_to_be32(p->flags);
+     packet->pages_alloc = cpu_to_be32(p->pages->allocated);
+-    packet->pages_used = cpu_to_be32(p->pages->used);
+    packet->pages_used = cpu_to_be32(p->pages->num);
+     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
+     packet->packet_num = cpu_to_be64(p->packet_num);
+ 
+@@ -278,7 +278,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
+         strncpy(packet->ramblock, p->pages->block->idstr, 256);
+     }
+ 
+-    for (i = 0; i < p->pages->used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         /* there are architectures where ram_addr_t is 32 bit */
+         uint64_t temp = p->pages->offset[i];
+ 
+@@ -332,18 +332,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+         p->pages = multifd_pages_init(packet->pages_alloc);
+     }
+ 
+-    p->pages->used = be32_to_cpu(packet->pages_used);
+-    if (p->pages->used > packet->pages_alloc) {
+    p->pages->num = be32_to_cpu(packet->pages_used);
+    if (p->pages->num > packet->pages_alloc) {
+         error_setg(errp, "multifd: received packet "
+                    "with %d pages and expected maximum pages are %d",
+-                   p->pages->used, packet->pages_alloc) ;
+                   p->pages->num, packet->pages_alloc) ;
+         return -1;
+     }
+ 
+     p->next_packet_size = be32_to_cpu(packet->next_packet_size);
+     p->packet_num = be64_to_cpu(packet->packet_num);
+ 
+-    if (p->pages->used == 0) {
+    if (p->pages->num == 0) {
+         return 0;
+     }
+ 
+@@ -356,7 +356,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
+         return -1;
+     }
+ 
+-    for (i = 0; i < p->pages->used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         uint64_t offset = be64_to_cpu(packet->offset[i]);
+ 
+         if (offset > (block->used_length - page_size)) {
+@@ -443,13 +443,13 @@ static int multifd_send_pages(QEMUFile *f)
+         }
+         qemu_mutex_unlock(&p->mutex);
+     }
+-    assert(!p->pages->used);
+    assert(!p->pages->num);
+     assert(!p->pages->block);
+ 
+     p->packet_num = multifd_send_state->packet_num++;
+     multifd_send_state->pages = p->pages;
+     p->pages = pages;
+-    transferred = ((uint64_t) pages->used) * qemu_target_page_size()
+    transferred = ((uint64_t) pages->num) * qemu_target_page_size()
+                 + p->packet_len;
+     qemu_file_update_transfer(f, transferred);
+     ram_counters.multifd_bytes += transferred;
+@@ -469,12 +469,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
+     }
+ 
+     if (pages->block == block) {
+-        pages->offset[pages->used] = offset;
+-        pages->iov[pages->used].iov_base = block->host + offset;
+-        pages->iov[pages->used].iov_len = qemu_target_page_size();
+-        pages->used++;
+        pages->offset[pages->num] = offset;
+        pages->iov[pages->num].iov_base = block->host + offset;
+        pages->iov[pages->num].iov_len = qemu_target_page_size();
+        pages->num++;
+ 
+-        if (pages->used < pages->allocated) {
+        if (pages->num < pages->allocated) {
+             return 1;
+         }
+     }
+@@ -586,7 +586,7 @@ void multifd_send_sync_main(QEMUFile *f)
+     if (!migrate_use_multifd()) {
+         return;
+     }
+-    if (multifd_send_state->pages->used) {
+    if (multifd_send_state->pages->num) {
+         if (multifd_send_pages(f) < 0) {
+             error_report("%s: multifd_send_pages fail", __func__);
+             return;
+@@ -649,7 +649,7 @@ static void *multifd_send_thread(void *opaque)
+         qemu_mutex_lock(&p->mutex);
+ 
+         if (p->pending_job) {
+-            uint32_t used = p->pages->used;
+            uint32_t used = p->pages->num;
+             uint64_t packet_num = p->packet_num;
+             flags = p->flags;
+ 
+@@ -665,7 +665,7 @@ static void *multifd_send_thread(void *opaque)
+             p->flags = 0;
+             p->num_packets++;
+             p->num_pages += used;
+-            p->pages->used = 0;
+            p->pages->num = 0;
+             p->pages->block = NULL;
+             qemu_mutex_unlock(&p->mutex);
+ 
+@@ -1091,7 +1091,7 @@ static void *multifd_recv_thread(void *opaque)
+             break;
+         }
+ 
+-        used = p->pages->used;
+        used = p->pages->num;
+         flags = p->flags;
+         /* recv methods don't know how to handle the SYNC flag */
+         p->flags &= ~MULTIFD_FLAG_SYNC;
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 15c50ca0b2..86820dd028 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -55,7 +55,7 @@ typedef struct {
+ 
+ typedef struct {
+     /* number of used pages */
+-    uint32_t used;
+    uint32_t num;
+     /* number of allocated pages */
+     uint32_t allocated;
+     /* global number of generated multifd packets */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch
+++ b/SOURCES/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch
@ -0,0 +1,102 @@
+From 33a38fef5e889b45571228bde519746fd90d8877 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 22/37] multifd: Send header packet without flags if
+ zero-copy-send is enabled
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [22/26] 9abfee42b72f11911cf128519826d09cbd2f5bc3
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Since d48c3a0445 ("multifd: Use a single writev on the send side"),
+sending the header packet and the memory pages happens in the same
+writev, which can potentially make the migration faster.
+
+Using channel-socket as example, this works well with the default copying
+mechanism of sendmsg(), but with zero-copy-send=true, it will cause
+the migration to often break.
+
+This happens because the header packet buffer gets reused quite often,
+and there is a high chance that by the time the MSG_ZEROCOPY mechanism gets
+to send the buffer, it has already changed, sending the wrong data and
+causing the migration to abort.
+
+It means that, as it is, the buffer for the header packet is not suitable
+for sending with MSG_ZEROCOPY.
+
+In order to enable zero copy for multifd, send the header packet on an
+individual write(), without any flags, and the remaining pages with a
+writev(), as it was happening before. This only changes how a migration
+with zero-copy-send=true works, not changing any current behavior for
+migrations with zero-copy-send=false.
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Message-Id: <20220513062836.965425-8-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit b7dbdd8e76cd03453c234dbb9578d20969859d74)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 22 +++++++++++++++++++---
+ 1 file changed, 19 insertions(+), 3 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 1e34e01ebc..193f70cdba 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -624,6 +624,7 @@ static void *multifd_send_thread(void *opaque)
+     MultiFDSendParams *p = opaque;
+     Error *local_err = NULL;
+     int ret = 0;
+    bool use_zero_copy_send = migrate_use_zero_copy_send();
+ 
+     trace_multifd_send_thread_start(p->id);
+     rcu_register_thread();
+@@ -646,9 +647,14 @@ static void *multifd_send_thread(void *opaque)
+         if (p->pending_job) {
+             uint64_t packet_num = p->packet_num;
+             uint32_t flags = p->flags;
+-            p->iovs_num = 1;
+             p->normal_num = 0;
+ 
+            if (use_zero_copy_send) {
+                p->iovs_num = 0;
+            } else {
+                p->iovs_num = 1;
+            }
+
+             for (int i = 0; i < p->pages->num; i++) {
+                 p->normal[p->normal_num] = p->pages->offset[i];
+                 p->normal_num++;
+@@ -672,8 +678,18 @@ static void *multifd_send_thread(void *opaque)
+             trace_multifd_send(p->id, packet_num, p->normal_num, flags,
+                                p->next_packet_size);
+ 
+-            p->iov[0].iov_len = p->packet_len;
+-            p->iov[0].iov_base = p->packet;
+            if (use_zero_copy_send) {
+                /* Send header first, without zerocopy */
+                ret = qio_channel_write_all(p->c, (void *)p->packet,
+                                            p->packet_len, &local_err);
+                if (ret != 0) {
+                    break;
+                }
+            } else {
+                /* Send header using the same writev call */
+                p->iov[0].iov_len = p->packet_len;
+                p->iov[0].iov_base = p->packet;
+            }
+ 
+             ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
+                                          &local_err);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch
+++ b/SOURCES/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch
@ -0,0 +1,48 @@
+From 56cd14fc23c58707b9184da11f36d777bba6ce78 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 04/37] multifd: The variable is only used inside the loop
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [4/26] 45d8bbde75ebbef6329c41ddb56db4526739f94f
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 1943c11a62bd0741e5d9fbba78404fe47ebea820)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index cdeffdc4c5..ce7101cf9d 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -629,7 +629,6 @@ static void *multifd_send_thread(void *opaque)
+     MultiFDSendParams *p = opaque;
+     Error *local_err = NULL;
+     int ret = 0;
+-    uint32_t flags = 0;
+ 
+     trace_multifd_send_thread_start(p->id);
+     rcu_register_thread();
+@@ -652,7 +651,7 @@ static void *multifd_send_thread(void *opaque)
+         if (p->pending_job) {
+             uint32_t used = p->pages->num;
+             uint64_t packet_num = p->packet_num;
+-            flags = p->flags;
+            uint32_t flags = p->flags;
+ 
+             if (used) {
+                 ret = multifd_send_state->ops->send_prepare(p, used,
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch
+++ b/SOURCES/kvm-multifd-Use-a-single-writev-on-the-send-side.patch
@ -0,0 +1,80 @@
+From 4051de396e02ea2c1911c842426318bcd97f93c7 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 15/37] multifd: Use a single writev on the send side
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [15/26] c37063c813fc0ba695072117f272360e5c413803
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Until now, we wrote the packet header with write(), and the rest of the
+pages with writev().  Just increase the size of the iovec and do a
+single writev().
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit d48c3a044537689866fe44e65d24c7d39a68868a)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 20 ++++++++------------
+ 1 file changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 1e1551d78b..d0f86542b1 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -643,7 +643,7 @@ static void *multifd_send_thread(void *opaque)
+             uint32_t used = p->pages->num;
+             uint64_t packet_num = p->packet_num;
+             uint32_t flags = p->flags;
+-            p->iovs_num = 0;
+            p->iovs_num = 1;
+ 
+             if (used) {
+                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
+@@ -663,20 +663,15 @@ static void *multifd_send_thread(void *opaque)
+             trace_multifd_send(p->id, packet_num, used, flags,
+                                p->next_packet_size);
+ 
+-            ret = qio_channel_write_all(p->c, (void *)p->packet,
+-                                        p->packet_len, &local_err);
+            p->iov[0].iov_len = p->packet_len;
+            p->iov[0].iov_base = p->packet;
+
+            ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
+                                         &local_err);
+             if (ret != 0) {
+                 break;
+             }
+ 
+-            if (used) {
+-                ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num,
+-                                             &local_err);
+-                if (ret != 0) {
+-                    break;
+-                }
+-            }
+-
+             qemu_mutex_lock(&p->mutex);
+             p->pending_job--;
+             qemu_mutex_unlock(&p->mutex);
+@@ -913,7 +908,8 @@ int multifd_save_setup(Error **errp)
+         p->packet->version = cpu_to_be32(MULTIFD_VERSION);
+         p->name = g_strdup_printf("multifdsend_%d", i);
+         p->tls_hostname = g_strdup(s->hostname);
+-        p->iov = g_new0(struct iovec, page_count);
+        /* We need one extra place for the packet header */
+        p->iov = g_new0(struct iovec, page_count + 1);
+         socket_send_channel_create(multifd_new_send_channel_async, p);
+     }
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch
+++ b/SOURCES/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch
@ -0,0 +1,261 @@
+From 3b57c876e1eaca34fb5bd9067553de945013d4be Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:24 -0300
+Subject: [PATCH 16/37] multifd: Use normal pages array on the send side
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [16/26] 1c48806474daf48fe93920ac361311af95c6a6f3
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+We are only sending normal pages through multifd channels.
+Later in this series, we are also going to send zero pages.
+We are going to detect if a page is zero or non zero in the multifd
+channel thread, not on the main thread.
+
+So we receive an array of pages, pages->offset[N].
+
+And we will end up with:
+
+p->normal[N - zero_pages]
+p->zero[zero_pages].
+
+In this patch, we just copy all the pages in offset to normal.
+
+for (i = 0; i < pages->num; i++) {
+    p->normal[p->normal_num] = pages->offset[i];
+    p->normal_num++;
+}
+
+Later in the series this becomes:
+
+for (i = 0; i < pages->num; i++) {
+    if (buffer_is_zero(pages->offset[i])) {
+        p->zero[p->zero_num] = pages->offset[i];
+        p->zero_num++;
+    } else {
+        p->normal[p->normal_num] = pages->offset[i];
+        p->normal_num++;
+    }
+}
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+---
+
+Improving comment (dave)
+Renaming num_normal_pages to total_normal_pages (peter)
+
+(cherry picked from commit 815956f03902980c771da64b17f7f791c1cb57b0)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c |  6 +++---
+ migration/multifd-zstd.c |  6 +++---
+ migration/multifd.c      | 30 +++++++++++++++++++-----------
+ migration/multifd.h      |  8 ++++++--
+ migration/trace-events   |  4 ++--
+ 5 files changed, 33 insertions(+), 21 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index 8ed29b9633..8508f26adf 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -108,16 +108,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+     int ret;
+     uint32_t i;
+ 
+-    for (i = 0; i < p->pages->num; i++) {
+    for (i = 0; i < p->normal_num; i++) {
+         uint32_t available = z->zbuff_len - out_size;
+         int flush = Z_NO_FLUSH;
+ 
+-        if (i == p->pages->num - 1) {
+        if (i == p->normal_num - 1) {
+             flush = Z_SYNC_FLUSH;
+         }
+ 
+         zs->avail_in = page_size;
+-        zs->next_in = p->pages->block->host + p->pages->offset[i];
+        zs->next_in = p->pages->block->host + p->normal[i];
+ 
+         zs->avail_out = available;
+         zs->next_out = z->zbuff + out_size;
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 25e1f517b5..693af3a140 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -123,13 +123,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+     z->out.size = z->zbuff_len;
+     z->out.pos = 0;
+ 
+-    for (i = 0; i < p->pages->num; i++) {
+    for (i = 0; i < p->normal_num; i++) {
+         ZSTD_EndDirective flush = ZSTD_e_continue;
+ 
+-        if (i == p->pages->num - 1) {
+        if (i == p->normal_num - 1) {
+             flush = ZSTD_e_flush;
+         }
+-        z->in.src = p->pages->block->host + p->pages->offset[i];
+        z->in.src = p->pages->block->host + p->normal[i];
+         z->in.size = page_size;
+         z->in.pos = 0;
+ 
+diff --git a/migration/multifd.c b/migration/multifd.c
+index d0f86542b1..3725226400 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -89,13 +89,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
+     MultiFDPages_t *pages = p->pages;
+     size_t page_size = qemu_target_page_size();
+ 
+-    for (int i = 0; i < p->pages->num; i++) {
+-        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
+    for (int i = 0; i < p->normal_num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
+         p->iov[p->iovs_num].iov_len = page_size;
+         p->iovs_num++;
+     }
+ 
+-    p->next_packet_size = p->pages->num * page_size;
+    p->next_packet_size = p->normal_num * page_size;
+     p->flags |= MULTIFD_FLAG_NOCOMP;
+     return 0;
+ }
+@@ -262,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
+ 
+     packet->flags = cpu_to_be32(p->flags);
+     packet->pages_alloc = cpu_to_be32(p->pages->allocated);
+-    packet->pages_used = cpu_to_be32(p->pages->num);
+    packet->pages_used = cpu_to_be32(p->normal_num);
+     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
+     packet->packet_num = cpu_to_be64(p->packet_num);
+ 
+@@ -270,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p)
+         strncpy(packet->ramblock, p->pages->block->idstr, 256);
+     }
+ 
+-    for (i = 0; i < p->pages->num; i++) {
+    for (i = 0; i < p->normal_num; i++) {
+         /* there are architectures where ram_addr_t is 32 bit */
+-        uint64_t temp = p->pages->offset[i];
+        uint64_t temp = p->normal[i];
+ 
+         packet->offset[i] = cpu_to_be64(temp);
+     }
+@@ -556,6 +556,8 @@ void multifd_save_cleanup(void)
+         p->packet = NULL;
+         g_free(p->iov);
+         p->iov = NULL;
+        g_free(p->normal);
+        p->normal = NULL;
+         multifd_send_state->ops->send_cleanup(p, &local_err);
+         if (local_err) {
+             migrate_set_error(migrate_get_current(), local_err);
+@@ -640,12 +642,17 @@ static void *multifd_send_thread(void *opaque)
+         qemu_mutex_lock(&p->mutex);
+ 
+         if (p->pending_job) {
+-            uint32_t used = p->pages->num;
+             uint64_t packet_num = p->packet_num;
+             uint32_t flags = p->flags;
+             p->iovs_num = 1;
+            p->normal_num = 0;
+
+            for (int i = 0; i < p->pages->num; i++) {
+                p->normal[p->normal_num] = p->pages->offset[i];
+                p->normal_num++;
+            }
+ 
+-            if (used) {
+            if (p->normal_num) {
+                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
+                 if (ret != 0) {
+                     qemu_mutex_unlock(&p->mutex);
+@@ -655,12 +662,12 @@ static void *multifd_send_thread(void *opaque)
+             multifd_send_fill_packet(p);
+             p->flags = 0;
+             p->num_packets++;
+-            p->num_pages += used;
+            p->total_normal_pages += p->normal_num;
+             p->pages->num = 0;
+             p->pages->block = NULL;
+             qemu_mutex_unlock(&p->mutex);
+ 
+-            trace_multifd_send(p->id, packet_num, used, flags,
+            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
+                                p->next_packet_size);
+ 
+             p->iov[0].iov_len = p->packet_len;
+@@ -710,7 +717,7 @@ out:
+     qemu_mutex_unlock(&p->mutex);
+ 
+     rcu_unregister_thread();
+-    trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);
+    trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages);
+ 
+     return NULL;
+ }
+@@ -910,6 +917,7 @@ int multifd_save_setup(Error **errp)
+         p->tls_hostname = g_strdup(s->hostname);
+         /* We need one extra place for the packet header */
+         p->iov = g_new0(struct iovec, page_count + 1);
+        p->normal = g_new0(ram_addr_t, page_count);
+         socket_send_channel_create(multifd_new_send_channel_async, p);
+     }
+ 
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 7496f951a7..7823199dbe 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -104,14 +104,18 @@ typedef struct {
+     /* thread local variables */
+     /* packets sent through this channel */
+     uint64_t num_packets;
+-    /* pages sent through this channel */
+-    uint64_t num_pages;
+    /* non zero pages sent through this channel */
+    uint64_t total_normal_pages;
+     /* syncs main thread and channels */
+     QemuSemaphore sem_sync;
+     /* buffers to send */
+     struct iovec *iov;
+     /* number of iovs used */
+     uint32_t iovs_num;
+    /* Pages that are not zero */
+    ram_addr_t *normal;
+    /* num of non zero pages */
+    uint32_t normal_num;
+     /* used for compression methods */
+     void *data;
+ }  MultiFDSendParams;
+diff --git a/migration/trace-events b/migration/trace-events
+index 5172cb3b3d..171a83a55d 100644
+--- a/migration/trace-events
+++ b/migration/trace-events
+@@ -124,13 +124,13 @@ multifd_recv_sync_main_wait(uint8_t id) "channel %u"
+ multifd_recv_terminate_threads(bool error) "error %d"
+ multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64
+ multifd_recv_thread_start(uint8_t id) "%u"
+-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u"
+multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u"
+ multifd_send_error(uint8_t id) "channel %u"
+ multifd_send_sync_main(long packet_num) "packet num %ld"
+ multifd_send_sync_main_signal(uint8_t id) "channel %u"
+ multifd_send_sync_main_wait(uint8_t id) "channel %u"
+ multifd_send_terminate_threads(bool error) "error %d"
+-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %"  PRIu64
+multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %"  PRIu64
+ multifd_send_thread_start(uint8_t id) "%u"
+ multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"
+ multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s"
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch
+++ b/SOURCES/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch
@ -0,0 +1,163 @@
+From fce933410a5068220a5f29011a6d1a647e357a62 Mon Sep 17 00:00:00 2001
+From: Leonardo Bras <leobras@redhat.com>
+Date: Wed, 18 May 2022 02:52:25 -0300
+Subject: [PATCH 21/37] multifd: multifd_send_sync_main now returns negative on
+ error
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [21/26] b4e4f3663576aa87f3b2f66f1d38bad4f50bd4ac
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+Even though multifd_send_sync_main() currently emits error_reports, its
+callers don't really check it before continuing.
+
+Change multifd_send_sync_main() to return -1 on error and 0 on success.
+Also change all its callers to make use of this change and possibly fail
+earlier.
+
+(This change is important to the next patch on the multifd zero copy
+implementation, to make sure an error in zero-copy flush does not go
+unnoticed.)
+
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Message-Id: <20220513062836.965425-7-leobras@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 33d70973a3a6e8c6b62bcbc64d9e488961981007)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd.c | 10 ++++++----
+ migration/multifd.h |  2 +-
+ migration/ram.c     | 29 ++++++++++++++++++++++-------
+ 3 files changed, 29 insertions(+), 12 deletions(-)
+
+diff --git a/migration/multifd.c b/migration/multifd.c
+index e53811f04a..1e34e01ebc 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -573,17 +573,17 @@ void multifd_save_cleanup(void)
+     multifd_send_state = NULL;
+ }
+ 
+-void multifd_send_sync_main(QEMUFile *f)
+int multifd_send_sync_main(QEMUFile *f)
+ {
+     int i;
+ 
+     if (!migrate_use_multifd()) {
+-        return;
+        return 0;
+     }
+     if (multifd_send_state->pages->num) {
+         if (multifd_send_pages(f) < 0) {
+             error_report("%s: multifd_send_pages fail", __func__);
+-            return;
+            return -1;
+         }
+     }
+     for (i = 0; i < migrate_multifd_channels(); i++) {
+@@ -596,7 +596,7 @@ void multifd_send_sync_main(QEMUFile *f)
+         if (p->quit) {
+             error_report("%s: channel %d has already quit", __func__, i);
+             qemu_mutex_unlock(&p->mutex);
+-            return;
+            return -1;
+         }
+ 
+         p->packet_num = multifd_send_state->packet_num++;
+@@ -615,6 +615,8 @@ void multifd_send_sync_main(QEMUFile *f)
+         qemu_sem_wait(&p->sem_sync);
+     }
+     trace_multifd_send_sync_main(multifd_send_state->packet_num);
+
+    return 0;
+ }
+ 
+ static void *multifd_send_thread(void *opaque)
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 7823199dbe..92de878155 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp);
+ bool multifd_recv_all_channels_created(void);
+ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+ void multifd_recv_sync_main(void);
+-void multifd_send_sync_main(QEMUFile *f);
+int multifd_send_sync_main(QEMUFile *f);
+ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
+ 
+ /* Multifd Compression flags */
+diff --git a/migration/ram.c b/migration/ram.c
+index 863035d235..3e208efca7 100644
+--- a/migration/ram.c
+++ b/migration/ram.c
+@@ -2992,6 +2992,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
+ {
+     RAMState **rsp = opaque;
+     RAMBlock *block;
+    int ret;
+ 
+     if (compress_threads_save_setup()) {
+         return -1;
+@@ -3026,7 +3027,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque)
+     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
+     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
+ 
+-    multifd_send_sync_main(f);
+    ret =  multifd_send_sync_main(f);
+    if (ret < 0) {
+        return ret;
+    }
+
+     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+     qemu_fflush(f);
+ 
+@@ -3135,7 +3140,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque)
+ out:
+     if (ret >= 0
+         && migration_is_setup_or_active(migrate_get_current()->state)) {
+-        multifd_send_sync_main(rs->f);
+        ret = multifd_send_sync_main(rs->f);
+        if (ret < 0) {
+            return ret;
+        }
+
+         qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+         qemu_fflush(f);
+         ram_counters.transferred += 8;
+@@ -3193,13 +3202,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
+         ram_control_after_iterate(f, RAM_CONTROL_FINISH);
+     }
+ 
+-    if (ret >= 0) {
+-        multifd_send_sync_main(rs->f);
+-        qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+-        qemu_fflush(f);
+    if (ret < 0) {
+        return ret;
+     }
+ 
+-    return ret;
+    ret = multifd_send_sync_main(rs->f);
+    if (ret < 0) {
+        return ret;
+    }
+
+    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
+    qemu_fflush(f);
+
+    return 0;
+ }
+ 
+ static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch
+++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch
@ -0,0 +1,135 @@
+From 5f53448092c944857a2b89138f22c5ab335d8250 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 05/37] multifd: remove used parameter from send_prepare()
+ method
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [5/26] ad6360d19d65e8c332dcdc3d3234478639e03db8
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+It is already there as p->pages->num.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 02fb81043ecee338e4aeb8f5be09a46325dc5e43)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 7 +++----
+ migration/multifd-zstd.c | 7 +++----
+ migration/multifd.c      | 9 +++------
+ migration/multifd.h      | 2 +-
+ 4 files changed, 10 insertions(+), 15 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index f403d2f031..0c70a2dc78 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -96,10 +96,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+     struct iovec *iov = p->pages->iov;
+     struct zlib_data *z = p->data;
+@@ -108,11 +107,11 @@ static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+     int ret;
+     uint32_t i;
+ 
+-    for (i = 0; i < used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         uint32_t available = z->zbuff_len - out_size;
+         int flush = Z_NO_FLUSH;
+ 
+-        if (i == used - 1) {
+        if (i == p->pages->num - 1) {
+             flush = Z_SYNC_FLUSH;
+         }
+ 
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 8d657f8860..466b370cad 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -109,10 +109,9 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+     struct iovec *iov = p->pages->iov;
+     struct zstd_data *z = p->data;
+@@ -123,10 +122,10 @@ static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp)
+     z->out.size = z->zbuff_len;
+     z->out.pos = 0;
+ 
+-    for (i = 0; i < used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         ZSTD_EndDirective flush = ZSTD_e_continue;
+ 
+-        if (i == used - 1) {
+        if (i == p->pages->num - 1) {
+             flush = ZSTD_e_flush;
+         }
+         z->in.src = iov[i].iov_base;
+diff --git a/migration/multifd.c b/migration/multifd.c
+index ce7101cf9d..098ef8842c 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -82,13 +82,11 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int nocomp_send_prepare(MultiFDSendParams *p, uint32_t used,
+-                               Error **errp)
+static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
+ {
+-    p->next_packet_size = used * qemu_target_page_size();
+    p->next_packet_size = p->pages->num * qemu_target_page_size();
+     p->flags |= MULTIFD_FLAG_NOCOMP;
+     return 0;
+ }
+@@ -654,8 +652,7 @@ static void *multifd_send_thread(void *opaque)
+             uint32_t flags = p->flags;
+ 
+             if (used) {
+-                ret = multifd_send_state->ops->send_prepare(p, used,
+-                                                            &local_err);
+                ret = multifd_send_state->ops->send_prepare(p, &local_err);
+                 if (ret != 0) {
+                     qemu_mutex_unlock(&p->mutex);
+                     break;
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 86820dd028..7968cc5c20 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -159,7 +159,7 @@ typedef struct {
+     /* Cleanup for sending side */
+     void (*send_cleanup)(MultiFDSendParams *p, Error **errp);
+     /* Prepare the send packet */
+-    int (*send_prepare)(MultiFDSendParams *p, uint32_t used, Error **errp);
+    int (*send_prepare)(MultiFDSendParams *p, Error **errp);
+     /* Write the send packet */
+     int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp);
+     /* Setup for receiving side */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch
+++ b/SOURCES/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch
@ -0,0 +1,149 @@
+From 8cdedf86dc193673ea24516e7b44f8b4da5dd713 Mon Sep 17 00:00:00 2001
+From: Juan Quintela <quintela@redhat.com>
+Date: Wed, 18 May 2022 02:52:23 -0300
+Subject: [PATCH 06/37] multifd: remove used parameter from send_recv_pages()
+ method
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás <leobras@redhat.com>
+RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7
+RH-Commit: [6/26] 5c1a506e4178501a0894ea4e7ac919e1d4d4cc32
+RH-Bugzilla: 2072049
+RH-Acked-by: Peter Xu <peterx@redhat.com>
+RH-Acked-by: Daniel P. Berrangé <berrange@redhat.com>
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+
+It is already there as p->pages->num.
+
+Signed-off-by: Juan Quintela <quintela@redhat.com>
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit 40a4bfe9d3f8ad35a9c3ffb4cbf7367e2777054b)
+Signed-off-by: Leonardo Bras <leobras@redhat.com>
+---
+ migration/multifd-zlib.c | 9 ++++-----
+ migration/multifd-zstd.c | 7 +++----
+ migration/multifd.c      | 7 +++----
+ migration/multifd.h      | 2 +-
+ 4 files changed, 11 insertions(+), 14 deletions(-)
+
+diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
+index 0c70a2dc78..330fc021c5 100644
+--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
+@@ -235,17 +235,16 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     struct zlib_data *z = p->data;
+     z_stream *zs = &z->zs;
+     uint32_t in_size = p->next_packet_size;
+     /* we measure the change of total_out */
+     uint32_t out_size = zs->total_out;
+-    uint32_t expected_size = used * qemu_target_page_size();
+    uint32_t expected_size = p->pages->num * qemu_target_page_size();
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+     int ret;
+     int i;
+@@ -264,12 +263,12 @@ static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+     zs->avail_in = in_size;
+     zs->next_in = z->zbuff;
+ 
+-    for (i = 0; i < used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         struct iovec *iov = &p->pages->iov[i];
+         int flush = Z_NO_FLUSH;
+         unsigned long start = zs->total_out;
+ 
+-        if (i == used - 1) {
+        if (i == p->pages->num - 1) {
+             flush = Z_SYNC_FLUSH;
+         }
+ 
+diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
+index 466b370cad..f0d1105792 100644
+--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
+@@ -255,14 +255,13 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     uint32_t in_size = p->next_packet_size;
+     uint32_t out_size = 0;
+-    uint32_t expected_size = used * qemu_target_page_size();
+    uint32_t expected_size = p->pages->num * qemu_target_page_size();
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+     struct zstd_data *z = p->data;
+     int ret;
+@@ -283,7 +282,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+     z->in.size = in_size;
+     z->in.pos = 0;
+ 
+-    for (i = 0; i < used; i++) {
+    for (i = 0; i < p->pages->num; i++) {
+         struct iovec *iov = &p->pages->iov[i];
+ 
+         z->out.dst = iov->iov_base;
+diff --git a/migration/multifd.c b/migration/multifd.c
+index 098ef8842c..55d99a8232 100644
+--- a/migration/multifd.c
+++ b/migration/multifd.c
+@@ -141,10 +141,9 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p)
+  * Returns 0 for success or -1 for error
+  *
+  * @p: Params for the channel that we are using
+- * @used: number of pages used
+  * @errp: pointer to an error
+  */
+-static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp)
+ {
+     uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK;
+ 
+@@ -153,7 +152,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp)
+                    p->id, flags, MULTIFD_FLAG_NOCOMP);
+         return -1;
+     }
+-    return qio_channel_readv_all(p->c, p->pages->iov, used, errp);
+    return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp);
+ }
+ 
+ static MultiFDMethods multifd_nocomp_ops = {
+@@ -1099,7 +1098,7 @@ static void *multifd_recv_thread(void *opaque)
+         qemu_mutex_unlock(&p->mutex);
+ 
+         if (used) {
+-            ret = multifd_recv_state->ops->recv_pages(p, used, &local_err);
+            ret = multifd_recv_state->ops->recv_pages(p, &local_err);
+             if (ret != 0) {
+                 break;
+             }
+diff --git a/migration/multifd.h b/migration/multifd.h
+index 7968cc5c20..e57adc783b 100644
+--- a/migration/multifd.h
+++ b/migration/multifd.h
+@@ -167,7 +167,7 @@ typedef struct {
+     /* Cleanup for receiving side */
+     void (*recv_cleanup)(MultiFDRecvParams *p);
+     /* Read all pages */
+-    int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp);
+    int (*recv_pages)(MultiFDRecvParams *p, Error **errp);
+ } MultiFDMethods;
+ 
+ void multifd_register_ops(int method, MultiFDMethods *ops);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch
@ -0,0 +1,63 @@
+From 115507e5e8b97993b50ea7b39d6d4bb493973e46 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 5 Aug 2022 11:42:14 +0200
+Subject: [PATCH 9/9] pc-bios/s390-ccw: Fix booting with logical block size <
+ physical block size
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 207: pc-bios/s390-ccw: Fix booting with logical block size < physical block size
+RH-Commit: [1/1] ab22832592e0a48277bf7aca1b941a1be79aeab6
+RH-Bugzilla: 2112296
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Claudio Imbrenda <None>
+
+For accessing single blocks during boot, it's the logical block size that
+matters. (Physical block sizes are rather interesting e.g. for creating
+file systems with the correct alignment for speed reasons etc.).
+So the s390-ccw bios has to use the logical block size for calculating
+sector numbers during the boot phase; the "physical_block_exp" shift
+value must not be taken into account. This change fixes the boot process
+when the guest has been installed on a disk where the logical block size
+differs from the physical one, e.g. if the guest has been installed
+like this:
+
+ qemu-system-s390x -nographic -accel kvm -m 2G \
+  -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \
+  -device virtio-scsi -device scsi-cd,drive=d1 \
+  -drive if=none,id=d2,file=test.qcow2,format=qcow2 \
+  -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512
+
+Linux correctly uses the logical block size of 512 for the installation,
+but without this patch the s390-ccw bios tries to boot from the disk with
+a block size of 4096. (This used to work by accident in the past due to
+the virtio_assume_scsi() hack that enforced 512 byte sectors on all
+virtio-block disks, but that hack was removed in commit 5447de2619050a0a4d
+to fix other scenarios.)
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112296
+Message-Id: <20220805094214.285223-1-thuth@redhat.com>
+Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+Reviewed-by: Eric Farman <farman@linux.ibm.com>
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363)
+---
+ pc-bios/s390-ccw/virtio-blkdev.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index 8271c47296..794f99b42c 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -173,7 +173,7 @@ int virtio_get_block_size(void)
+ 
+     switch (vdev->senseid.cu_model) {
+     case VIRTIO_ID_BLOCK:
+-        return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp;
+        return vdev->config.blk.blk_size;
+     case VIRTIO_ID_SCSI:
+         return vdev->scsi_block_size;
+     }
+-- 
+2.31.1
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch
@ -0,0 +1,180 @@
+From 0e7b71a3f0b3a2e1dba54f02efc15b02f337e031 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 36/37] pc-bios/s390-ccw: Split virtio-scsi code from
+ virtio_blk_setup_device()
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [8/9] 8e24806a91c91b2e3603da88e5a22d96a91e8686
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:19:00 2022 +0200
+
+    pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device()
+
+    The next patch is going to add more virtio-block specific code to
+    virtio_blk_setup_device(), and if the virtio-scsi code is also in
+    there, this is more cumbersome. And the calling function virtio_setup()
+    in main.c looks at the device type already anyway, so it's more
+    logical to separate the virtio-scsi stuff into a new function in
+    virtio-scsi.c instead.
+
+    Message-Id: <20220704111903.62400-10-thuth@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/main.c          | 24 +++++++++++++++++-------
+ pc-bios/s390-ccw/virtio-blkdev.c | 20 ++------------------
+ pc-bios/s390-ccw/virtio-scsi.c   | 19 ++++++++++++++++++-
+ pc-bios/s390-ccw/virtio-scsi.h   |  2 +-
+ 4 files changed, 38 insertions(+), 27 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/main.c b/pc-bios/s390-ccw/main.c
+index 5d2b7ba94d..13e1d8fdf7 100644
+--- a/pc-bios/s390-ccw/main.c
+++ b/pc-bios/s390-ccw/main.c
+@@ -14,6 +14,7 @@
+ #include "s390-ccw.h"
+ #include "cio.h"
+ #include "virtio.h"
+#include "virtio-scsi.h"
+ #include "dasd-ipl.h"
+ 
+ char stack[PAGE_SIZE * 8] __attribute__((__aligned__(PAGE_SIZE)));
+@@ -218,6 +219,7 @@ static int virtio_setup(void)
+ {
+     VDev *vdev = virtio_get_device();
+     QemuIplParameters *early_qipl = (QemuIplParameters *)QIPL_ADDRESS;
+    int ret;
+ 
+     memcpy(&qipl, early_qipl, sizeof(QemuIplParameters));
+ 
+@@ -225,18 +227,26 @@ static int virtio_setup(void)
+         menu_setup();
+     }
+ 
+-    if (virtio_get_device_type() == VIRTIO_ID_NET) {
+    switch (vdev->senseid.cu_model) {
+    case VIRTIO_ID_NET:
+         sclp_print("Network boot device detected\n");
+         vdev->netboot_start_addr = qipl.netboot_start_addr;
+-    } else {
+-        int ret = virtio_blk_setup_device(blk_schid);
+-        if (ret) {
+-            return ret;
+-        }
+        return 0;
+    case VIRTIO_ID_BLOCK:
+        ret = virtio_blk_setup_device(blk_schid);
+        break;
+    case VIRTIO_ID_SCSI:
+        ret = virtio_scsi_setup_device(blk_schid);
+        break;
+    default:
+        panic("\n! No IPL device available !\n");
+    }
+
+    if (!ret) {
+         IPL_assert(virtio_ipl_disk_is_valid(), "No valid IPL device detected");
+     }
+ 
+-    return 0;
+    return ret;
+ }
+ 
+ static void ipl_boot_device(void)
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index db1f7f44aa..c175b66a47 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -222,27 +222,11 @@ uint64_t virtio_get_blocks(void)
+ int virtio_blk_setup_device(SubChannelId schid)
+ {
+     VDev *vdev = virtio_get_device();
+-    int ret = 0;
+ 
+     vdev->schid = schid;
+     virtio_setup_ccw(vdev);
+ 
+-    switch (vdev->senseid.cu_model) {
+-    case VIRTIO_ID_BLOCK:
+-        sclp_print("Using virtio-blk.\n");
+-        break;
+-    case VIRTIO_ID_SCSI:
+-        IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
+-            "Config: sense size mismatch");
+-        IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE,
+-            "Config: CDB size mismatch");
+    sclp_print("Using virtio-blk.\n");
+ 
+-        sclp_print("Using virtio-scsi.\n");
+-        ret = virtio_scsi_setup(vdev);
+-        break;
+-    default:
+-        panic("\n! No IPL device available !\n");
+-    }
+-
+-    return ret;
+    return 0;
+ }
+diff --git a/pc-bios/s390-ccw/virtio-scsi.c b/pc-bios/s390-ccw/virtio-scsi.c
+index 2c8d0f3097..3b7069270c 100644
+--- a/pc-bios/s390-ccw/virtio-scsi.c
+++ b/pc-bios/s390-ccw/virtio-scsi.c
+@@ -329,7 +329,7 @@ static void scsi_parse_capacity_report(void *data,
+     }
+ }
+ 
+-int virtio_scsi_setup(VDev *vdev)
+static int virtio_scsi_setup(VDev *vdev)
+ {
+     int retry_test_unit_ready = 3;
+     uint8_t data[256];
+@@ -430,3 +430,20 @@ int virtio_scsi_setup(VDev *vdev)
+ 
+     return 0;
+ }
+
+int virtio_scsi_setup_device(SubChannelId schid)
+{
+    VDev *vdev = virtio_get_device();
+
+    vdev->schid = schid;
+    virtio_setup_ccw(vdev);
+
+    IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
+               "Config: sense size mismatch");
+    IPL_assert(vdev->config.scsi.cdb_size == VIRTIO_SCSI_CDB_SIZE,
+               "Config: CDB size mismatch");
+
+    sclp_print("Using virtio-scsi.\n");
+
+    return virtio_scsi_setup(vdev);
+}
+diff --git a/pc-bios/s390-ccw/virtio-scsi.h b/pc-bios/s390-ccw/virtio-scsi.h
+index 4b14c2c2f9..e6b6cd4815 100644
+--- a/pc-bios/s390-ccw/virtio-scsi.h
+++ b/pc-bios/s390-ccw/virtio-scsi.h
+@@ -67,8 +67,8 @@ static inline bool virtio_scsi_response_ok(const VirtioScsiCmdResp *r)
+         return r->response == VIRTIO_SCSI_S_OK && r->status == CDB_STATUS_GOOD;
+ }
+ 
+-int virtio_scsi_setup(VDev *vdev);
+ int virtio_scsi_read_many(VDev *vdev,
+                           ulong sector, void *load_addr, int sec_num);
+int virtio_scsi_setup_device(SubChannelId schid);
+ 
+ #endif /* VIRTIO_SCSI_H */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch
@ -0,0 +1,102 @@
+From 8433b2ba40d0618c7086da87685e1c51b6da3b11 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 30/37] pc-bios/s390-ccw/bootmap: Improve the guessing logic in
+ zipl_load_vblk()
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [2/9] db1d2e7929352bec0e1a5d4cf3fb385bbe02304b
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 422865f6672ee1482b98d18321b55c1ecfb06c82
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:54 2022 +0200
+
+    pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk()
+
+    The logic of trying a final ISO or ECKD boot on virtio-block devices is
+    very weird: Since the geometry hardly ever matches in virtio_disk_is_scsi(),
+    virtio_blk_setup_device() always sets a "guessed" disk geometry via
+    virtio_assume_scsi() (which is certainly also wrong in a lot of cases).
+
+    zipl_load_vblk() then sees that there's been a "virtio_guessed_disk_nature"
+    and tries to fix up the geometry again via virtio_assume_iso9660() before
+    always trying to do ipl_iso_el_torito(). That's a very brain-twisting
+    way of attempting to boot from ISO images, which won't work anymore after
+    the following patches that will clean up the virtio_assume_scsi() mess
+    (and thus get rid of the "virtio_guessed_disk_nature" here).
+
+    Let's try a better approach instead: ISO files always have a magic
+    string "CD001" at offset 0x8001 (see e.g. the ECMA-119 specification)
+    which we can use to decide whether we should try to boot in ISO 9660
+    mode (which we should also try if we see a sector size of 2048).
+
+    And if we were not able to boot in ISO mode here, the final boot attempt
+    before panicking is to boot in ECKD mode. Since this is our last boot
+    attempt anyway, simply always assume the ECKD geometry here (if the sector
+    size was not 4096 yet), so that we also do not depend on the guessed disk
+    geometry from virtio_blk_setup_device() here anymore.
+
+    Message-Id: <20220704111903.62400-4-thuth@redhat.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/bootmap.c | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/bootmap.c b/pc-bios/s390-ccw/bootmap.c
+index 56411ab3b6..994e59c0b0 100644
+--- a/pc-bios/s390-ccw/bootmap.c
+++ b/pc-bios/s390-ccw/bootmap.c
+@@ -780,18 +780,37 @@ static void ipl_iso_el_torito(void)
+     }
+ }
+ 
+/**
+ * Detect whether we're trying to boot from an .ISO image.
+ * These always have a signature string "CD001" at offset 0x8001.
+ */
+static bool has_iso_signature(void)
+{
+    int blksize = virtio_get_block_size();
+
+    if (!blksize || virtio_read(0x8000 / blksize, sec)) {
+        return false;
+    }
+
+    return !memcmp("CD001", &sec[1], 5);
+}
+
+ /***********************************************************************
+  * Bus specific IPL sequences
+  */
+ 
+ static void zipl_load_vblk(void)
+ {
+-    if (virtio_guessed_disk_nature()) {
+-        virtio_assume_iso9660();
+    int blksize = virtio_get_block_size();
+
+    if (blksize == VIRTIO_ISO_BLOCK_SIZE || has_iso_signature()) {
+        if (blksize != VIRTIO_ISO_BLOCK_SIZE) {
+            virtio_assume_iso9660();
+        }
+        ipl_iso_el_torito();
+     }
+-    ipl_iso_el_torito();
+ 
+-    if (virtio_guessed_disk_nature()) {
+    if (blksize != VIRTIO_DASD_DEFAULT_BLOCK_SIZE) {
+         sclp_print("Using guessed DASD geometry.\n");
+         virtio_assume_eckd();
+     }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch
@ -0,0 +1,56 @@
+From 8b05a4aa32e5ae6cdbc16a5350f6df35d2d79efc Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 35/37] pc-bios/s390-ccw/virtio: Beautify the code for reading
+ virtqueue configuration
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [7/9] 52fb7fee7d7c46397f32e35bd5f92f82616dfb5c
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 070824885741f5d2a66626d3c4ecb2773c8e0552
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:59 2022 +0200
+
+    pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration
+
+    It looks nicer if we separate the run_ccw() from the IPL_assert()
+    statement, and the error message should talk about "virtio device"
+    instead of "block device", since this code is nowadays used for
+    non-block (i.e. network) devices, too.
+
+    Message-Id: <20220704111903.62400-9-thuth@redhat.com>
+    Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
+index d8c2b52710..f37510f312 100644
+--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
+@@ -289,9 +289,8 @@ void virtio_setup_ccw(VDev *vdev)
+             .num = 0,
+         };
+ 
+-        IPL_assert(
+-            run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false) == 0,
+-            "Could not get block device VQ configuration");
+        rc = run_ccw(vdev, CCW_CMD_READ_VQ_CONF, &config, sizeof(config), false);
+        IPL_assert(rc == 0, "Could not get virtio device VQ configuration");
+         info.num = config.num;
+         vring_init(&vdev->vrings[i], &info);
+         vdev->vrings[i].schid = vdev->schid;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch
@ -0,0 +1,63 @@
+From 511d05f31824b375057ba8dea3f0343ce6e1c1e8 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 29/37] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD
+ block size
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [1/9] 1053101fd5fb591131c567ff98c7d92b63a9dfa9
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:53 2022 +0200
+
+    pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size
+
+    Use VIRTIO_DASD_DEFAULT_BLOCK_SIZE instead of the magic value 4096.
+
+    Message-Id: <20220704111903.62400-3-thuth@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio-blkdev.c | 2 +-
+ pc-bios/s390-ccw/virtio.h        | 1 +
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index 7d35050292..6483307630 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -155,7 +155,7 @@ void virtio_assume_eckd(void)
+     vdev->config.blk.physical_block_exp = 0;
+     switch (vdev->senseid.cu_model) {
+     case VIRTIO_ID_BLOCK:
+-        vdev->config.blk.blk_size = 4096;
+        vdev->config.blk.blk_size = VIRTIO_DASD_DEFAULT_BLOCK_SIZE;
+         break;
+     case VIRTIO_ID_SCSI:
+         vdev->config.blk.blk_size = vdev->scsi_block_size;
+diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
+index 19fceb6495..9e410bde6f 100644
+--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
+@@ -198,6 +198,7 @@ extern int virtio_read_many(ulong sector, void *load_addr, int sec_num);
+ #define VIRTIO_SECTOR_SIZE 512
+ #define VIRTIO_ISO_BLOCK_SIZE 2048
+ #define VIRTIO_SCSI_BLOCK_SIZE 512
+#define VIRTIO_DASD_DEFAULT_BLOCK_SIZE 4096
+ 
+ static inline ulong virtio_sector_adjust(ulong sector)
+ {
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch
@ -0,0 +1,67 @@
+From a60940fb7ef026f3aa968e77389efa51ea648ddf Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 34/37] pc-bios/s390-ccw/virtio: Read device config after
+ feature negotiation
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [6/9] 99ed8765d614207db19ded75d62c65171674d982
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit aa5c69ce99411c4886bcd051f288afc02b6d968d
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:58 2022 +0200
+
+    pc-bios/s390-ccw/virtio: Read device config after feature negotiation
+
+    Feature negotiation should be done first, since some fields in the
+    config area can depend on the negotiated features and thus should
+    rather be read afterwards.
+
+    While we're at it, also adjust the error message here a little bit
+    (the code is nowadays used for non-block virtio devices, too).
+
+    Message-Id: <20220704111903.62400-8-thuth@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
+index 4e85a2eb82..d8c2b52710 100644
+--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
+@@ -262,10 +262,6 @@ void virtio_setup_ccw(VDev *vdev)
+     rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+     IPL_assert(rc == 0, "Could not write DRIVER status to host");
+ 
+-    IPL_assert(
+-        run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0,
+-       "Could not get block device configuration");
+-
+     /* Feature negotiation */
+     for (i = 0; i < ARRAY_SIZE(vdev->guest_features); i++) {
+         feats.features = 0;
+@@ -278,6 +274,9 @@ void virtio_setup_ccw(VDev *vdev)
+         IPL_assert(rc == 0, "Could not set features bits");
+     }
+ 
+    rc = run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false);
+    IPL_assert(rc == 0, "Could not get virtio device configuration");
+
+     for (i = 0; i < vdev->nr_vqs; i++) {
+         VqInfo info = {
+             .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE),
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch
@ -0,0 +1,93 @@
+From 5cf01cccb7501c801fa9f21a021bc9e7d1fc56e3 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 33/37] pc-bios/s390-ccw/virtio: Set missing status bits while
+ initializing
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [5/9] 6072245f49c229518246b4a0d1be360331305bfa
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:57 2022 +0200
+
+    pc-bios/s390-ccw/virtio: Set missing status bits while initializing
+
+    According to chapter "3.1.1 Driver Requirements: Device Initialization"
+    of the Virtio specification (v1.1), a driver for a device has to set
+    the ACKNOWLEDGE and DRIVER bits in the status field after resetting
+    the device. The s390-ccw bios skipped these steps so far, and it seems
+    like QEMU never cared. Anyway, it's better to follow the spec, so
+    let's set these bits now in the right spots, too.
+
+    Message-Id: <20220704111903.62400-7-thuth@redhat.com>
+    Acked-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+    Reviewed-by: Cornelia Huck <cohuck@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio.c | 18 ++++++++++++++----
+ 1 file changed, 14 insertions(+), 4 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/virtio.c b/pc-bios/s390-ccw/virtio.c
+index 5d2c6e3381..4e85a2eb82 100644
+--- a/pc-bios/s390-ccw/virtio.c
+++ b/pc-bios/s390-ccw/virtio.c
+@@ -220,7 +220,7 @@ int virtio_run(VDev *vdev, int vqid, VirtioCmd *cmd)
+ void virtio_setup_ccw(VDev *vdev)
+ {
+     int i, rc, cfg_size = 0;
+-    unsigned char status = VIRTIO_CONFIG_S_DRIVER_OK;
+    uint8_t status;
+     struct VirtioFeatureDesc {
+         uint32_t features;
+         uint8_t index;
+@@ -234,6 +234,10 @@ void virtio_setup_ccw(VDev *vdev)
+ 
+     run_ccw(vdev, CCW_CMD_VDEV_RESET, NULL, 0, false);
+ 
+    status = VIRTIO_CONFIG_S_ACKNOWLEDGE;
+    rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+    IPL_assert(rc == 0, "Could not write ACKNOWLEDGE status to host");
+
+     switch (vdev->senseid.cu_model) {
+     case VIRTIO_ID_NET:
+         vdev->nr_vqs = 2;
+@@ -253,6 +257,11 @@ void virtio_setup_ccw(VDev *vdev)
+     default:
+         panic("Unsupported virtio device\n");
+     }
+
+    status |= VIRTIO_CONFIG_S_DRIVER;
+    rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+    IPL_assert(rc == 0, "Could not write DRIVER status to host");
+
+     IPL_assert(
+         run_ccw(vdev, CCW_CMD_READ_CONF, &vdev->config, cfg_size, false) == 0,
+        "Could not get block device configuration");
+@@ -291,9 +300,10 @@ void virtio_setup_ccw(VDev *vdev)
+             run_ccw(vdev, CCW_CMD_SET_VQ, &info, sizeof(info), false) == 0,
+             "Cannot set VQ info");
+     }
+-    IPL_assert(
+-        run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false) == 0,
+-        "Could not write status to host");
+
+    status |= VIRTIO_CONFIG_S_DRIVER_OK;
+    rc = run_ccw(vdev, CCW_CMD_WRITE_STATUS, &status, sizeof(status), false);
+    IPL_assert(rc == 0, "Could not write DRIVER_OK status to host");
+ }
+ 
+ bool virtio_is_supported(SubChannelId schid)
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch
@ -0,0 +1,101 @@
+From 5b3548c50e35729d724403b83e26579d31621367 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 32/37] pc-bios/s390-ccw/virtio-blkdev: Remove
+ virtio_assume_scsi()
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [4/9] 5256c4e6f4d5c5aedf1bad3fee30dd3ad230a3dd
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 5447de2619050a0a4dd480b97f88a9b58da360d1
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:56 2022 +0200
+
+    pc-bios/s390-ccw/virtio-blkdev: Remove virtio_assume_scsi()
+
+    The virtio_assume_scsi() function is very questionable: First, it
+    is only called for virtio-blk, and not for virtio-scsi, so the naming
+    is already quite confusing. Second, it is called if we detected an
+    "invalid" IPL disk, trying to fix it by blindly setting a sector
+    size of 512. This of course won't work in most cases since disks
+    might have a different sector size for a reason.
+
+    Thus let's remove this strange function now. The calling code can
+    also be removed completely, since there is another spot in main.c
+    that does "IPL_assert(virtio_ipl_disk_is_valid(), ...)" to make
+    sure that we do not try to IPL from an invalid device.
+
+    Message-Id: <20220704111903.62400-6-thuth@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio-blkdev.c | 24 ------------------------
+ pc-bios/s390-ccw/virtio.h        |  1 -
+ 2 files changed, 25 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index 7e13155589..db1f7f44aa 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -112,23 +112,6 @@ VirtioGDN virtio_guessed_disk_nature(void)
+     return virtio_get_device()->guessed_disk_nature;
+ }
+ 
+-void virtio_assume_scsi(void)
+-{
+-    VDev *vdev = virtio_get_device();
+-
+-    switch (vdev->senseid.cu_model) {
+-    case VIRTIO_ID_BLOCK:
+-        vdev->guessed_disk_nature = VIRTIO_GDN_SCSI;
+-        vdev->config.blk.blk_size = VIRTIO_SCSI_BLOCK_SIZE;
+-        vdev->config.blk.physical_block_exp = 0;
+-        vdev->blk_factor = 1;
+-        break;
+-    case VIRTIO_ID_SCSI:
+-        vdev->scsi_block_size = VIRTIO_SCSI_BLOCK_SIZE;
+-        break;
+-    }
+-}
+-
+ void virtio_assume_iso9660(void)
+ {
+     VDev *vdev = virtio_get_device();
+@@ -247,13 +230,6 @@ int virtio_blk_setup_device(SubChannelId schid)
+     switch (vdev->senseid.cu_model) {
+     case VIRTIO_ID_BLOCK:
+         sclp_print("Using virtio-blk.\n");
+-        if (!virtio_ipl_disk_is_valid()) {
+-            /* make sure all getters but blocksize return 0 for
+-             * invalid IPL disk
+-             */
+-            memset(&vdev->config.blk, 0, sizeof(vdev->config.blk));
+-            virtio_assume_scsi();
+-        }
+         break;
+     case VIRTIO_ID_SCSI:
+         IPL_assert(vdev->config.scsi.sense_size == VIRTIO_SCSI_SENSE_SIZE,
+diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
+index 241730effe..600ba5052b 100644
+--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
+@@ -182,7 +182,6 @@ enum guessed_disk_nature_type {
+ typedef enum guessed_disk_nature_type VirtioGDN;
+ 
+ VirtioGDN virtio_guessed_disk_nature(void);
+-void virtio_assume_scsi(void);
+ void virtio_assume_eckd(void);
+ void virtio_assume_iso9660(void);
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch
@ -0,0 +1,63 @@
+From 042e966a70789bd3ed450fa4f57016129a34672e Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 37/37] pc-bios/s390-ccw/virtio-blkdev: Request the right
+ feature bits
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [9/9] f04835423d648b04f2187ef9890f2d1689e2b57e
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit 9125a314cca4a1838b09305a87d8efb98f80ab67
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:19:01 2022 +0200
+
+    pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits
+
+    The virtio-blk code uses the block size and geometry fields in the
+    config area. According to the virtio-spec, these have to be negotiated
+    with the right feature bits during initialization, otherwise they
+    might not be available. QEMU is so far very forgiving and always
+    provides them, but we should not rely on this behavior, so let's
+    better request them properly via the VIRTIO_BLK_F_GEOMETRY and
+    VIRTIO_BLK_F_BLK_SIZE feature bits.
+
+    Message-Id: <20220704111903.62400-11-thuth@redhat.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio-blkdev.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index c175b66a47..8271c47296 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -13,6 +13,9 @@
+ #include "virtio.h"
+ #include "virtio-scsi.h"
+ 
+#define VIRTIO_BLK_F_GEOMETRY   (1 << 4)
+#define VIRTIO_BLK_F_BLK_SIZE   (1 << 6)
+
+ static int virtio_blk_read_many(VDev *vdev, ulong sector, void *load_addr,
+                                 int sec_num)
+ {
+@@ -223,6 +226,7 @@ int virtio_blk_setup_device(SubChannelId schid)
+ {
+     VDev *vdev = virtio_get_device();
+ 
+    vdev->guest_features[0] = VIRTIO_BLK_F_GEOMETRY | VIRTIO_BLK_F_BLK_SIZE;
+     vdev->schid = schid;
+     virtio_setup_ccw(vdev);
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch
+++ b/SOURCES/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch
@ -0,0 +1,124 @@
+From f09f2f12133073d6ccab3b2bd95717d435adc442 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Fri, 8 Jul 2022 12:29:50 +0200
+Subject: [PATCH 31/37] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix
+ virtio_ipl_disk_is_valid()
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry
+RH-Commit: [3/9] ca0b836a417ce5bbd26e489551f573d6b2fc9e94
+RH-Bugzilla: 2098076
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2098076
+
+commit bbf615f7b707f009ef8e757d170902ad33b90644
+Author: Thomas Huth <thuth@redhat.com>
+Date:   Mon Jul 4 13:18:55 2022 +0200
+
+    pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid()
+
+    The s390-ccw bios fails to boot if the boot disk is a virtio-blk
+    disk with a sector size of 4096. For example:
+
+     dasdfmt -b 4096 -d cdl -y -p -M quick /dev/dasdX
+     fdasd -a /dev/dasdX
+     install a guest onto /dev/dasdX1 using virtio-blk
+     qemu-system-s390x -nographic -hda /dev/dasdX1
+
+    The bios then bails out with:
+
+     ! Cannot read block 0 !
+
+    Looking at virtio_ipl_disk_is_valid() and especially the function
+    virtio_disk_is_scsi(), it does not really make sense that we expect
+    only such a limited disk geometry (like a block size of 512) for
+    our boot disks. Let's relax the check and allow everything that
+    remotely looks like a sane disk.
+
+    Message-Id: <20220704111903.62400-5-thuth@redhat.com>
+    Reviewed-by: Eric Farman <farman@linux.ibm.com>
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ pc-bios/s390-ccw/virtio-blkdev.c | 41 ++++++--------------------------
+ pc-bios/s390-ccw/virtio.h        |  2 --
+ 2 files changed, 7 insertions(+), 36 deletions(-)
+
+diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c
+index 6483307630..7e13155589 100644
+--- a/pc-bios/s390-ccw/virtio-blkdev.c
+++ b/pc-bios/s390-ccw/virtio-blkdev.c
+@@ -166,46 +166,19 @@ void virtio_assume_eckd(void)
+         virtio_eckd_sectors_for_block_size(vdev->config.blk.blk_size);
+ }
+ 
+-bool virtio_disk_is_scsi(void)
+-{
+-    VDev *vdev = virtio_get_device();
+-
+-    if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI) {
+-        return true;
+-    }
+-    switch (vdev->senseid.cu_model) {
+-    case VIRTIO_ID_BLOCK:
+-        return (vdev->config.blk.geometry.heads == 255)
+-            && (vdev->config.blk.geometry.sectors == 63)
+-            && (virtio_get_block_size()  == VIRTIO_SCSI_BLOCK_SIZE);
+-    case VIRTIO_ID_SCSI:
+-        return true;
+-    }
+-    return false;
+-}
+-
+-bool virtio_disk_is_eckd(void)
+bool virtio_ipl_disk_is_valid(void)
+ {
+    int blksize = virtio_get_block_size();
+     VDev *vdev = virtio_get_device();
+-    const int block_size = virtio_get_block_size();
+ 
+-    if (vdev->guessed_disk_nature == VIRTIO_GDN_DASD) {
+    if (vdev->guessed_disk_nature == VIRTIO_GDN_SCSI ||
+        vdev->guessed_disk_nature == VIRTIO_GDN_DASD) {
+         return true;
+     }
+-    switch (vdev->senseid.cu_model) {
+-    case VIRTIO_ID_BLOCK:
+-        return (vdev->config.blk.geometry.heads == 15)
+-            && (vdev->config.blk.geometry.sectors ==
+-                virtio_eckd_sectors_for_block_size(block_size));
+-    case VIRTIO_ID_SCSI:
+-        return false;
+-    }
+-    return false;
+-}
+ 
+-bool virtio_ipl_disk_is_valid(void)
+-{
+-    return virtio_disk_is_scsi() || virtio_disk_is_eckd();
+    return (vdev->senseid.cu_model == VIRTIO_ID_BLOCK ||
+            vdev->senseid.cu_model == VIRTIO_ID_SCSI) &&
+           blksize >= 512 && blksize <= 4096;
+ }
+ 
+ int virtio_get_block_size(void)
+diff --git a/pc-bios/s390-ccw/virtio.h b/pc-bios/s390-ccw/virtio.h
+index 9e410bde6f..241730effe 100644
+--- a/pc-bios/s390-ccw/virtio.h
+++ b/pc-bios/s390-ccw/virtio.h
+@@ -186,8 +186,6 @@ void virtio_assume_scsi(void);
+ void virtio_assume_eckd(void);
+ void virtio_assume_iso9660(void);
+ 
+-extern bool virtio_disk_is_scsi(void);
+-extern bool virtio_disk_is_eckd(void);
+ extern bool virtio_ipl_disk_is_valid(void);
+ extern int virtio_get_block_size(void);
+ extern uint8_t virtio_get_heads(void);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
+++ b/SOURCES/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
@ -0,0 +1,162 @@
+From 552e7c8ae2c6e281a72791aefa1729be86f96642 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:52 +0200
+Subject: [PATCH 5/6] qcow2: Add errp to rebuild_refcount_structure()
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding
+RH-Commit: [3/4] 9dddd1d21383c4cbd528e5a0d42b0c2a7d87c8f6
+RH-Bugzilla: 1519071
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+
+Instead of fprintf()-ing error messages in rebuild_refcount_structure()
+and its rebuild_refcounts_write_refblocks() helper, pass them through an
+Error object to qcow2_check_refcounts() (which will then print it).
+
+Suggested-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-4-hreitz@redhat.com>
+Reviewed-by: Eric Blake <eblake@redhat.com>
+(cherry picked from commit 0423f75351ab83b844a31349218b0eadd830e07a)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ block/qcow2-refcount.c | 33 +++++++++++++++++++--------------
+ 1 file changed, 19 insertions(+), 14 deletions(-)
+
+diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
+index 555d8ba5ac..09f8ef4927 100644
+--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
+@@ -2462,7 +2462,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
+ static int rebuild_refcounts_write_refblocks(
+         BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+         int64_t first_cluster, int64_t end_cluster,
+-        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
+        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
+        Error **errp
+     )
+ {
+     BDRVQcow2State *s = bs->opaque;
+@@ -2513,8 +2514,8 @@ static int rebuild_refcounts_write_refblocks(
+                                                   nb_clusters,
+                                                   &first_free_cluster);
+             if (refblock_offset < 0) {
+-                fprintf(stderr, "ERROR allocating refblock: %s\n",
+-                        strerror(-refblock_offset));
+                error_setg_errno(errp, -refblock_offset,
+                                 "ERROR allocating refblock");
+                 return refblock_offset;
+             }
+ 
+@@ -2536,6 +2537,7 @@ static int rebuild_refcounts_write_refblocks(
+                                   on_disk_reftable_entries *
+                                   REFTABLE_ENTRY_SIZE);
+                 if (!on_disk_reftable) {
+                    error_setg(errp, "ERROR allocating reftable memory");
+                     return -ENOMEM;
+                 }
+ 
+@@ -2559,7 +2561,7 @@ static int rebuild_refcounts_write_refblocks(
+         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
+                                             s->cluster_size, false);
+         if (ret < 0) {
+-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+             return ret;
+         }
+ 
+@@ -2575,7 +2577,7 @@ static int rebuild_refcounts_write_refblocks(
+         ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
+                           s->cluster_size);
+         if (ret < 0) {
+-            fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+            error_setg_errno(errp, -ret, "ERROR writing refblock");
+             return ret;
+         }
+ 
+@@ -2598,7 +2600,8 @@ static int rebuild_refcounts_write_refblocks(
+ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                       BdrvCheckResult *res,
+                                       void **refcount_table,
+-                                      int64_t *nb_clusters)
+                                      int64_t *nb_clusters,
+                                      Error **errp)
+ {
+     BDRVQcow2State *s = bs->opaque;
+     int64_t reftable_offset = -1;
+@@ -2649,7 +2652,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+         rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                           0, *nb_clusters,
+                                           &on_disk_reftable,
+-                                          &on_disk_reftable_entries);
+                                          &on_disk_reftable_entries, errp);
+     if (reftable_size_changed < 0) {
+         res->check_errors++;
+         ret = reftable_size_changed;
+@@ -2673,8 +2676,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                               refcount_table, nb_clusters,
+                                               &first_free_cluster);
+         if (reftable_offset < 0) {
+-            fprintf(stderr, "ERROR allocating reftable: %s\n",
+-                    strerror(-reftable_offset));
+            error_setg_errno(errp, -reftable_offset,
+                             "ERROR allocating reftable");
+             res->check_errors++;
+             ret = reftable_offset;
+             goto fail;
+@@ -2692,7 +2695,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                                               reftable_start_cluster,
+                                               reftable_end_cluster,
+                                               &on_disk_reftable,
+-                                              &on_disk_reftable_entries);
+                                              &on_disk_reftable_entries, errp);
+         if (reftable_size_changed < 0) {
+             res->check_errors++;
+             ret = reftable_size_changed;
+@@ -2722,7 +2725,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+     ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
+                                         false);
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
+         goto fail;
+     }
+ 
+@@ -2730,7 +2733,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+     ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+                       reftable_length);
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR writing reftable");
+         goto fail;
+     }
+ 
+@@ -2743,7 +2746,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs,
+                            &reftable_offset_and_clusters,
+                            sizeof(reftable_offset_and_clusters));
+     if (ret < 0) {
+-        fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+        error_setg_errno(errp, -ret, "ERROR setting reftable");
+         goto fail;
+     }
+ 
+@@ -2811,11 +2814,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+     if (rebuild && (fix & BDRV_FIX_ERRORS)) {
+         BdrvCheckResult old_res = *res;
+         int fresh_leaks = 0;
+        Error *local_err = NULL;
+ 
+         fprintf(stderr, "Rebuilding refcount structure\n");
+         ret = rebuild_refcount_structure(bs, res, &refcount_table,
+-                                         &nb_clusters);
+                                         &nb_clusters, &local_err);
+         if (ret < 0) {
+            error_report_err(local_err);
+             goto fail;
+         }
+ 
+-- 
+2.27.0
+
--- a/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch
+++ b/SOURCES/kvm-qcow2-Improve-refcount-structure-rebuilding.patch
@ -0,0 +1,465 @@
+From be54c6206b0f0a19e0ffe6a058f4f97277027a17 Mon Sep 17 00:00:00 2001
+From: Hanna Reitz <hreitz@redhat.com>
+Date: Tue, 5 Apr 2022 15:46:50 +0200
+Subject: [PATCH 3/6] qcow2: Improve refcount structure rebuilding
+
+RH-Author: Hanna Reitz <hreitz@redhat.com>
+RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding
+RH-Commit: [1/4] 0bb78f7735a0730204670ae5ec2e040ad1d23942
+RH-Bugzilla: 1519071
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+RH-Acked-by: Eric Blake <eblake@redhat.com>
+
+When rebuilding the refcount structures (when qemu-img check -r found
+errors with refcount = 0, but reference count > 0), the new refcount
+table defaults to being put at the image file end[1].  There is no good
+reason for that except that it means we will not have to rewrite any
+refblocks we already wrote to disk.
+
+Changing the code to rewrite those refblocks is not too difficult,
+though, so let us do that.  That is beneficial for images on block
+devices, where we cannot really write beyond the end of the image file.
+
+Use this opportunity to add extensive comments to the code, and refactor
+it a bit, getting rid of the backwards-jumping goto.
+
+[1] Unless there is something allocated in the area pointed to by the
+    last refblock, so we have to write that refblock.  In that case, we
+    try to put the reftable in there.
+
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1519071
+Closes: https://gitlab.com/qemu-project/qemu/-/issues/941
+Reviewed-by: Eric Blake <eblake@redhat.com>
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+Message-Id: <20220405134652.19278-2-hreitz@redhat.com>
+(cherry picked from commit a8c07ec287554dcefd33733f0e5888a281ddc95e)
+Signed-off-by: Hanna Reitz <hreitz@redhat.com>
+---
+ block/qcow2-refcount.c | 332 +++++++++++++++++++++++++++++------------
+ 1 file changed, 235 insertions(+), 97 deletions(-)
+
+diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
+index 4614572252..555d8ba5ac 100644
+--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
+@@ -2435,111 +2435,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
+ }
+ 
+ /*
+- * Creates a new refcount structure based solely on the in-memory information
+- * given through *refcount_table. All necessary allocations will be reflected
+- * in that array.
+ * Helper function for rebuild_refcount_structure().
+  *
+- * On success, the old refcount structure is leaked (it will be covered by the
+- * new refcount structure).
+ * Scan the range of clusters [first_cluster, end_cluster) for allocated
+ * clusters and write all corresponding refblocks to disk.  The refblock
+ * and allocation data is taken from the in-memory refcount table
+ * *refcount_table[] (of size *nb_clusters), which is basically one big
+ * (unlimited size) refblock for the whole image.
+ *
+ * For these refblocks, clusters are allocated using said in-memory
+ * refcount table.  Care is taken that these allocations are reflected
+ * in the refblocks written to disk.
+ *
+ * The refblocks' offsets are written into a reftable, which is
+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr).  If
+ * that reftable is of insufficient size, it will be resized to fit.
+ * This reftable is not written to disk.
+ *
+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
+ * to point to existing valid refblocks that do not need to be allocated
+ * again.)
+ *
+ * Return whether the on-disk reftable array was resized (true/false),
+ * or -errno on error.
+  */
+-static int rebuild_refcount_structure(BlockDriverState *bs,
+-                                      BdrvCheckResult *res,
+-                                      void **refcount_table,
+-                                      int64_t *nb_clusters)
+static int rebuild_refcounts_write_refblocks(
+        BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+        int64_t first_cluster, int64_t end_cluster,
+        uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr
+    )
+ {
+     BDRVQcow2State *s = bs->opaque;
+-    int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
+    int64_t cluster;
+     int64_t refblock_offset, refblock_start, refblock_index;
+-    uint32_t reftable_size = 0;
+-    uint64_t *on_disk_reftable = NULL;
+    int64_t first_free_cluster = 0;
+    uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
+    uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
+     void *on_disk_refblock;
+-    int ret = 0;
+-    struct {
+-        uint64_t reftable_offset;
+-        uint32_t reftable_clusters;
+-    } QEMU_PACKED reftable_offset_and_clusters;
+-
+-    qcow2_cache_empty(bs, s->refcount_block_cache);
+    bool reftable_grown = false;
+    int ret;
+ 
+-write_refblocks:
+-    for (; cluster < *nb_clusters; cluster++) {
+    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
+        /* Check all clusters to find refblocks that contain non-zero entries */
+         if (!s->get_refcount(*refcount_table, cluster)) {
+             continue;
+         }
+ 
+        /*
+         * This cluster is allocated, so we need to create a refblock
+         * for it.  The data we will write to disk is just the
+         * respective slice from *refcount_table, so it will contain
+         * accurate refcounts for all clusters belonging to this
+         * refblock.  After we have written it, we will therefore skip
+         * all remaining clusters in this refblock.
+         */
+
+         refblock_index = cluster >> s->refcount_block_bits;
+         refblock_start = refblock_index << s->refcount_block_bits;
+ 
+-        /* Don't allocate a cluster in a refblock already written to disk */
+-        if (first_free_cluster < refblock_start) {
+-            first_free_cluster = refblock_start;
+-        }
+-        refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+-                                              nb_clusters, &first_free_cluster);
+-        if (refblock_offset < 0) {
+-            fprintf(stderr, "ERROR allocating refblock: %s\n",
+-                    strerror(-refblock_offset));
+-            res->check_errors++;
+-            ret = refblock_offset;
+-            goto fail;
+-        }
+        if (on_disk_reftable_entries > refblock_index &&
+            on_disk_reftable[refblock_index])
+        {
+            /*
+             * We can get here after a `goto write_refblocks`: We have a
+             * reftable from a previous run, and the refblock is already
+             * allocated.  No need to allocate it again.
+             */
+            refblock_offset = on_disk_reftable[refblock_index];
+        } else {
+            int64_t refblock_cluster_index;
+ 
+-        if (reftable_size <= refblock_index) {
+-            uint32_t old_reftable_size = reftable_size;
+-            uint64_t *new_on_disk_reftable;
+            /* Don't allocate a cluster in a refblock already written to disk */
+            if (first_free_cluster < refblock_start) {
+                first_free_cluster = refblock_start;
+            }
+            refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+                                                  nb_clusters,
+                                                  &first_free_cluster);
+            if (refblock_offset < 0) {
+                fprintf(stderr, "ERROR allocating refblock: %s\n",
+                        strerror(-refblock_offset));
+                return refblock_offset;
+            }
+ 
+-            reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+-                                     s->cluster_size) / REFTABLE_ENTRY_SIZE;
+-            new_on_disk_reftable = g_try_realloc(on_disk_reftable,
+-                                                 reftable_size *
+-                                                 REFTABLE_ENTRY_SIZE);
+-            if (!new_on_disk_reftable) {
+-                res->check_errors++;
+-                ret = -ENOMEM;
+-                goto fail;
+            refblock_cluster_index = refblock_offset / s->cluster_size;
+            if (refblock_cluster_index >= end_cluster) {
+                /*
+                 * We must write the refblock that holds this refblock's
+                 * refcount
+                 */
+                end_cluster = refblock_cluster_index + 1;
+             }
+-            on_disk_reftable = new_on_disk_reftable;
+ 
+-            memset(on_disk_reftable + old_reftable_size, 0,
+-                   (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
+            if (on_disk_reftable_entries <= refblock_index) {
+                on_disk_reftable_entries =
+                    ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+                             s->cluster_size) / REFTABLE_ENTRY_SIZE;
+                on_disk_reftable =
+                    g_try_realloc(on_disk_reftable,
+                                  on_disk_reftable_entries *
+                                  REFTABLE_ENTRY_SIZE);
+                if (!on_disk_reftable) {
+                    return -ENOMEM;
+                }
+ 
+-            /* The offset we have for the reftable is now no longer valid;
+-             * this will leak that range, but we can easily fix that by running
+-             * a leak-fixing check after this rebuild operation */
+-            reftable_offset = -1;
+-        } else {
+-            assert(on_disk_reftable);
+-        }
+-        on_disk_reftable[refblock_index] = refblock_offset;
+                memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
+                       (on_disk_reftable_entries -
+                        *on_disk_reftable_entries_ptr) *
+                       REFTABLE_ENTRY_SIZE);
+ 
+-        /* If this is apparently the last refblock (for now), try to squeeze the
+-         * reftable in */
+-        if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
+-            reftable_offset < 0)
+-        {
+-            uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
+-                                                          REFTABLE_ENTRY_SIZE);
+-            reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
+-                                                  refcount_table, nb_clusters,
+-                                                  &first_free_cluster);
+-            if (reftable_offset < 0) {
+-                fprintf(stderr, "ERROR allocating reftable: %s\n",
+-                        strerror(-reftable_offset));
+-                res->check_errors++;
+-                ret = reftable_offset;
+-                goto fail;
+                *on_disk_reftable_ptr = on_disk_reftable;
+                *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
+
+                reftable_grown = true;
+            } else {
+                assert(on_disk_reftable);
+             }
+            on_disk_reftable[refblock_index] = refblock_offset;
+         }
+ 
+        /* Refblock is allocated, write it to disk */
+
+         ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
+                                             s->cluster_size, false);
+         if (ret < 0) {
+             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+-            goto fail;
+            return ret;
+         }
+ 
+-        /* The size of *refcount_table is always cluster-aligned, therefore the
+-         * write operation will not overflow */
+        /*
+         * The refblock is simply a slice of *refcount_table.
+         * Note that the size of *refcount_table is always aligned to
+         * whole clusters, so the write operation will not result in
+         * out-of-bounds accesses.
+         */
+         on_disk_refblock = (void *)((char *) *refcount_table +
+                                     refblock_index * s->cluster_size);
+ 
+@@ -2547,23 +2576,99 @@ write_refblocks:
+                           s->cluster_size);
+         if (ret < 0) {
+             fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
+-            goto fail;
+            return ret;
+         }
+ 
+-        /* Go to the end of this refblock */
+        /* This refblock is done, skip to its end */
+         cluster = refblock_start + s->refcount_block_size - 1;
+     }
+ 
+-    if (reftable_offset < 0) {
+-        uint64_t post_refblock_start, reftable_clusters;
+    return reftable_grown;
+}
+
+/*
+ * Creates a new refcount structure based solely on the in-memory information
+ * given through *refcount_table (this in-memory information is basically just
+ * the concatenation of all refblocks).  All necessary allocations will be
+ * reflected in that array.
+ *
+ * On success, the old refcount structure is leaked (it will be covered by the
+ * new refcount structure).
+ */
+static int rebuild_refcount_structure(BlockDriverState *bs,
+                                      BdrvCheckResult *res,
+                                      void **refcount_table,
+                                      int64_t *nb_clusters)
+{
+    BDRVQcow2State *s = bs->opaque;
+    int64_t reftable_offset = -1;
+    int64_t reftable_length = 0;
+    int64_t reftable_clusters;
+    int64_t refblock_index;
+    uint32_t on_disk_reftable_entries = 0;
+    uint64_t *on_disk_reftable = NULL;
+    int ret = 0;
+    int reftable_size_changed = 0;
+    struct {
+        uint64_t reftable_offset;
+        uint32_t reftable_clusters;
+    } QEMU_PACKED reftable_offset_and_clusters;
+
+    qcow2_cache_empty(bs, s->refcount_block_cache);
+
+    /*
+     * For each refblock containing entries, we try to allocate a
+     * cluster (in the in-memory refcount table) and write its offset
+     * into on_disk_reftable[].  We then write the whole refblock to
+     * disk (as a slice of the in-memory refcount table).
+     * This is done by rebuild_refcounts_write_refblocks().
+     *
+     * Once we have scanned all clusters, we try to find space for the
+     * reftable.  This will dirty the in-memory refcount table (i.e.
+     * make it differ from the refblocks we have already written), so we
+     * need to run rebuild_refcounts_write_refblocks() again for the
+     * range of clusters where the reftable has been allocated.
+     *
+     * This second run might make the reftable grow again, in which case
+     * we will need to allocate another space for it, which is why we
+     * repeat all this until the reftable stops growing.
+     *
+     * (This loop will terminate, because with every cluster the
+     * reftable grows, it can accomodate a multitude of more refcounts,
+     * so that at some point this must be able to cover the reftable
+     * and all refblocks describing it.)
+     *
+     * We then convert the reftable to big-endian and write it to disk.
+     *
+     * Note that we never free any reftable allocations.  Doing so would
+     * needlessly complicate the algorithm: The eventual second check
+     * run we do will clean up all leaks we have caused.
+     */
+
+    reftable_size_changed =
+        rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                          0, *nb_clusters,
+                                          &on_disk_reftable,
+                                          &on_disk_reftable_entries);
+    if (reftable_size_changed < 0) {
+        res->check_errors++;
+        ret = reftable_size_changed;
+        goto fail;
+    }
+
+    /*
+     * There was no reftable before, so rebuild_refcounts_write_refblocks()
+     * must have increased its size (from 0 to something).
+     */
+    assert(reftable_size_changed);
+
+    do {
+        int64_t reftable_start_cluster, reftable_end_cluster;
+        int64_t first_free_cluster = 0;
+
+        reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
+        reftable_clusters = size_to_clusters(s, reftable_length);
+ 
+-        post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
+-        reftable_clusters =
+-            size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
+-        /* Not pretty but simple */
+-        if (first_free_cluster < post_refblock_start) {
+-            first_free_cluster = post_refblock_start;
+-        }
+         reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
+                                               refcount_table, nb_clusters,
+                                               &first_free_cluster);
+@@ -2575,24 +2680,55 @@ write_refblocks:
+             goto fail;
+         }
+ 
+-        goto write_refblocks;
+-    }
+        /*
+         * We need to update the affected refblocks, so re-run the
+         * write_refblocks loop for the reftable's range of clusters.
+         */
+        assert(offset_into_cluster(s, reftable_offset) == 0);
+        reftable_start_cluster = reftable_offset / s->cluster_size;
+        reftable_end_cluster = reftable_start_cluster + reftable_clusters;
+        reftable_size_changed =
+            rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+                                              reftable_start_cluster,
+                                              reftable_end_cluster,
+                                              &on_disk_reftable,
+                                              &on_disk_reftable_entries);
+        if (reftable_size_changed < 0) {
+            res->check_errors++;
+            ret = reftable_size_changed;
+            goto fail;
+        }
+
+        /*
+         * If the reftable size has changed, we will need to find a new
+         * allocation, repeating the loop.
+         */
+    } while (reftable_size_changed);
+ 
+-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+    /* The above loop must have run at least once */
+    assert(reftable_offset >= 0);
+
+    /*
+     * All allocations are done, all refblocks are written, convert the
+     * reftable to big-endian and write it to disk.
+     */
+
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
+         cpu_to_be64s(&on_disk_reftable[refblock_index]);
+     }
+ 
+-    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
+-                                        reftable_size * REFTABLE_ENTRY_SIZE,
+    ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
+                                         false);
+     if (ret < 0) {
+         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+         goto fail;
+     }
+ 
+-    assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
+    assert(reftable_length < INT_MAX);
+     ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
+-                      reftable_size * REFTABLE_ENTRY_SIZE);
+                      reftable_length);
+     if (ret < 0) {
+         fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+         goto fail;
+@@ -2601,7 +2737,7 @@ write_refblocks:
+     /* Enter new reftable into the image header */
+     reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
+     reftable_offset_and_clusters.reftable_clusters =
+-        cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
+        cpu_to_be32(reftable_clusters);
+     ret = bdrv_pwrite_sync(bs->file,
+                            offsetof(QCowHeader, refcount_table_offset),
+                            &reftable_offset_and_clusters,
+@@ -2611,12 +2747,14 @@ write_refblocks:
+         goto fail;
+     }
+ 
+-    for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+    for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+         refblock_index++)
+    {
+         be64_to_cpus(&on_disk_reftable[refblock_index]);
+     }
+     s->refcount_table = on_disk_reftable;
+     s->refcount_table_offset = reftable_offset;
+-    s->refcount_table_size = reftable_size;
+    s->refcount_table_size = on_disk_reftable_entries;
+     update_max_refcount_table_index(s);
+ 
+     return 0;
+-- 
+2.27.0
+
--- a/SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch
+++ b/SOURCES/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch
@ -0,0 +1,97 @@
+From ddfee9d393af322938e4df466cd01b8f9570a1c9 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Tue, 5 Apr 2022 10:20:59 +0200
+Subject: [PATCH 1/6] s390x/ipl: support extended kernel command line size
+
+RH-Author: Thomas Huth <thuth@redhat.com>
+RH-MergeRequest: 144: s390x/ipl: support extended kernel command line size
+RH-Commit: [1/1] be227e50af5dbe7802605f873db29ac5358aa196
+RH-Bugzilla: 2043830
+RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: David Hildenbrand <david@redhat.com>
+
+Bugzilla: http://bugzilla.redhat.com/2043830
+
+commit b2173046a64beed76715f310f98538f159276af1
+Author: Marc Hartmayer <mhartmay@linux.ibm.com>
+Date:   Mon Nov 22 12:29:09 2021 +0100
+
+    s390x/ipl: support extended kernel command line size
+
+    In the past s390 used a fixed command line length of 896 bytes. This has changed
+    with the Linux commit 5ecb2da660ab ("s390: support command lines longer than 896
+    bytes"). There is now a parm area indicating the maximum command line size. This
+    parm area has always been initialized to zero, so with older kernels this field
+    would read zero and we must then assume that only 896 bytes are available.
+
+    Signed-off-by: Marc Hartmayer <mhartmay@linux.ibm.com>
+    Reviewed-by: David Hildenbrand <david@redhat.com>
+    Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
+    Acked-by: Viktor Mihajlovski <mihajlov@de.ibm.com>
+    Message-Id: <20211122112909.18138-1-mhartmay@linux.ibm.com>
+    [thuth: Cosmetic fixes, and use PRIu64 instead of %lu]
+    Signed-off-by: Thomas Huth <thuth@redhat.com>
+
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+---
+ hw/s390x/ipl.c | 27 +++++++++++++++++++++++----
+ 1 file changed, 23 insertions(+), 4 deletions(-)
+
+diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
+index 7ddca0127f..eb7fc4c4ae 100644
+--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
+@@ -37,8 +37,9 @@
+ 
+ #define KERN_IMAGE_START                0x010000UL
+ #define LINUX_MAGIC_ADDR                0x010008UL
+#define KERN_PARM_AREA_SIZE_ADDR        0x010430UL
+ #define KERN_PARM_AREA                  0x010480UL
+-#define KERN_PARM_AREA_SIZE             0x000380UL
+#define LEGACY_KERN_PARM_AREA_SIZE      0x000380UL
+ #define INITRD_START                    0x800000UL
+ #define INITRD_PARM_START               0x010408UL
+ #define PARMFILE_START                  0x001000UL
+@@ -110,6 +111,21 @@ static uint64_t bios_translate_addr(void *opaque, uint64_t srcaddr)
+     return srcaddr + dstaddr;
+ }
+ 
+static uint64_t get_max_kernel_cmdline_size(void)
+{
+    uint64_t *size_ptr = rom_ptr(KERN_PARM_AREA_SIZE_ADDR, sizeof(*size_ptr));
+
+    if (size_ptr) {
+        uint64_t size;
+
+        size = be64_to_cpu(*size_ptr);
+        if (size) {
+            return size;
+        }
+    }
+    return LEGACY_KERN_PARM_AREA_SIZE;
+}
+
+ static void s390_ipl_realize(DeviceState *dev, Error **errp)
+ {
+     MachineState *ms = MACHINE(qdev_get_machine());
+@@ -197,10 +213,13 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp)
+             ipl->start_addr = KERN_IMAGE_START;
+             /* Overwrite parameters in the kernel image, which are "rom" */
+             if (parm_area) {
+-                if (cmdline_size > KERN_PARM_AREA_SIZE) {
+                uint64_t max_cmdline_size = get_max_kernel_cmdline_size();
+
+                if (cmdline_size > max_cmdline_size) {
+                     error_setg(errp,
+-                               "kernel command line exceeds maximum size: %zu > %lu",
+-                               cmdline_size, KERN_PARM_AREA_SIZE);
+                               "kernel command line exceeds maximum size:"
+                               " %zu > %" PRIu64,
+                               cmdline_size, max_cmdline_size);
+                     return;
+                 }
+ 
+-- 
+2.27.0
+
--- a/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch
+++ b/SOURCES/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch
@ -0,0 +1,97 @@
+From a9a4dfdd6312e192e9134d46edfac4c1b1bfa63d Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Mon, 22 Aug 2022 14:53:20 +0200
+Subject: [PATCH] scsi-generic: Fix emulated block limits VPD page
+
+RH-Author: Kevin Wolf <kwolf@redhat.com>
+RH-MergeRequest: 212: scsi-generic: Fix emulated block limits VPD page
+RH-Commit: [1/1] d3ba6b2e03039043716ddc6b7d4a424d92249081
+RH-Bugzilla: 2120279
+RH-Acked-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+Commits 01ef8185b80 and 24b36e9813e updated the way that the maximum
+transfer length is calculated for patching block limits VPD page in an
+INQUIRY response.
+
+The same updates also need to be made for the case where the host device
+does not support the block limits VPD page at all and we emulate the
+whole page.
+
+Without this fix, on host block devices a maximum transfer length of
+(INT_MAX - sector_size) bytes is advertised to the guest, resulting in
+I/O errors when a request that exceeds the host limits is made by the
+guest. (Prior to commit 24b36e9813e, this code path would use the
+max_transfer value from the host instead of INT_MAX, but still miss the
+fix from 01ef8185b80 where max_transfer is also capped to max_iov
+host pages, so it would be less wrong, but still wrong.)
+
+Cc: qemu-stable@nongnu.org
+Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251
+Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a
+Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Message-Id: <20220822125320.48257-1-kwolf@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee)
+
+Resolved conflict: qemu_real_host_page_size() is a getter function in
+current upstream, but still just a public global variable downstream.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+---
+ hw/scsi/scsi-generic.c | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c
+index 0306ccc7b1..3742899839 100644
+--- a/hw/scsi/scsi-generic.c
+++ b/hw/scsi/scsi-generic.c
+@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk,
+     return 0;
+ }
+ 
+static uint64_t calculate_max_transfer(SCSIDevice *s)
+{
+    uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
+    uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
+
+    assert(max_transfer);
+    max_transfer = MIN_NON_ZERO(max_transfer,
+                                max_iov * qemu_real_host_page_size);
+
+    return max_transfer / s->blocksize;
+}
+
+ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
+ {
+     uint8_t page, page_idx;
+@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len)
+         (r->req.cmd.buf[1] & 0x01)) {
+         page = r->req.cmd.buf[2];
+         if (page == 0xb0) {
+-            uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk);
+-            uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk);
+-
+-            assert(max_transfer);
+-            max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size)
+-                / s->blocksize;
+            uint64_t max_transfer = calculate_max_transfer(s);
+             stl_be_p(&r->buf[8], max_transfer);
+             /* Also take care of the opt xfer len. */
+             stl_be_p(&r->buf[12],
+@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s)
+     uint8_t buf[64];
+ 
+     SCSIBlockLimits bl = {
+-        .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize
+        .max_io_sectors = calculate_max_transfer(s),
+     };
+ 
+     memset(r->buf, 0, r->buflen);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch
+++ b/SOURCES/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch
@ -0,0 +1,73 @@
+From 688c9f386635544dbc468171a32fbc84f0c9224e Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Fri, 18 Mar 2022 16:23:47 +0100
+Subject: [PATCH 12/24] target/i386: kvm: do not access uninitialized variable
+ on older kernels
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [12/13] 776fac1e7d1aa16ec5f4d99ddad3039eab8212af
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP.
+Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however
+did not fully consider the behavior on older kernels.  First, it warns
+too aggressively.  Second, it invokes the KVM_GET_DEVICE_ATTR ioctl
+unconditionally and then uses the "bitmask" variable, which remains
+uninitialized if the ioctl fails.  Third, kvm_ioctl returns -errno rather
+than -1 on errors.
+
+While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID
+is not enough.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 3ec5ad40081b14af28496198b4d08dbe13386790)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/kvm/kvm.c | 17 +++++++++++++----
+ 1 file changed, 13 insertions(+), 4 deletions(-)
+
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index b1128b0e07..bd439e56ad 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+         }
+     } else if (function == 0xd && index == 0 &&
+                (reg == R_EAX || reg == R_EDX)) {
+        /*
+         * The value returned by KVM_GET_SUPPORTED_CPUID does not include
+         * features that still have to be enabled with the arch_prctl
+         * system call.  QEMU needs the full value, which is retrieved
+         * with KVM_GET_DEVICE_ATTR.
+         */
+         struct kvm_device_attr attr = {
+             .group = 0,
+             .attr = KVM_X86_XCOMP_GUEST_SUPP,
+@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+ 
+         bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+         if (!sys_attr) {
+-            warn_report("cannot get sys attribute capabilities %d", sys_attr);
+            return ret;
+         }
+ 
+         int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+-        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+-            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+-                        "error: %d", rc);
+        if (rc < 0) {
+            if (rc != -ENXIO) {
+                warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+                            "error: %d", rc);
+            }
+            return ret;
+         }
+         ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
+     } else if (function == 0x80000001 && reg == R_ECX) {
+-- 
+2.35.3
+
--- a/SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch
+++ b/SOURCES/kvm-target-i386-properly-reset-TSC-on-reset.patch
@ -0,0 +1,83 @@
+From 416de21d11540a927cceb533bf54ce28ffa15ad6 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 24 Mar 2022 09:21:41 +0100
+Subject: [PATCH 2/3] target/i386: properly reset TSC on reset
+
+RH-Author: Paolo Bonzini <pbonzini@redhat.com>
+RH-MergeRequest: 172: target/i386: properly reset TSC on reset
+RH-Commit: [1/1] 7008bc5d02ad0a2d8b78259459d22d8f0986c989
+RH-Bugzilla: 2070417
+RH-Acked-by: Marcelo Tosatti <None>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+
+Some versions of Windows hang on reboot if their TSC value is greater
+than 2^54.  The calibration of the Hyper-V reference time overflows
+and fails; as a result the processors' clock sources are out of sync.
+
+The issue is that the TSC _should_ be reset to 0 on CPU reset and
+QEMU tries to do that.  However, KVM special cases writing 0 to the
+TSC and thinks that QEMU is trying to hot-plug a CPU, which is
+correct the first time through but not later.  Thwart this valiant
+effort and reset the TSC to 1 instead, but only if the CPU has been
+run once.
+
+For this to work, env->tsc has to be moved to the part of CPUArchState
+that is not zeroed at the beginning of x86_cpu_reset.
+
+Reported-by: Vadim Rozenfeld <vrozenfe@redhat.com>
+Supersedes: <20220324082346.72180-1-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 5286c3662294119dc2dd1e9296757337211451f6)
+---
+ target/i386/cpu.c | 13 +++++++++++++
+ target/i386/cpu.h |  2 +-
+ 2 files changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 6e25d13339..dd6935b1dd 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -5871,6 +5871,19 @@ static void x86_cpu_reset(DeviceState *dev)
+     env->xstate_bv = 0;
+ 
+     env->pat = 0x0007040600070406ULL;
+
+    if (kvm_enabled()) {
+        /*
+         * KVM handles TSC = 0 specially and thinks we are hot-plugging
+         * a new CPU, use 1 instead to force a reset.
+         */
+        if (env->tsc != 0) {
+            env->tsc = 1;
+        }
+    } else {
+        env->tsc = 0;
+    }
+
+     env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT;
+     if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) {
+         env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 04f2b790c9..c6a6c871f1 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -1510,7 +1510,6 @@ typedef struct CPUX86State {
+     target_ulong kernelgsbase;
+ #endif
+ 
+-    uint64_t tsc;
+     uint64_t tsc_adjust;
+     uint64_t tsc_deadline;
+     uint64_t tsc_aux;
+@@ -1660,6 +1659,7 @@ typedef struct CPUX86State {
+     int64_t tsc_khz;
+     int64_t user_tsc_khz; /* for sanity check only */
+     uint64_t apic_bus_freq;
+    uint64_t tsc;
+ #if defined(CONFIG_KVM) || defined(CONFIG_HVF)
+     void *xsave_buf;
+     uint32_t xsave_buf_len;
+-- 
+2.35.1
+
--- a/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch
+++ b/SOURCES/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch
@ -0,0 +1,120 @@
+From 24af433728429578e586d179e27451b7d4a46cba Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
+Date: Thu, 18 Nov 2021 12:57:33 +0100
+Subject: [PATCH 3/3] tests/qtest/fdc-test: Add a regression test for
+ CVE-2021-3507
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jon Maloy <jmaloy@redhat.com>
+RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507)
+RH-Commit: [2/2] 31ec71276b521b06d4142fffa88a3fa4d1494d92 (jmaloy/qemu-kvm)
+RH-Bugzilla: 1951521
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Thomas Huth <thuth@redhat.com>
+RH-Acked-by: Hanna Reitz <hreitz@redhat.com>
+
+Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339
+
+Without the previous commit, when running 'make check-qtest-i386'
+with QEMU configured with '--enable-sanitizers' we get:
+
+  ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0
+  READ of size 786432 at 0x619000062a00 thread T0
+      #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919)
+      #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13
+      #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14
+      #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18
+      #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16
+      #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5
+      #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5
+      #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9
+      #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13
+      #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13
+      #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13
+      #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9
+      #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17
+
+  0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00)
+  allocated by thread T0 here:
+      #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec)
+      #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11
+      #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27
+      #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20
+      #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5
+      #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13
+
+  SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy
+  Shadow bytes around the buggy address:
+    0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+    0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+    0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+    0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+  =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
+    0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
+  Shadow byte legend (one shadow byte represents 8 application bytes):
+    Addressable:           00
+    Heap left redzone:       fa
+    Freed heap region:       fd
+  ==4028352==ABORTING
+
+[ kwolf: Added snapshot=on to prevent write file lock failure ]
+
+Reported-by: Alexander Bulekov <alxndr@bu.edu>
+Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+(cherry picked from commit 46609b90d9e3a6304def11038a76b58ff43f77bc)
+Signed-off-by: Jon Maloy <jmaloy@redhat.com>
+---
+ tests/qtest/fdc-test.c | 21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
+index 8f6eee84a4..6f5850354f 100644
+--- a/tests/qtest/fdc-test.c
+++ b/tests/qtest/fdc-test.c
+@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void)
+     qtest_quit(s);
+ }
+ 
+static void test_cve_2021_3507(void)
+{
+    QTestState *s;
+
+    s = qtest_initf("-nographic -m 32M -nodefaults "
+                    "-drive file=%s,format=raw,if=floppy,snapshot=on",
+                    test_image);
+    qtest_outl(s, 0x9, 0x0a0206);
+    qtest_outw(s, 0x3f4, 0x1600);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_outw(s, 0x3f4, 0x0200);
+    qtest_outw(s, 0x3f4, 0x0200);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_outw(s, 0x3f4, 0x0000);
+    qtest_quit(s);
+}
+
+ int main(int argc, char **argv)
+ {
+     int fd;
+@@ -614,6 +634,7 @@ int main(int argc, char **argv)
+     qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
+     qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
+     qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196);
+    qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507);
+ 
+     ret = g_test_run();
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch
+++ b/SOURCES/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch
@ -0,0 +1,105 @@
+From 87a318f0b8758f940a316831a77b6ebebca42b19 Mon Sep 17 00:00:00 2001
+From: Jon Maloy <jmaloy@redhat.com>
+Date: Wed, 4 May 2022 10:35:17 -0400
+Subject: [PATCH 3/3] ui/cursor: fix integer overflow in cursor_alloc
+ (CVE-2021-4206)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jon Maloy <jmaloy@redhat.com>
+RH-MergeRequest: 180: ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206)
+RH-Commit: [1/1] 7ad711347bc6248dc5aefa45401ca74448dee5e5 (jmaloy/qemu-kvm)
+RH-Bugzilla: 2040734
+RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-Acked-by: Mauro Matteo Cascella <None>
+RH-Acked-by: Gerd Hoffmann <kraxel@redhat.com>
+
+BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040734
+Upstream: Merged
+CVE: CVE-2021-4206
+
+commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa
+Author: Mauro Matteo Cascella <mcascell@redhat.com>
+Date:   Thu Apr 7 10:17:12 2022 +0200
+
+    ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206)
+
+    Prevent potential integer overflow by limiting 'width' and 'height' to
+    512x512. Also change 'datasize' type to size_t. Refer to security
+    advisory https://starlabs.sg/advisories/22-4206/ for more information.
+
+    Fixes: CVE-2021-4206
+    Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
+    Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
+    Message-Id: <20220407081712.345609-1-mcascell@redhat.com>
+    Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
+
+(cherry picked from commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa)
+Signed-off-by: Jon Maloy <jmaloy@redhat.com>
+---
+ hw/display/qxl-render.c | 7 +++++++
+ hw/display/vmware_vga.c | 2 ++
+ ui/cursor.c             | 8 +++++++-
+ 3 files changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c
+index 237ed293ba..ca217004bf 100644
+--- a/hw/display/qxl-render.c
+++ b/hw/display/qxl-render.c
+@@ -247,6 +247,13 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor,
+     size_t size;
+ 
+     c = cursor_alloc(cursor->header.width, cursor->header.height);
+
+    if (!c) {
+        qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__,
+                cursor->header.width, cursor->header.height);
+        goto fail;
+    }
+
+     c->hot_x = cursor->header.hot_spot_x;
+     c->hot_y = cursor->header.hot_spot_y;
+     switch (cursor->header.type) {
+diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c
+index e2969a6c81..2b81d6122f 100644
+--- a/hw/display/vmware_vga.c
+++ b/hw/display/vmware_vga.c
+@@ -509,6 +509,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s,
+     int i, pixels;
+ 
+     qc = cursor_alloc(c->width, c->height);
+    assert(qc != NULL);
+
+     qc->hot_x = c->hot_x;
+     qc->hot_y = c->hot_y;
+     switch (c->bpp) {
+diff --git a/ui/cursor.c b/ui/cursor.c
+index 1d62ddd4d0..835f0802f9 100644
+--- a/ui/cursor.c
+++ b/ui/cursor.c
+@@ -46,6 +46,8 @@ static QEMUCursor *cursor_parse_xpm(const char *xpm[])
+ 
+     /* parse pixel data */
+     c = cursor_alloc(width, height);
+    assert(c != NULL);
+
+     for (pixel = 0, y = 0; y < height; y++, line++) {
+         for (x = 0; x < height; x++, pixel++) {
+             idx = xpm[line][x];
+@@ -91,7 +93,11 @@ QEMUCursor *cursor_builtin_left_ptr(void)
+ QEMUCursor *cursor_alloc(int width, int height)
+ {
+     QEMUCursor *c;
+-    int datasize = width * height * sizeof(uint32_t);
+    size_t datasize = width * height * sizeof(uint32_t);
+
+    if (width > 512 || height > 512) {
+        return NULL;
+    }
+ 
+     c = g_malloc0(sizeof(QEMUCursor) + datasize);
+     c->width  = width;
+-- 
+2.35.1
+
--- a/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
+++ b/SOURCES/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
@ -0,0 +1,56 @@
+From 9a62319b973ec33f9ccbeeae7f2f3b4b31db0c26 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:15 -0700
+Subject: [PATCH 17/24] vhost-net: fix improper cleanup in vhost_net_start
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [4/7] bebe7990a12e901fbb84e5e4b7a62744d75c9d9e
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+vhost_net_start() missed a corresponding stop_one() upon error from
+vhost_set_vring_enable(). While at it, make the error handling for
+err_start more robust. No real issue was found due to this though.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-5-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 6f3910b5eee00b8cc959e94659c0d524c482a418)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/vhost_net.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
+index 30379d2ca4..d6d7c51f62 100644
+--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
+@@ -381,6 +381,7 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+             r = vhost_set_vring_enable(peer, peer->vring_enable);
+ 
+             if (r < 0) {
+                vhost_net_stop_one(get_vhost_net(peer), dev);
+                 goto err_start;
+             }
+         }
+@@ -390,7 +391,8 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
+ 
+ err_start:
+     while (--i >= 0) {
+-        peer = qemu_get_peer(ncs , i);
+        peer = qemu_get_peer(ncs, i < data_queue_pairs ?
+                                  i : n->max_queue_pairs);
+         vhost_net_stop_one(get_vhost_net(peer), dev);
+     }
+     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
+++ b/SOURCES/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
@ -0,0 +1,58 @@
+From 01270bb66a4f7897a4fd06ba248eeeb41dc47571 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:16 -0700
+Subject: [PATCH 18/24] vhost-vdpa: backend feature should set only once
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [5/7] 0ab13542cf25c129dc403db95c7db12cdb012744
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+The vhost_vdpa_one_time_request() branch in
+vhost_vdpa_set_backend_cap() incorrectly sends down
+ioctls on vhost_dev with non-zero index. This may
+end up with multiple VHOST_SET_BACKEND_FEATURES
+ioctl calls sent down on the vhost-vdpa fd that is
+shared between all these vhost_dev's.
+
+To fix it, send down ioctl only once via the first
+vhost_dev with index 0. Toggling the polarity of the
+vhost_vdpa_one_time_request() test should do the
+trick.
+
+Fixes: 4d191cfdc7de ("vhost-vdpa: classify one time request")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Eugenio Pérez <eperezma@redhat.com>
+Message-Id: <1651890498-24478-6-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 6aee7e4233f6467f69531fcd352adff028f3f5ea)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/virtio/vhost-vdpa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index 78da48a333..a9be24776a 100644
+--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
+@@ -525,7 +525,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+ 
+     features &= f;
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_one_time_request(dev)) {
+         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+         if (r) {
+             return -EFAULT;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
+++ b/SOURCES/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
@ -0,0 +1,126 @@
+From c8cb46fa93a3ccad6f3e183045b270f28eed7b12 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:17 -0700
+Subject: [PATCH 19/24] vhost-vdpa: change name and polarity for
+ vhost_vdpa_one_time_request()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [6/7] 727ab0bb813f073e8cd2f7e68a9acda60c2cb33d
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+The name vhost_vdpa_one_time_request() was confusing. No
+matter whatever it returns, its typical occurrence had
+always been at requests that only need to be applied once.
+And the name didn't suggest what it actually checks for.
+Change it to vhost_vdpa_first_dev() with polarity flipped
+for better readability of code. That way it is able to
+reflect what the check is really about.
+
+This call is applicable to request which performs operation
+only once, before queues are set up, and usually at the beginning
+of the caller function. Document the requirement for it in place.
+
+Conflicts: hw/virtio/vhost-vdpa.c since we don't have shadow virtqueue
+support.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+(cherry picked from commit d71b0609fc04217e28d17009f04d74b08be6f466)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/virtio/vhost-vdpa.c | 23 +++++++++++++++--------
+ 1 file changed, 15 insertions(+), 8 deletions(-)
+
+diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
+index a9be24776a..38bbcb3c18 100644
+--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
+@@ -319,11 +319,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v)
+                                     v->iova_range.last);
+ }
+ 
+-static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
+/*
+ * The use of this function is for requests that only need to be
+ * applied once. Typically such request occurs at the beginning
+ * of operation, and before setting up queues. It should not be
+ * used for request that performs operation until all queues are
+ * set, which would need to check dev->vq_index_end instead.
+ */
+static bool vhost_vdpa_first_dev(struct vhost_dev *dev)
+ {
+     struct vhost_vdpa *v = dev->opaque;
+ 
+-    return v->index != 0;
+    return v->index == 0;
+ }
+ 
+ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
+@@ -351,7 +358,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
+ 
+     vhost_vdpa_get_iova_range(v);
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -468,7 +475,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev)
+ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
+                                     struct vhost_memory *mem)
+ {
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -496,7 +503,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev,
+ {
+     int ret;
+ 
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -525,7 +532,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
+ 
+     features &= f;
+ 
+-    if (!vhost_vdpa_one_time_request(dev)) {
+    if (vhost_vdpa_first_dev(dev)) {
+         r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features);
+         if (r) {
+             return -EFAULT;
+@@ -670,7 +677,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
+ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
+                                      struct vhost_log *log)
+ {
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+@@ -739,7 +746,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev,
+ 
+ static int vhost_vdpa_set_owner(struct vhost_dev *dev)
+ {
+-    if (vhost_vdpa_one_time_request(dev)) {
+    if (!vhost_vdpa_first_dev(dev)) {
+         return 0;
+     }
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
+++ b/SOURCES/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
@ -0,0 +1,48 @@
+From c10ef6f79d4a4c8ccc5901b25234501c621e4e04 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:14 -0700
+Subject: [PATCH 16/24] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [3/7] b3b658dcb4695defe1fdb199570fb984291e8e21
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+... such that no memory leaks on dangling net clients in case of
+error.
+
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-4-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 9bd055073e375c8a0d7ebce925e05d914d69fc7f)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ net/vhost-vdpa.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
+index 25dd6dd975..814f704687 100644
+--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
+@@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
+ 
+ err:
+     if (i) {
+-        qemu_del_net_client(ncs[0]);
+        for (i--; i >= 0; i--) {
+            qemu_del_net_client(ncs[i]);
+        }
+     }
+     qemu_close(vdpa_device_fd);
+     g_free(ncs);
+-- 
+2.35.3
+
--- a/SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch
+++ b/SOURCES/kvm-virtio-gpu-do-not-byteswap-padding.patch
@ -0,0 +1,48 @@
+From e118a451dc1ed68f1371a5d8e042120542be6d31 Mon Sep 17 00:00:00 2001
+From: Paolo Bonzini <pbonzini@redhat.com>
+Date: Thu, 11 Nov 2021 12:06:00 +0100
+Subject: [PATCH 01/24] virtio-gpu: do not byteswap padding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [1/13] 12714f53820b7632e7fc0a8a3bf8eb4a64f41750
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+In Linux 5.16, the padding of struct virtio_gpu_ctrl_hdr has become a
+single-byte field followed by a uint8_t[3] array of padding bytes,
+and virtio_gpu_ctrl_hdr_bswap does not compile anymore.
+
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Acked-by: Cornelia Huck <cohuck@redhat.com>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
+Message-Id: <20211111110604.207376-2-pbonzini@redhat.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit a4663f1a5506626175fc64c86e52135587c36872)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ include/hw/virtio/virtio-gpu-bswap.h | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h
+index e2bee8f595..5faac0d8d5 100644
+--- a/include/hw/virtio/virtio-gpu-bswap.h
+++ b/include/hw/virtio/virtio-gpu-bswap.h
+@@ -24,7 +24,6 @@ virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr)
+     le32_to_cpus(&hdr->flags);
+     le64_to_cpus(&hdr->fence_id);
+     le32_to_cpus(&hdr->ctx_id);
+-    le32_to_cpus(&hdr->padding);
+ }
+ 
+ static inline void
+-- 
+2.35.3
+
--- a/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
+++ b/SOURCES/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
@ -0,0 +1,143 @@
+From 39cdd781c885b0695f8830a33420caa9e9b0bd50 Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:13 -0700
+Subject: [PATCH 15/24] virtio-net: align ctrl_vq index for non-mq guest for
+ vhost_vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [2/7] 2647cf59f3dd1e3d8af2d12c01e06ae26fbc1dc2
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+With MQ enabled vdpa device and non-MQ supporting guest e.g.
+booting vdpa with mq=on over OVMF of single vqp, below assert
+failure is seen:
+
+../hw/virtio/vhost-vdpa.c:560: vhost_vdpa_get_vq_index: Assertion `idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs' failed.
+
+0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
+1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
+2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
+3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
+4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
+5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
+6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
+7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
+   at ../hw/virtio/virtio-pci.c:974
+8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
+9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:361
+10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
+11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
+12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
+13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
+14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
+   at ../softmmu/memory.c:492
+15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
+16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
+   at ../softmmu/memory.c:1504
+17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at /home/opc/qemu-upstream/include/qemu/host-utils.h:165
+18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
+19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
+   at ../softmmu/physmem.c:2914
+20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
+   attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
+21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
+22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
+23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
+24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
+25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
+
+The assert failure occurs because the vhost_dev index for the ctrl vq
+was not aligned with the actual one in use by the guest.
+Upon multiqueue feature negotiation in virtio_net_set_multiqueue(),
+if guest doesn't support multiqueue, the guest vq layout would shrink
+to a single queue pair, consisting of 3 vqs in total (rx, tx and ctrl).
+This results in ctrl_vq taking a different vhost_dev group index than
+the default. We can map vq to the correct vhost_dev group by checking
+if MQ is supported by guest and successfully negotiated. Since the
+MQ feature is only present along with CTRL_VQ, we ensure the index
+2 is only meant for the control vq while MQ is not supported by guest.
+
+Fixes: 22288fe ("virtio-net: vhost control virtqueue support")
+Suggested-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-3-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 68b0a6395f36a8f48f56f46d05f30be2067598b0)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 33 +++++++++++++++++++++++++++++++--
+ 1 file changed, 31 insertions(+), 2 deletions(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index ec045c3f41..f118379bb4 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -14,6 +14,7 @@
+ #include "qemu/osdep.h"
+ #include "qemu/atomic.h"
+ #include "qemu/iov.h"
+#include "qemu/log.h"
+ #include "qemu/main-loop.h"
+ #include "qemu/module.h"
+ #include "hw/virtio/virtio.h"
+@@ -3163,8 +3164,22 @@ static NetClientInfo net_virtio_info = {
+ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
+ {
+     VirtIONet *n = VIRTIO_NET(vdev);
+-    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+    NetClientState *nc;
+     assert(n->vhost_started);
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
+        /* Must guard against invalid features and bogus queue index
+         * from being set by malicious guest, or penetrated through
+         * buggy migration stream.
+         */
+        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: bogus vq index ignored\n", __func__);
+            return false;
+        }
+        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
+    } else {
+        nc = qemu_get_subqueue(n->nic, vq2q(idx));
+    }
+     return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
+ }
+ 
+@@ -3172,8 +3187,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
+                                            bool mask)
+ {
+     VirtIONet *n = VIRTIO_NET(vdev);
+-    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
+    NetClientState *nc;
+     assert(n->vhost_started);
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_MQ) && idx == 2) {
+        /* Must guard against invalid features and bogus queue index
+         * from being set by malicious guest, or penetrated through
+         * buggy migration stream.
+         */
+        if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: bogus vq index ignored\n", __func__);
+            return;
+        }
+        nc = qemu_get_subqueue(n->nic, n->max_queue_pairs);
+    } else {
+        nc = qemu_get_subqueue(n->nic, vq2q(idx));
+    }
+     vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
+                              vdev, idx, mask);
+ }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
+++ b/SOURCES/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
@ -0,0 +1,109 @@
+From c9b51d54530c526f14ca0f3b9fc0bfa0b60d45ee Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:18 -0700
+Subject: [PATCH 20/24] virtio-net: don't handle mq request in userspace
+ handler for vhost-vdpa
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [7/7] 0e6684d12e42752deae8f5ebc56456fed174e0ed
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+virtio_queue_host_notifier_read() tends to read pending event
+left behind on ioeventfd in the vhost_net_stop() path, and
+attempts to handle outstanding kicks from userspace vq handler.
+However, in the ctrl_vq handler, virtio_net_handle_mq() has a
+recursive call into virtio_net_set_status(), which may lead to
+segmentation fault as shown in below stack trace:
+
+0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376
+1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
+2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at ../hw/virtio/vhost.c:318
+3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336
+4  0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
+5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
+6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
+7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:423
+8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
+9  0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
+10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408
+11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
+12 0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
+13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
+14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
+15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
+   at ../../../include/hw/virtio/virtio-bus.h:35
+16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
+17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1)
+   at ../hw/net/vhost_net.c:423
+18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
+19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
+20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=<optimized out>) at ../hw/virtio/virtio.c:1945
+21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
+22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262
+23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
+24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
+25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:51
+
+For now, temporarily disable handling MQ request from the ctrl_vq
+userspace handler to avoid the recursive virtio_net_set_status()
+call. Some rework is needed to allow changing the number of
+queues without going through a full virtio_net_set_status cycle,
+particularly for vhost-vdpa backend.
+
+This patch will need to be reverted as soon as future patches of
+having the change of #queues handled in userspace is merged.
+
+Fixes: 402378407db ("vhost-vdpa: multiqueue support")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-8-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit 2a7888cc3aa31faee839fa5dddad354ff8941f4c)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index f118379bb4..7e172ef829 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -1373,6 +1373,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+ {
+     VirtIODevice *vdev = VIRTIO_DEVICE(n);
+     uint16_t queue_pairs;
+    NetClientState *nc = qemu_get_queue(n->nic);
+ 
+     virtio_net_disable_rss(n);
+     if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) {
+@@ -1404,6 +1405,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
+         return VIRTIO_NET_ERR;
+     }
+ 
+    /* Avoid changing the number of queue_pairs for vdpa device in
+     * userspace handler. A future fix is needed to handle the mq
+     * change in userspace handler with vhost-vdpa. Let's disable
+     * the mq handling from userspace for now and only allow get
+     * done through the kernel. Ripples may be seen when falling
+     * back to userspace, but without doing it qemu process would
+     * crash on a recursive entry to virtio_net_set_status().
+     */
+    if (nc->peer && nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        return VIRTIO_NET_ERR;
+    }
+
+     n->curr_queue_pairs = queue_pairs;
+     /* stop the backend before changing the number of queue_pairs to avoid handling a
+      * disabled queue */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch
+++ b/SOURCES/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch
@ -0,0 +1,60 @@
+From 10b3a7b56dc9b4c88e503c36c1b13d80bcb7b066 Mon Sep 17 00:00:00 2001
+From: Jason Wang <jasowang@redhat.com>
+Date: Tue, 8 Mar 2022 10:42:51 +0800
+Subject: [PATCH 2/6] virtio-net: fix map leaking on error during receive
+
+RH-Author: Jon Maloy <jmaloy@redhat.com>
+RH-MergeRequest: 154: virtio-net: fix map leaking on error during receive
+RH-Commit: [1/1] 7178b0cd5ce7c89fe476f2e199c9212c8b89327a (jmaloy/qemu-kvm)
+RH-Bugzilla: 2063206
+RH-Acked-by: Jason Wang <None>
+RH-Acked-by: Kevin Wolf <kwolf@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063206
+Upstream: Merged
+CVE: CVE-2022-26353
+
+commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37
+Author: Jason Wang <jasowang@redhat.com>
+Date:   Tue Mar 8 10:42:51 2022 +0800
+
+    virtio-net: fix map leaking on error during receive
+
+    Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
+    tries to fix the use after free of the sg by caching the virtqueue
+    elements in an array and unmap them at once after receiving the
+    packets. But it forgot to unmap the cached elements on error which
+    will lead to leaking of mapping and other unexpected results.
+
+    Fixing this by detaching the cached elements on error. This addresses
+    CVE-2022-26353.
+
+    Reported-by: Victor Tom <vv474172261@gmail.com>
+    Cc: qemu-stable@nongnu.org
+    Fixes: CVE-2022-26353
+    Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
+    Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+    Signed-off-by: Jason Wang <jasowang@redhat.com>
+
+(cherry picked from commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37)
+Signed-off-by: Jon Maloy <jmaloy@redhat.com>
+---
+ hw/net/virtio-net.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index f2014d5ea0..e1f4748831 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -1862,6 +1862,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
+ 
+ err:
+     for (j = 0; j < i; j++) {
+        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
+         g_free(elems[j]);
+     }
+ 
+-- 
+2.27.0
+
--- a/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
+++ b/SOURCES/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
@ -0,0 +1,52 @@
+From bc307149fe4e3fe2a3e0ac52534383c955051e7e Mon Sep 17 00:00:00 2001
+From: Si-Wei Liu <si-wei.liu@oracle.com>
+Date: Fri, 6 May 2022 19:28:12 -0700
+Subject: [PATCH 14/24] virtio-net: setup vhost_dev and notifiers for cvq only
+ when feature is negotiated
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Jason Wang <jasowang@redhat.com>
+RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA
+RH-Commit: [1/7] 38bcfaa661f437b3dfa6b6f152dffd60073dc054
+RH-Bugzilla: 2069946
+RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
+RH-Acked-by: Cindy Lu <lulu@redhat.com>
+RH-Acked-by: Laurent Vivier <lvivier@redhat.com>
+
+When the control virtqueue feature is absent or not negotiated,
+vhost_net_start() still tries to set up vhost_dev and install
+vhost notifiers for the control virtqueue, which results in
+erroneous ioctl calls with incorrect queue index sending down
+to driver. Do that only when needed.
+
+Fixes: 22288fe ("virtio-net: vhost control virtqueue support")
+Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Message-Id: <1651890498-24478-2-git-send-email-si-wei.liu@oracle.com>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit aa8581945a13712ff3eed0ad3ba7a9664fc1604b)
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/virtio-net.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
+index e1f4748831..ec045c3f41 100644
+--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
+@@ -244,7 +244,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
+     VirtIODevice *vdev = VIRTIO_DEVICE(n);
+     NetClientState *nc = qemu_get_queue(n->nic);
+     int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1;
+-    int cvq = n->max_ncs - n->max_queue_pairs;
+    int cvq = virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) ?
+              n->max_ncs - n->max_queue_pairs : 0;
+ 
+     if (!get_vhost_net(nc->peer)) {
+         return;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch
+++ b/SOURCES/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch
@ -0,0 +1,63 @@
+From 1da951c4c3b4e403a6c1668a54e6264381c0003d Mon Sep 17 00:00:00 2001
+From: Vivek Goyal <vgoyal@redhat.com>
+Date: Tue, 8 Feb 2022 15:48:04 -0500
+Subject: [PATCH 1/3] virtiofsd: Fix breakage due to fuse_init_in size change
+
+RH-Author: Dr. David Alan Gilbert <dgilbert@redhat.com>
+RH-MergeRequest: 193: virtiofsd: Fix breakage due to fuse_init_in size change
+RH-Commit: [1/1] 5809db034f9361fb462181d71e7cdde1324f8e54
+RH-Bugzilla: 2097209
+RH-Acked-by: German Maglione <None>
+RH-Acked-by: Laszlo Ersek <lersek@redhat.com>
+RH-Acked-by: Vivek Goyal <None>
+RH-Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
+
+Kernel version 5.17 has increased the size of "struct fuse_init_in" struct.
+Previously this struct was 16 bytes and now it has been extended to
+64 bytes in size.
+
+Once qemu headers are updated to latest, it will expect to receive 64 byte
+size struct (for protocol version major 7 and minor > 6). But if guest is
+booting older kernel (older than 5.17), then it still sends older
+fuse_init_in of size 16 bytes. And do_init() fails. It is expecting
+64 byte struct. And this results in mount of virtiofs failing.
+
+Fix this by parsing 16 bytes only for now. Separate patches will be
+posted which will parse rest of the bytes and enable new functionality.
+Right now we don't support any of the new functionality, so we don't
+lose anything by not parsing bytes beyond 16.
+
+Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
+Message-Id: <20220208204813.682906-2-vgoyal@redhat.com>
+Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
+(cherry picked from commit a086d54c6ffa38f7e71f182b63a25315304a3392)
+---
+ tools/virtiofsd/fuse_lowlevel.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c
+index e4679c73ab..5d431a7038 100644
+--- a/tools/virtiofsd/fuse_lowlevel.c
+++ b/tools/virtiofsd/fuse_lowlevel.c
+@@ -1880,6 +1880,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
+                     struct fuse_mbuf_iter *iter)
+ {
+     size_t compat_size = offsetof(struct fuse_init_in, max_readahead);
+    size_t compat2_size = offsetof(struct fuse_init_in, flags) +
+                              sizeof(uint32_t);
+     struct fuse_init_in *arg;
+     struct fuse_init_out outarg;
+     struct fuse_session *se = req->se;
+@@ -1897,7 +1899,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid,
+ 
+     /* ...and now consume the new fields. */
+     if (arg->major == 7 && arg->minor >= 6) {
+-        if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) {
+        if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) {
+             fuse_reply_err(req, EINVAL);
+             return;
+         }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch
+++ b/SOURCES/kvm-x86-Add-AMX-CPUIDs-enumeration.patch
@ -0,0 +1,135 @@
+From d0826a8c2c3c389eeeed1014d7e316f39f083971 Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:31 -0800
+Subject: [PATCH 09/24] x86: Add AMX CPUIDs enumeration
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [9/13] fab147992ad927c9538529f018f06e2f48546c5b
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Add AMX primary feature bits XFD and AMX_TILE to
+enumerate the CPU's AMX capability. Meanwhile, add
+AMX TILE and TMUL CPUID leaf and subleaves which
+exist when AMX TILE is present to provide the maximum
+capability of TILE and TMUL.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-6-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit f21a48171cf3fa39532fc8553fd82e81b88b6474)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c     | 55 ++++++++++++++++++++++++++++++++++++++++---
+ target/i386/kvm/kvm.c |  4 +++-
+ 2 files changed, 55 insertions(+), 4 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index cd27c0eb81..09e08f7f38 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = {
+ #define INTEL_PT_CYCLE_BITMAP    0x1fff         /* Support 0,2^(0~11) */
+ #define INTEL_PT_PSB_BITMAP      (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */
+ 
+/* CPUID Leaf 0x1D constants: */
+#define INTEL_AMX_TILE_MAX_SUBLEAF     0x1
+#define INTEL_AMX_TOTAL_TILE_BYTES     0x2000
+#define INTEL_AMX_BYTES_PER_TILE       0x400
+#define INTEL_AMX_BYTES_PER_ROW        0x40
+#define INTEL_AMX_TILE_MAX_NAMES       0x8
+#define INTEL_AMX_TILE_MAX_ROWS        0x10
+
+/* CPUID Leaf 0x1E constants: */
+#define INTEL_AMX_TMUL_MAX_K           0x10
+#define INTEL_AMX_TMUL_MAX_N           0x40
+
+ void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1,
+                               uint32_t vendor2, uint32_t vendor3)
+ {
+@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
+             "avx512-vp2intersect", NULL, "md-clear", NULL,
+             NULL, NULL, "serialize", NULL,
+             "tsx-ldtrk", NULL, NULL /* pconfig */, NULL,
+-            NULL, NULL, NULL, "avx512-fp16",
+-            NULL, NULL, "spec-ctrl", "stibp",
+            NULL, NULL, "amx-bf16", "avx512-fp16",
+            "amx-tile", "amx-int8", "spec-ctrl", "stibp",
+             NULL, "arch-capabilities", "core-capability", "ssbd",
+         },
+         .cpuid = {
+@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
+         .type = CPUID_FEATURE_WORD,
+         .feat_names = {
+             "xsaveopt", "xsavec", "xgetbv1", "xsaves",
+-            NULL, NULL, NULL, NULL,
+            "xfd", NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+             NULL, NULL, NULL, NULL,
+@@ -5593,6 +5605,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+         }
+         break;
+     }
+    case 0x1D: {
+        /* AMX TILE */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+            break;
+        }
+
+        if (count == 0) {
+            /* Highest numbered palette subleaf */
+            *eax = INTEL_AMX_TILE_MAX_SUBLEAF;
+        } else if (count == 1) {
+            *eax = INTEL_AMX_TOTAL_TILE_BYTES |
+                   (INTEL_AMX_BYTES_PER_TILE << 16);
+            *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16);
+            *ecx = INTEL_AMX_TILE_MAX_ROWS;
+        }
+        break;
+    }
+    case 0x1E: {
+        /* AMX TMUL */
+        *eax = 0;
+        *ebx = 0;
+        *ecx = 0;
+        *edx = 0;
+        if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) {
+            break;
+        }
+
+        if (count == 0) {
+            /* Highest numbered palette subleaf */
+            *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8);
+        }
+        break;
+    }
+     case 0x40000000:
+         /*
+          * CPUID code in kvm_arch_init_vcpu() ignores stuff
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index b5d98c4361..a64a79d870 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
+                 c = &cpuid_data.entries[cpuid_i++];
+             }
+             break;
+-        case 0x14: {
+        case 0x14:
+        case 0x1d:
+        case 0x1e: {
+             uint32_t times;
+ 
+             c->function = i;
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
+++ b/SOURCES/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
@ -0,0 +1,112 @@
+From 3ba6092159b6e3b25505af2a49c0f6ac99043db9 Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:28 -0800
+Subject: [PATCH 06/24] x86: Add AMX XTILECFG and XTILEDATA components
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [6/13] 95229f87b4494631d57232f374a174f7bc95843a
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+The AMX TILECFG register and the TMMx tile data registers are
+saved/restored via XSAVE, respectively in state component 17
+(64 bytes) and state component 18 (8192 bytes).
+
+Add AMX feature bits to x86_ext_save_areas array to set
+up AMX components. Add structs that define the layout of
+AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the
+struct sizes.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-3-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c |  8 ++++++++
+ target/i386/cpu.h | 18 +++++++++++++++++-
+ 2 files changed, 25 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index f44fad3a2a..0453c27c9d 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = {
+     [XSTATE_PKRU_BIT] =
+           { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU,
+             .size = sizeof(XSavePKRU) },
+    [XSTATE_XTILE_CFG_BIT] = {
+        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+        .size = sizeof(XSaveXTILECFG),
+    },
+    [XSTATE_XTILE_DATA_BIT] = {
+        .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE,
+        .size = sizeof(XSaveXTILEDATA)
+    },
+ };
+ 
+ static uint32_t xsave_area_size(uint64_t mask)
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 5d9702a991..e1dd8b9555 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -537,6 +537,8 @@ typedef enum X86Seg {
+ #define XSTATE_ZMM_Hi256_BIT            6
+ #define XSTATE_Hi16_ZMM_BIT             7
+ #define XSTATE_PKRU_BIT                 9
+#define XSTATE_XTILE_CFG_BIT            17
+#define XSTATE_XTILE_DATA_BIT           18
+ 
+ #define XSTATE_FP_MASK                  (1ULL << XSTATE_FP_BIT)
+ #define XSTATE_SSE_MASK                 (1ULL << XSTATE_SSE_BIT)
+@@ -845,6 +847,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+ #define CPUID_7_0_EDX_TSX_LDTRK         (1U << 16)
+ /* AVX512_FP16 instruction */
+ #define CPUID_7_0_EDX_AVX512_FP16       (1U << 23)
+/* AMX tile (two-dimensional register) */
+#define CPUID_7_0_EDX_AMX_TILE          (1U << 24)
+ /* Speculation Control */
+ #define CPUID_7_0_EDX_SPEC_CTRL         (1U << 26)
+ /* Single Thread Indirect Branch Predictors */
+@@ -1348,6 +1352,16 @@ typedef struct XSavePKRU {
+     uint32_t padding;
+ } XSavePKRU;
+ 
+/* Ext. save area 17: AMX XTILECFG state */
+typedef struct XSaveXTILECFG {
+    uint8_t xtilecfg[64];
+} XSaveXTILECFG;
+
+/* Ext. save area 18: AMX XTILEDATA state */
+typedef struct XSaveXTILEDATA {
+    uint8_t xtiledata[8][1024];
+} XSaveXTILEDATA;
+
+ QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40);
+@@ -1355,6 +1369,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200);
+ QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400);
+ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40);
+QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000);
+ 
+ typedef struct ExtSaveArea {
+     uint32_t feature, bits;
+@@ -1362,7 +1378,7 @@ typedef struct ExtSaveArea {
+     uint32_t ecx;
+ } ExtSaveArea;
+ 
+-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1)
+ 
+ extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT];
+ 
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch
+++ b/SOURCES/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch
@ -0,0 +1,62 @@
+From 098d6a965ada02f5897b73f0489413a050a176bb Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:30 -0800
+Subject: [PATCH 08/24] x86: Add XFD faulting bit for state components
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [8/13] 0b1b46c5d075655ab94bc79e042b187c5dc55551
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+Intel introduces the XFD faulting mechanism for extended
+XSAVE features to dynamically enable the features at
+runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set
+to 1, it indicates support for XFD faulting of this
+state component.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-5-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c | 3 ++-
+ target/i386/cpu.h | 2 ++
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index c19b51ea32..cd27c0eb81 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -5503,7 +5503,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
+                 *eax = esa->size;
+                 *ebx = esa->offset;
+-                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
+                *ecx = esa->ecx &
+                       (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK);
+             }
+         }
+         break;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 58676390e6..f2bdef9c26 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -555,8 +555,10 @@ typedef enum X86Seg {
+ #define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
+ 
+ #define ESA_FEATURE_ALIGN64_BIT         1
+#define ESA_FEATURE_XFD_BIT             2
+ 
+ #define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
+#define ESA_FEATURE_XFD_MASK            (1U << ESA_FEATURE_XFD_BIT)
+ 
+ 
+ /* CPUID feature words */
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
+++ b/SOURCES/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
@ -0,0 +1,88 @@
+From 6eae12166341c236da023e5117b64b842ae72083 Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:27 -0800
+Subject: [PATCH 05/24] x86: Fix the 64-byte boundary enumeration for extended
+ state
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [5/13] 64fc93e3b0ad0fc56da9d71b33d9eefd3cbba1d7
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1]
+indicate whether the extended state component is located
+on the next 64-byte boundary following the preceding state
+component when the compacted format of an XSAVE area is
+used.
+
+Right now, they are all zero because no supported component
+needed the bit to be set, but the upcoming AMX feature will
+use it.  Fix the subleaves value according to KVM's supported
+cpuid.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-2-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 131266b7565bd437127bd231563572696bb27235)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c         | 1 +
+ target/i386/cpu.h         | 6 ++++++
+ target/i386/kvm/kvm-cpu.c | 1 +
+ 3 files changed, 8 insertions(+)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index dd6935b1dd..f44fad3a2a 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -5495,6 +5495,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count,
+                 const ExtSaveArea *esa = &x86_ext_save_areas[count];
+                 *eax = esa->size;
+                 *ebx = esa->offset;
+                *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK;
+             }
+         }
+         break;
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index c6a6c871f1..5d9702a991 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -548,6 +548,11 @@ typedef enum X86Seg {
+ #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
+ #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
+ 
+#define ESA_FEATURE_ALIGN64_BIT         1
+
+#define ESA_FEATURE_ALIGN64_MASK        (1U << ESA_FEATURE_ALIGN64_BIT)
+
+
+ /* CPUID feature words */
+ typedef enum FeatureWord {
+     FEAT_1_EDX,         /* CPUID[1].EDX */
+@@ -1354,6 +1359,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8);
+ typedef struct ExtSaveArea {
+     uint32_t feature, bits;
+     uint32_t offset, size;
+    uint32_t ecx;
+ } ExtSaveArea;
+ 
+ #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1)
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index 7b004065ae..86ef7b2712 100644
+--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
+@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void)
+             if (sz != 0) {
+                 assert(esa->size == sz);
+                 esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
+                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
+             }
+         }
+     }
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch
+++ b/SOURCES/kvm-x86-Grant-AMX-permission-for-guest.patch
@ -0,0 +1,215 @@
+From 50840e01d05a466a1dfbc219e49233834e5d7ed0 Mon Sep 17 00:00:00 2001
+From: Yang Zhong <yang.zhong@intel.com>
+Date: Wed, 16 Feb 2022 22:04:29 -0800
+Subject: [PATCH 07/24] x86: Grant AMX permission for guest
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [7/13] 437578191f61139ca710cc7045ab38eb0d05eae2
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+The kernel allocates a 4K xstate buffer by default. For XSAVE features
+which require a large state component (e.g. AMX), the Linux kernel
+dynamically expands the xstate buffer only after the process has
+acquired the necessary permissions. Those are called dynamically-
+enabled XSAVE features (or dynamic xfeatures).
+
+There are separate permissions for native tasks and guests.
+
+Qemu should request the guest permissions for dynamic xfeatures
+which will be exposed to the guest. This only needs to be done
+once before the first vcpu is created.
+
+KVM implemented a new KVM_X86_XCOMP_GUEST_SUPP system attribute (read
+with KVM_GET_DEVICE_ATTR) that returns the host-side supported_xcr0, so
+QEMU can decide whether to request permission for dynamic xfeatures.
+https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
+
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Message-Id: <20220217060434.52460-4-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 19db68ca68a78fa033a21d419036b6e416554564)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.c          |  7 +++++
+ target/i386/cpu.h          |  4 +++
+ target/i386/kvm/kvm-cpu.c  | 12 ++++----
+ target/i386/kvm/kvm.c      | 57 ++++++++++++++++++++++++++++++++++++++
+ target/i386/kvm/kvm_i386.h |  1 +
+ 5 files changed, 75 insertions(+), 6 deletions(-)
+
+diff --git a/target/i386/cpu.c b/target/i386/cpu.c
+index 0453c27c9d..c19b51ea32 100644
+--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
+@@ -6027,6 +6027,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
+     CPUX86State *env = &cpu->env;
+     int i;
+     uint64_t mask;
+    static bool request_perm;
+ 
+     if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
+         env->features[FEAT_XSAVE_COMP_LO] = 0;
+@@ -6042,6 +6043,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu)
+         }
+     }
+ 
+    /* Only request permission for first vcpu */
+    if (kvm_enabled() && !request_perm) {
+        kvm_request_xsave_components(cpu, mask);
+        request_perm = true;
+    }
+
+     env->features[FEAT_XSAVE_COMP_LO] = mask;
+     env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
+ }
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index e1dd8b9555..58676390e6 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -549,6 +549,10 @@ typedef enum X86Seg {
+ #define XSTATE_ZMM_Hi256_MASK           (1ULL << XSTATE_ZMM_Hi256_BIT)
+ #define XSTATE_Hi16_ZMM_MASK            (1ULL << XSTATE_Hi16_ZMM_BIT)
+ #define XSTATE_PKRU_MASK                (1ULL << XSTATE_PKRU_BIT)
+#define XSTATE_XTILE_CFG_MASK           (1ULL << XSTATE_XTILE_CFG_BIT)
+#define XSTATE_XTILE_DATA_MASK          (1ULL << XSTATE_XTILE_DATA_BIT)
+
+#define XSTATE_DYNAMIC_MASK             (XSTATE_XTILE_DATA_MASK)
+ 
+ #define ESA_FEATURE_ALIGN64_BIT         1
+ 
+diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
+index 86ef7b2712..bdc967c484 100644
+--- a/target/i386/kvm/kvm-cpu.c
+++ b/target/i386/kvm/kvm-cpu.c
+@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
+ static void kvm_cpu_xsave_init(void)
+ {
+     static bool first = true;
+-    KVMState *s = kvm_state;
+    uint32_t eax, ebx, ecx, edx;
+     int i;
+ 
+     if (!first) {
+@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
+         ExtSaveArea *esa = &x86_ext_save_areas[i];
+ 
+         if (esa->size) {
+-            int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
+-            if (sz != 0) {
+-                assert(esa->size == sz);
+-                esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
+-                esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
+            host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
+            if (eax != 0) {
+                assert(esa->size == eax);
+                esa->offset = ebx;
+                esa->ecx = ecx;
+             }
+         }
+     }
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index a668f521ac..b5d98c4361 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -17,6 +17,7 @@
+ #include "qapi/error.h"
+ #include <sys/ioctl.h>
+ #include <sys/utsname.h>
+#include <sys/syscall.h>
+ 
+ #include <linux/kvm.h>
+ #include "standard-headers/asm-x86/kvm_para.h"
+@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+     struct kvm_cpuid2 *cpuid;
+     uint32_t ret = 0;
+     uint32_t cpuid_1_edx;
+    uint64_t bitmask;
+ 
+     cpuid = get_supported_cpuid(s);
+ 
+@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
+         if (!has_msr_arch_capabs) {
+             ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
+         }
+    } else if (function == 0xd && index == 0 &&
+               (reg == R_EAX || reg == R_EDX)) {
+        struct kvm_device_attr attr = {
+            .group = 0,
+            .attr = KVM_X86_XCOMP_GUEST_SUPP,
+            .addr = (unsigned long) &bitmask
+        };
+
+        bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
+        if (!sys_attr) {
+            warn_report("cannot get sys attribute capabilities %d", sys_attr);
+        }
+
+        int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
+        if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
+            warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
+                        "error: %d", rc);
+        }
+        ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
+     } else if (function == 0x80000001 && reg == R_ECX) {
+         /*
+          * It's safe to enable TOPOEXT even if it's not returned by
+@@ -5054,3 +5075,39 @@ bool kvm_arch_cpu_check_are_resettable(void)
+ {
+     return !sev_es_enabled();
+ }
+
+#define ARCH_REQ_XCOMP_GUEST_PERM       0x1025
+
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
+{
+    KVMState *s = kvm_state;
+    uint64_t supported;
+
+    mask &= XSTATE_DYNAMIC_MASK;
+    if (!mask) {
+        return;
+    }
+    /*
+     * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0].
+     * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned
+     * about them already because they are not supported features.
+     */
+    supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
+    supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32;
+    mask &= supported;
+
+    while (mask) {
+        int bit = ctz64(mask);
+        int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit);
+        if (rc) {
+            /*
+             * Older kernel version (<5.17) do not support
+             * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return
+             * any dynamic feature from kvm_arch_get_supported_cpuid.
+             */
+            warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
+                        "for feature bit %d", bit);
+        }
+        mask &= ~BIT_ULL(bit);
+    }
+}
+diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
+index a978509d50..4124912c20 100644
+--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
+@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
+ uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+ 
+ bool kvm_enable_sgx_provisioning(KVMState *s);
+void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
+ 
+ #endif
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch
+++ b/SOURCES/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch
@ -0,0 +1,178 @@
+From 90a276ed72deab84f3fdd4b57e9ccfc6514934fb Mon Sep 17 00:00:00 2001
+From: Zeng Guang <guang.zeng@intel.com>
+Date: Wed, 16 Feb 2022 22:04:33 -0800
+Subject: [PATCH 11/24] x86: Support XFD and AMX xsave data migration
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [11/13] 4ff6e5544ffdac4e6d2f568f7f63b937502ca6c5
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+XFD (eXtended Feature Disable) allows a feature to be
+enabled in the xsave state while preventing specific
+user threads from using the feature.
+
+Save and restore the XFD MSRs if CPUID.D.1.EAX[4] is
+enumerated. Likewise, migrate the MSRs and the related
+xsave state when needed.
+
+Signed-off-by: Zeng Guang <guang.zeng@intel.com>
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-8-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit cdec2b753b487d9e8aab028231c35d87789ea083)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.h     |  9 +++++++++
+ target/i386/kvm/kvm.c | 18 +++++++++++++++++
+ target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 73 insertions(+)
+
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index 14a3501b87..8ab2a4042a 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -505,6 +505,9 @@ typedef enum X86Seg {
+ 
+ #define MSR_VM_HSAVE_PA                 0xc0010117
+ 
+#define MSR_IA32_XFD                    0x000001c4
+#define MSR_IA32_XFD_ERR                0x000001c5
+
+ #define MSR_IA32_BNDCFGS                0x00000d90
+ #define MSR_IA32_XSS                    0x00000da0
+ #define MSR_IA32_UMWAIT_CONTROL         0xe1
+@@ -870,6 +873,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS];
+ #define CPUID_7_1_EAX_AVX_VNNI          (1U << 4)
+ /* AVX512 BFloat16 Instruction */
+ #define CPUID_7_1_EAX_AVX512_BF16       (1U << 5)
+/* XFD Extend Feature Disabled */
+#define CPUID_D_1_EAX_XFD               (1U << 4)
+ 
+ /* Packets which contain IP payload have LIP values */
+ #define CPUID_14_0_ECX_LIP              (1U << 31)
+@@ -1610,6 +1615,10 @@ typedef struct CPUX86State {
+     uint64_t msr_rtit_cr3_match;
+     uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS];
+ 
+    /* Per-VCPU XFD MSRs */
+    uint64_t msr_xfd;
+    uint64_t msr_xfd_err;
+
+     /* exception/interrupt handling */
+     int error_code;
+     int exception_is_int;
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index d3d476df27..b1128b0e07 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
+                               env->msr_ia32_sgxlepubkeyhash[3]);
+         }
+ 
+        if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD,
+                              env->msr_xfd);
+            kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR,
+                              env->msr_xfd_err);
+        }
+
+         /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see
+          *       kvm_put_msr_feature_control. */
+     }
+@@ -3571,6 +3578,11 @@ static int kvm_get_msrs(X86CPU *cpu)
+         kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0);
+     }
+ 
+    if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) {
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0);
+        kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0);
+    }
+
+     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf);
+     if (ret < 0) {
+         return ret;
+@@ -3870,6 +3882,12 @@ static int kvm_get_msrs(X86CPU *cpu)
+             env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] =
+                            msrs[i].data;
+             break;
+        case MSR_IA32_XFD:
+            env->msr_xfd = msrs[i].data;
+            break;
+        case MSR_IA32_XFD_ERR:
+            env->msr_xfd_err = msrs[i].data;
+            break;
+         }
+     }
+ 
+diff --git a/target/i386/machine.c b/target/i386/machine.c
+index 83c2b91529..3977e9d8f8 100644
+--- a/target/i386/machine.c
+++ b/target/i386/machine.c
+@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = {
+     }
+ };
+ 
+static bool xfd_msrs_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD);
+}
+
+static const VMStateDescription vmstate_msr_xfd = {
+    .name = "cpu/msr_xfd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = xfd_msrs_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(env.msr_xfd, X86CPU),
+        VMSTATE_UINT64(env.msr_xfd_err, X86CPU),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+#ifdef TARGET_X86_64
+static bool amx_xtile_needed(void *opaque)
+{
+    X86CPU *cpu = opaque;
+    CPUX86State *env = &cpu->env;
+
+    return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE);
+}
+
+static const VMStateDescription vmstate_amx_xtile = {
+    .name = "cpu/intel_amx_xtile",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = amx_xtile_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64),
+        VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192),
+        VMSTATE_END_OF_LIST()
+    }
+};
+#endif
+
+ const VMStateDescription vmstate_x86_cpu = {
+     .name = "cpu",
+     .version_id = 12,
+@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = {
+ #endif
+         &vmstate_msr_tsx_ctrl,
+         &vmstate_msr_intel_sgx,
+        &vmstate_msr_xfd,
+#ifdef TARGET_X86_64
+        &vmstate_amx_xtile,
+#endif
+         NULL
+     }
+ };
+-- 
+2.35.3
+
--- a/SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
+++ b/SOURCES/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
@ -0,0 +1,182 @@
+From 28cf1b55f346a9f56e84fa57921f5a28a99cd59b Mon Sep 17 00:00:00 2001
+From: Jing Liu <jing2.liu@intel.com>
+Date: Wed, 16 Feb 2022 22:04:32 -0800
+Subject: [PATCH 10/24] x86: add support for KVM_CAP_XSAVE2 and AMX state
+ migration
+
+RH-Author: Paul Lai <plai@redhat.com>
+RH-MergeRequest: 176: Enable KVM AMX support
+RH-Commit: [10/13] d584f455ba1ecd8a4a87f3470e6aac24ba9a1f5a
+RH-Bugzilla: 1916415
+RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
+RH-Acked-by: Igor Mammedov <imammedo@redhat.com>
+RH-Acked-by: Paolo Bonzini <pbonzini@redhat.com>
+
+When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave
+area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE
+under KVM_CAP_XSAVE2 work with an xsave buffer larger than 4KB.
+Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it.
+
+Signed-off-by: Jing Liu <jing2.liu@intel.com>
+Signed-off-by: Zeng Guang <guang.zeng@intel.com>
+Signed-off-by: Wei Wang <wei.w.wang@intel.com>
+Signed-off-by: Yang Zhong <yang.zhong@intel.com>
+Message-Id: <20220217060434.52460-7-yang.zhong@intel.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit e56dd3c70abb31893c61ac834109fa7a38841330)
+Signed-off-by: Paul Lai <plai@redhat.com>
+---
+ target/i386/cpu.h          |  4 ++++
+ target/i386/kvm/kvm.c      | 42 ++++++++++++++++++++++++--------------
+ target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++
+ 3 files changed, 59 insertions(+), 15 deletions(-)
+
+diff --git a/target/i386/cpu.h b/target/i386/cpu.h
+index f2bdef9c26..14a3501b87 100644
+--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
+@@ -1522,6 +1522,10 @@ typedef struct CPUX86State {
+     uint64_t opmask_regs[NB_OPMASK_REGS];
+     YMMReg zmmh_regs[CPU_NB_REGS];
+     ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+    uint8_t xtilecfg[64];
+    uint8_t xtiledata[8192];
+#endif
+ 
+     /* sysenter registers */
+     uint32_t sysenter_cs;
+diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
+index a64a79d870..d3d476df27 100644
+--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
+@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters;
+ static uint32_t num_architectural_pmu_fixed_counters;
+ 
+ static int has_xsave;
+static int has_xsave2;
+ static int has_xcrs;
+ static int has_pit_state2;
+ static int has_exception_payload;
+@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker;
+ 
+ #define KVM_MAX_CPUID_ENTRIES  100
+ 
+static void kvm_init_xsave(CPUX86State *env)
+{
+    if (has_xsave2) {
+        env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096);
+    } else if (has_xsave) {
+        env->xsave_buf_len = sizeof(struct kvm_xsave);
+    } else {
+        return;
+    }
+
+    env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+    memset(env->xsave_buf, 0, env->xsave_buf_len);
+    /*
+     * The allocated storage must be large enough for all of the
+     * possible XSAVE state components.
+     */
+    assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <=
+           env->xsave_buf_len);
+}
+
+ int kvm_arch_init_vcpu(CPUState *cs)
+ {
+     struct {
+@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
+ 
+     cpuid_i = 0;
+ 
+    has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2);
+
+     r = kvm_arch_set_tsc_khz(cs);
+     if (r < 0) {
+         return r;
+@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
+     if (r) {
+         goto fail;
+     }
+-
+-    if (has_xsave) {
+-        env->xsave_buf_len = sizeof(struct kvm_xsave);
+-        env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len);
+-        memset(env->xsave_buf, 0, env->xsave_buf_len);
+-
+-        /*
+-         * The allocated storage must be large enough for all of the
+-         * possible XSAVE state components.
+-         */
+-        assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX)
+-               <= env->xsave_buf_len);
+-    }
+    kvm_init_xsave(env);
+ 
+     max_nested_state_len = kvm_max_nested_state_length();
+     if (max_nested_state_len > 0) {
+@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu)
+ {
+     CPUX86State *env = &cpu->env;
+     void *xsave = env->xsave_buf;
+-    int ret;
+    int type, ret;
+ 
+     if (!has_xsave) {
+         return kvm_get_fpu(cpu);
+     }
+ 
+-    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave);
+    type = has_xsave2 ? KVM_GET_XSAVE2 : KVM_GET_XSAVE;
+    ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave);
+     if (ret < 0) {
+         return ret;
+     }
+diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c
+index ac61a96344..996e9f3bfe 100644
+--- a/target/i386/xsave_helper.c
+++ b/target/i386/xsave_helper.c
+@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen)
+ 
+         memcpy(pkru, &env->pkru, sizeof(env->pkru));
+     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg));
+    }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+    if (e->size && e->offset && buflen >= e->size + e->offset) {
+        XSaveXTILEDATA *tiledata = buf + e->offset;
+
+        memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata));
+    }
+ #endif
+ }
+ 
+@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen)
+         pkru = buf + e->offset;
+         memcpy(&env->pkru, pkru, sizeof(env->pkru));
+     }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT];
+    if (e->size && e->offset) {
+        const XSaveXTILECFG *tilecfg = buf + e->offset;
+
+        memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg));
+    }
+
+    e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT];
+    if (e->size && e->offset && buflen >= e->size + e->offset) {
+        const XSaveXTILEDATA *tiledata = buf + e->offset;
+
+        memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata));
+    }
+ #endif
+ }
+-- 
+2.35.3
+
--- a/SPECS/qemu-kvm.spec
+++ b/SPECS/qemu-kvm.spec
@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release}
 Summary: QEMU is a machine emulator and virtualizer
 Name: qemu-kvm
 Version: 6.2.0
-Release: 12%{?rcrel}%{?dist}
+Release: 20%{?rcrel}%{?dist}.1
 # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
 Epoch: 15
 License: GPLv2 and GPLv2+ and CC-BY
@ -257,6 +257,180 @@ Patch78: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch
 Patch79: kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch
 # For bz#2063262 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8]
 Patch80: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch
+# For bz#2043830 - [IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU
+Patch81: kvm-s390x-ipl-support-extended-kernel-command-line-size.patch
+# For bz#2063206 - CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8]
+Patch82: kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch
+# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch83: kvm-qcow2-Improve-refcount-structure-rebuilding.patch
+# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch84: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch
+# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch85: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch
+# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)
+Patch86: kvm-iotests-108-Fix-when-missing-user_allow_other.patch
+# For bz#2065043 - Remove upstream-only devices from the qemu-kvm binary
+Patch87: kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch
+# For bz#2070417 - Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0]
+Patch88: kvm-target-i386-properly-reset-TSC-on-reset.patch
+# For bz#2040734 - CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7]
+Patch89: kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch90: kvm-virtio-gpu-do-not-byteswap-padding.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch91: kvm-linux-headers-update-to-5.16-rc1.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch92: kvm-linux-headers-Update-headers-to-v5.17-rc1.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch93: kvm-linux-headers-include-missing-changes-from-5.17.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch94: kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch95: kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch96: kvm-x86-Grant-AMX-permission-for-guest.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch97: kvm-x86-Add-XFD-faulting-bit-for-state-components.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch98: kvm-x86-Add-AMX-CPUIDs-enumeration.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch99: kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch100: kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch101: kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch
+# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions
+Patch102: kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch103: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch104: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch105: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch106: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch107: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch108: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch
+# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA
+Patch109: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch
+# For bz#2029980 - Failed assertion in IDE emulation with Ceph backend
+Patch110: kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch
+# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7]
+Patch111: kvm-block-Make-bdrv_refresh_limits-non-recursive.patch
+# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7]
+Patch112: kvm-iotests-Allow-using-QMP-with-the-QSD.patch
+# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7]
+Patch113: kvm-iotests-graph-changes-while-io-New-test.patch
+# For bz#2097209 - [virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111
+Patch114: kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch
+# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8]
+Patch115: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch
+# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8]
+Patch116: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch117: kvm-migration-Never-call-twice-qemu_target_page_size.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch118: kvm-multifd-Rename-used-field-to-num.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch119: kvm-multifd-Add-missing-documentation.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch120: kvm-multifd-The-variable-is-only-used-inside-the-loop.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch121: kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch122: kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch123: kvm-multifd-Fill-offset-and-block-for-reception.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch124: kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch125: kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch126: kvm-migration-All-this-fields-are-unsigned.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch127: kvm-multifd-Move-iov-from-pages-to-params.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch128: kvm-multifd-Make-zlib-use-iov-s.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch129: kvm-multifd-Make-zstd-use-iov-s.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch130: kvm-multifd-Remove-send_write-method.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch131: kvm-multifd-Use-a-single-writev-on-the-send-side.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch132: kvm-multifd-Use-normal-pages-array-on-the-send-side.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch133: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch134: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch135: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch136: kvm-migration-Add-migrate_use_tls-helper.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch137: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch138: kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch139: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch140: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch141: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch
+# For bz#2072049 - Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8
+Patch142: kvm-migration-Change-zero_copy_send-from-migration-param.patch
+# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration
+Patch143: kvm-migration-Add-migration_incoming_transport_cleanup.patch
+# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration
+Patch144: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch145: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch146: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch147: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch148: kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch149: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch150: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch151: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch152: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions
+Patch153: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch
+# For bz#2105410 - Stalled IO Operations in VM
+Patch154: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch
+# For bz#2105410 - Stalled IO Operations in VM
+Patch155: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch156: kvm-migration-Introduce-ram_transferred_add.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch157: kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch158: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch159: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch160: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch161: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch162: kvm-migration-add-remaining-params-has_-true-in-migratio.patch
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together
+Patch163: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch
+# For bz#2112296 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions
+Patch164: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch
+# For bz#2120279 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7]
+Patch165: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch
+# For bz#2116743 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command
+Patch166: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch
+# For bz#2116743 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command
+Patch167: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch

 BuildRequires: wget
 BuildRequires: rpm-build
@ -1426,6 +1600,151 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || :


 %changelog
+* Wed Sep 14 2022 Miroslav Rezanina <mrezanin@redhat.com> - 6.2.0-20.el8.1
+- kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2116743]
+- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2116743]
+- Resolves: bz#2116743
+  ([RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command)
+
+* Fri Aug 26 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-20
+- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120279]
+- Resolves: bz#2120279
+  (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7])
+
+* Tue Aug 16 2022 Miroslav Rezanina <mrezanin@redhat.com> - 6.2.0-19
+- kvm-migration-Introduce-ram_transferred_add.patch [bz#2110203]
+- kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch [bz#2110203]
+- kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2110203]
+- kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2110203]
+- kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2110203]
+- kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2110203]
+- kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2110203]
+- kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2110203]
+- kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch [bz#2112296]
+- Resolves: bz#2110203
+  (zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together)
+- Resolves: bz#2112296
+  (virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions)
+
+* Tue Jul 19 2022 Camilla Conte <cconte@redhat.com> - 6.2.0-18
+- kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2105410]
+- kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2105410]
+- Resolves: bz#2105410
+  (Stalled IO Operations in VM)
+
+* Tue Jul 12 2022 Camilla Conte <cconte@redhat.com> - 6.2.0-17
+- kvm-migration-Never-call-twice-qemu_target_page_size.patch [bz#2072049]
+- kvm-multifd-Rename-used-field-to-num.patch [bz#2072049]
+- kvm-multifd-Add-missing-documentation.patch [bz#2072049]
+- kvm-multifd-The-variable-is-only-used-inside-the-loop.patch [bz#2072049]
+- kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch [bz#2072049]
+- kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch [bz#2072049]
+- kvm-multifd-Fill-offset-and-block-for-reception.patch [bz#2072049]
+- kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch [bz#2072049]
+- kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch [bz#2072049]
+- kvm-migration-All-this-fields-are-unsigned.patch [bz#2072049]
+- kvm-multifd-Move-iov-from-pages-to-params.patch [bz#2072049]
+- kvm-multifd-Make-zlib-use-iov-s.patch [bz#2072049]
+- kvm-multifd-Make-zstd-use-iov-s.patch [bz#2072049]
+- kvm-multifd-Remove-send_write-method.patch [bz#2072049]
+- kvm-multifd-Use-a-single-writev-on-the-send-side.patch [bz#2072049]
+- kvm-multifd-Use-normal-pages-array-on-the-send-side.patch [bz#2072049]
+- kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#2072049]
+- kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#2072049]
+- kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#2072049]
+- kvm-migration-Add-migrate_use_tls-helper.patch [bz#2072049]
+- kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch [bz#2072049]
+- kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#2072049]
+- kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#2072049]
+- kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#2072049]
+- kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#2072049]
+- kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#2072049]
+- kvm-migration-Add-migration_incoming_transport_cleanup.patch [bz#2097652]
+- kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch [bz#2097652]
+- kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch [bz#2098076]
+- kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch [bz#2098076]
+- Resolves: bz#2072049
+  (Pull  MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8)
+- Resolves: bz#2097652
+  (The migration port is not released if use it again for recovering postcopy migration)
+- Resolves: bz#2098076
+  (virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions)
+
+* Thu Jun 23 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-16
+- kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch [bz#2097209]
+- kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch [bz#1951521]
+- kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch [bz#1951521]
+- Resolves: bz#2097209
+  ([virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111)
+- Resolves: bz#1951521
+  (CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8])
+
+* Tue Jun 14 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-15
+- kvm-virtio-gpu-do-not-byteswap-padding.patch [bz#1916415]
+- kvm-linux-headers-update-to-5.16-rc1.patch [bz#1916415]
+- kvm-linux-headers-Update-headers-to-v5.17-rc1.patch [bz#1916415]
+- kvm-linux-headers-include-missing-changes-from-5.17.patch [bz#1916415]
+- kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch [bz#1916415]
+- kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch [bz#1916415]
+- kvm-x86-Grant-AMX-permission-for-guest.patch [bz#1916415]
+- kvm-x86-Add-XFD-faulting-bit-for-state-components.patch [bz#1916415]
+- kvm-x86-Add-AMX-CPUIDs-enumeration.patch [bz#1916415]
+- kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch [bz#1916415]
+- kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch [bz#1916415]
+- kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch [bz#1916415]
+- kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch [bz#1916415]
+- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2069946]
+- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2069946]
+- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2069946]
+- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2069946]
+- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2069946]
+- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2069946]
+- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2069946]
+- kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch [bz#2029980]
+- kvm-block-Make-bdrv_refresh_limits-non-recursive.patch [bz#2072932]
+- kvm-iotests-Allow-using-QMP-with-the-QSD.patch [bz#2072932]
+- kvm-iotests-graph-changes-while-io-New-test.patch [bz#2072932]
+- Resolves: bz#1916415
+  ([Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions)
+- Resolves: bz#2069946
+  (PXE boot crash qemu when using multiqueue vDPA)
+- Resolves: bz#2029980
+  (Failed assertion in IDE emulation with Ceph backend)
+- Resolves: bz#2072932
+  (Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7])
+
+* Thu May 19 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-14
+- kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch [bz#2065043]
+- kvm-target-i386-properly-reset-TSC-on-reset.patch [bz#2070417]
+- kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch [bz#2040734]
+- Resolves: bz#2065043
+  (Remove upstream-only devices from the qemu-kvm binary)
+- Resolves: bz#2070417
+  (Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0])
+- Resolves: bz#2040734
+  (CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7])
+
+* Tue May 03 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-13
+- kvm-s390x-ipl-support-extended-kernel-command-line-size.patch [bz#2043830]
+- kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch [bz#2063206]
+- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#1519071]
+- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#1519071]
+- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#1519071]
+- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#1519071]
+- Resolves: bz#2043830
+  ([IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU)
+- Resolves: bz#2063206
+  (CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8])
+- Resolves: bz#1519071
+  (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs))
+
 * Thu Apr 21 2022 Jon Maloy <jmaloy@redhat.com> - 6.2.0-12
 - kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2040738]
 - kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2063262]