From cffb6bb06a757eb02c0dd97c37fccfb451d45fd7 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Tue, 25 Jul 2023 05:26:45 -0400 Subject: [PATCH] * Tue Jul 25 2023 Miroslav Rezanina - 6.2.0-37 - kvm-util-iov-Make-qiov_slice-public.patch [bz#2141964] - kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch [bz#2141964] - kvm-util-iov-Remove-qemu_iovec_init_extended.patch [bz#2141964] - kvm-iotests-iov-padding-New-test.patch [bz#2141964] - kvm-block-Fix-pad_request-s-request-restriction.patch [bz#2141964] - Resolves: bz#2141964 (Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk) --- ...se-padded-I-O-vecs-exceeding-IOV_MAX.patch | 359 ++++++++++++++++++ ...ix-pad_request-s-request-restriction.patch | 75 ++++ kvm-iotests-iov-padding-New-test.patch | 187 +++++++++ kvm-util-iov-Make-qiov_slice-public.patch | 98 +++++ ...-iov-Remove-qemu_iovec_init_extended.patch | 157 ++++++++ qemu-kvm.spec | 21 +- 6 files changed, 896 insertions(+), 1 deletion(-) create mode 100644 kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch create mode 100644 kvm-block-Fix-pad_request-s-request-restriction.patch create mode 100644 kvm-iotests-iov-padding-New-test.patch create mode 100644 kvm-util-iov-Make-qiov_slice-public.patch create mode 100644 kvm-util-iov-Remove-qemu_iovec_init_extended.patch diff --git a/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch new file mode 100644 index 0000000..94f19c6 --- /dev/null +++ b/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch @@ -0,0 +1,359 @@ +From 1f7520baa6f0bf02ccba2ebfe7d1d5bf6520f95a Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:16 +0200 +Subject: [PATCH 2/5] block: Collapse padded I/O vecs exceeding IOV_MAX + +RH-Author: Hanna Czenczek +RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2141964 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/5] 1d86ce8398e4ab66e308a686f9855c963e52b0a9 + +When processing vectored guest requests that are not aligned to the +storage request alignment, we pad them by adding head and/or tail +buffers for a read-modify-write cycle. + +The guest can submit I/O vectors up to IOV_MAX (1024) in length, but +with this padding, the vector can exceed that limit. As of +4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make +qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the +limit, instead returning an error to the guest. + +To the guest, this appears as a random I/O error. We should not return +an I/O error to the guest when it issued a perfectly valid request. + +Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector +longer than IOV_MAX, which generally seems to work (because the guest +assumes a smaller alignment than we really have, file-posix's +raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and +so emulate the request, so that the IOV_MAX does not matter). However, +that does not seem exactly great. + +I see two ways to fix this problem: +1. We split such long requests into two requests. +2. We join some elements of the vector into new buffers to make it + shorter. + +I am wary of (1), because it seems like it may have unintended side +effects. + +(2) on the other hand seems relatively simple to implement, with +hopefully few side effects, so this patch does that. 
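+
+As an illustration of the collapsing arithmetic (an editor's standalone
+sketch in plain C, not QEMU code -- the real implementation is
+bdrv_create_padded_qiov() in the hunk below; LIMIT here is a scaled-down
+stand-in for the real IOV_MAX of 1024):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+    #include <sys/uio.h>
+
+    #define LIMIT 8 /* stand-in for IOV_MAX */
+
+    int main(void)
+    {
+        /* A maximally long guest vector: LIMIT one-byte elements */
+        struct iovec iov[LIMIT];
+        char data[LIMIT];
+        for (int i = 0; i < LIMIT; i++) {
+            data[i] = (char)('a' + i);
+            iov[i].iov_base = &data[i];
+            iov[i].iov_len = 1;
+        }
+
+        /* Both ends unaligned: head and tail padding both needed */
+        int padded_niov = 1 + LIMIT + 1;
+        int surplus = padded_niov - LIMIT;  /* at most 2 */
+        int collapse = surplus + 1;         /* merge 3 elements into 1 */
+
+        /* The bounce buffer replaces the first `collapse` elements.
+         * (A write fills it here; a read copies the data back out of
+         * it after the request completes.) */
+        size_t bounce_len = 0;
+        for (int i = 0; i < collapse; i++) {
+            bounce_len += iov[i].iov_len;
+        }
+        char *bounce = malloc(bounce_len);
+        size_t off = 0;
+        for (int i = 0; i < collapse; i++) {
+            memcpy(bounce + off, iov[i].iov_base, iov[i].iov_len);
+            off += iov[i].iov_len;
+        }
+
+        /* head + bounce + remaining elements + tail fits again */
+        int final_niov = 1 + 1 + (LIMIT - collapse) + 1;
+        printf("padded=%d surplus=%d collapse=%d final=%d (limit %d)\n",
+               padded_niov, surplus, collapse, final_niov, LIMIT);
+        free(bounce);
+        return 0;
+    }
+
+With LIMIT = 8 this prints "padded=10 surplus=2 collapse=3 final=8
+(limit 8)": merging surplus + 1 leading elements into one bounce buffer
+removes exactly `surplus` elements, bringing the padded vector back
+under the limit.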
+ +To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() +is effectively replaced by the new function bdrv_create_padded_qiov(), +which not only wraps the request IOV with padding head/tail, but also +ensures that the resulting vector will not have more than IOV_MAX +elements. Putting that functionality into qemu_iovec_init_extended() is +infeasible because it requires allocating a bounce buffer; doing so +would require many more parameters (buffer alignment, how to initialize +the buffer, and out parameters like the buffer, its length, and the +original elements), which is not reasonable. + +Conversely, it is not difficult to move qemu_iovec_init_extended()'s +functionality into bdrv_create_padded_qiov() by using public +qemu_iovec_* functions, so that is what this patch does. + +Because bdrv_pad_request() was the only "serious" user of +qemu_iovec_init_extended(), the next patch will remove the latter +function, so the functionality is not implemented twice. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-3-hreitz@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) + +Conflicts: + block/io.c: Downstream bdrv_pad_request() has no @flags + parameter. + +Signed-off-by: Hanna Czenczek +--- + block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 151 insertions(+), 15 deletions(-) + +diff --git a/block/io.c b/block/io.c +index c3e7301613..0fe8f0dd40 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1624,6 +1624,14 @@ out: + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. ++ * ++ * @write is true for write requests, false for read requests. ++ * ++ * If padding makes the vector too long (exceeding IOV_MAX), then we need to ++ * merge existing vector elements into a single one. @collapse_bounce_buf acts ++ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse ++ * I/O vector elements so for read requests, the data can be copied back after ++ * the read is done. + */ + typedef struct BdrvRequestPadding { + uint8_t *buf; +@@ -1632,11 +1640,17 @@ typedef struct BdrvRequestPadding { + size_t head; + size_t tail; + bool merge_reads; ++ bool write; + QEMUIOVector local_qiov; ++ ++ uint8_t *collapse_bounce_buf; ++ size_t collapse_len; ++ QEMUIOVector pre_collapse_qiov; + } BdrvRequestPadding; + + static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, ++ bool write, + BdrvRequestPadding *pad) + { + int64_t align = bs->bl.request_alignment; +@@ -1668,6 +1682,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, + pad->tail_buf = pad->buf + pad->buf_len - align; + } + ++ pad->write = write; ++ + return true; + } + +@@ -1733,8 +1749,23 @@ zero_mem: + return 0; + } + +-static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++/** ++ * Free *pad's associated buffers, and perform any necessary finalization steps. 
++ */ ++static void bdrv_padding_finalize(BdrvRequestPadding *pad) + { ++ if (pad->collapse_bounce_buf) { ++ if (!pad->write) { ++ /* ++ * If padding required elements in the vector to be collapsed into a ++ * bounce buffer, copy the bounce buffer content back ++ */ ++ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_vfree(pad->collapse_bounce_buf); ++ qemu_iovec_destroy(&pad->pre_collapse_qiov); ++ } + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); +@@ -1742,6 +1773,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + memset(pad, 0, sizeof(*pad)); + } + ++/* ++ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while ++ * ensuring that the resulting vector will not exceed IOV_MAX elements. ++ * ++ * To ensure this, when necessary, the first two or three elements of @iov are ++ * merged into pad->collapse_bounce_buf and replaced by a reference to that ++ * bounce buffer in pad->local_qiov. ++ * ++ * After performing a read request, the data from the bounce buffer must be ++ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). ++ */ ++static int bdrv_create_padded_qiov(BlockDriverState *bs, ++ BdrvRequestPadding *pad, ++ struct iovec *iov, int niov, ++ size_t iov_offset, size_t bytes) ++{ ++ int padded_niov, surplus_count, collapse_count; ++ ++ /* Assert this invariant */ ++ assert(niov <= IOV_MAX); ++ ++ /* ++ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error ++ * to the guest is not ideal, but there is little else we can do. At least ++ * this will practically never happen on 64-bit systems. ++ */ ++ if (SIZE_MAX - pad->head < bytes || ++ SIZE_MAX - pad->head - bytes < pad->tail) ++ { ++ return -EINVAL; ++ } ++ ++ /* Length of the resulting IOV if we just concatenated everything */ ++ padded_niov = !!pad->head + niov + !!pad->tail; ++ ++ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); ++ ++ if (pad->head) { ++ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); ++ } ++ ++ /* ++ * If padded_niov > IOV_MAX, we cannot just concatenate everything. ++ * Instead, merge the first two or three elements of @iov to reduce the ++ * number of vector elements as necessary. ++ */ ++ if (padded_niov > IOV_MAX) { ++ /* ++ * Only head and tail can have lead to the number of entries exceeding ++ * IOV_MAX, so we can exceed it by the head and tail at most. We need ++ * to reduce the number of elements by `surplus_count`, so we merge that ++ * many elements plus one into one element. ++ */ ++ surplus_count = padded_niov - IOV_MAX; ++ assert(surplus_count <= !!pad->head + !!pad->tail); ++ collapse_count = surplus_count + 1; ++ ++ /* ++ * Move the elements to collapse into `pad->pre_collapse_qiov`, then ++ * advance `iov` (and associated variables) by those elements. ++ */ ++ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); ++ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, ++ collapse_count, iov_offset, SIZE_MAX); ++ iov += collapse_count; ++ iov_offset = 0; ++ niov -= collapse_count; ++ bytes -= pad->pre_collapse_qiov.size; ++ ++ /* ++ * Construct the bounce buffer to match the length of the to-collapse ++ * vector elements, and for write requests, initialize it with the data ++ * from those elements. Then add it to `pad->local_qiov`. 
++ */ ++ pad->collapse_len = pad->pre_collapse_qiov.size; ++ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); ++ if (pad->write) { ++ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_iovec_add(&pad->local_qiov, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ ++ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); ++ ++ if (pad->tail) { ++ qemu_iovec_add(&pad->local_qiov, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ } ++ ++ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); ++ return 0; ++} ++ + /* + * bdrv_pad_request + * +@@ -1749,6 +1875,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * ++ * @write is true for write requests, false for read requests. ++ * + * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: + * - on function start they represent original request + * - on failure or when padding is not needed they are unchanged +@@ -1757,25 +1885,33 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + static int bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, int64_t *bytes, ++ bool write, + BdrvRequestPadding *pad, bool *padded) + { + int ret; ++ struct iovec *sliced_iov; ++ int sliced_niov; ++ size_t sliced_head, sliced_tail; + + bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); + +- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { + *padded = false; + } + return 0; + } + +- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, +- *qiov, *qiov_offset, *bytes, +- pad->buf + pad->buf_len - pad->tail, +- pad->tail); ++ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, ++ &sliced_head, &sliced_tail, ++ &sliced_niov); ++ ++ /* Guaranteed by bdrv_check_qiov_request() */ ++ assert(*bytes <= SIZE_MAX); ++ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, ++ sliced_head, *bytes); + if (ret < 0) { +- bdrv_padding_destroy(pad); ++ bdrv_padding_finalize(pad); + return ret; + } + *bytes += pad->head + pad->tail; +@@ -1836,8 +1972,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- NULL); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, ++ &pad, NULL); + if (ret < 0) { + goto fail; + } +@@ -1847,7 +1983,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + fail: + bdrv_dec_in_flight(bs); +@@ -2167,7 +2303,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + bool padding; + BdrvRequestPadding pad; + +- padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); + if (padding) { + bdrv_make_request_serialising(req, align); + +@@ -2214,7 +2350,7 @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, + } + + out: +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + return ret; + } +@@ -2280,8 +2416,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do + * alignment only if 
there is no ZERO flag. + */ +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- &padded); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true, ++ &pad, &padded); + if (ret < 0) { + return ret; + } +@@ -2310,7 +2446,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, + qiov, qiov_offset, flags); + +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + out: + tracked_request_end(&req); +-- +2.39.3 + diff --git a/kvm-block-Fix-pad_request-s-request-restriction.patch b/kvm-block-Fix-pad_request-s-request-restriction.patch new file mode 100644 index 0000000..ce4eb0b --- /dev/null +++ b/kvm-block-Fix-pad_request-s-request-restriction.patch @@ -0,0 +1,75 @@ +From b9866279996ee065cb524bf30bc70e22efbab303 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Fri, 14 Jul 2023 10:59:38 +0200 +Subject: [PATCH 5/5] block: Fix pad_request's request restriction + +RH-Author: Hanna Czenczek +RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2141964 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [5/5] f9188bd089d6c67185ea1accde20d491a2ed3193 + +bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX, +which bdrv_check_qiov_request() does not guarantee. + +bdrv_check_request32() however will guarantee this, and both of +bdrv_pad_request()'s callers (bdrv_co_preadv_part() and +bdrv_co_pwritev_part()) already run it before calling +bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call +bdrv_check_request32() without expecting error, too. + +In effect, this patch will not change guest-visible behavior. It is a +clean-up to tighten a condition to match what is guaranteed by our +callers, and which exists purely to show clearly why the subsequent +assertion (`assert(*bytes <= SIZE_MAX)`) is always true. + +Note there is a difference between the interfaces of +bdrv_check_qiov_request() and bdrv_check_request32(): The former takes +an errp, the latter does not, so we can no longer just pass +&error_abort. Instead, we need to check the returned value. While we +do expect success (because the callers have already run this function), +an assert(ret == 0) is not much simpler than just to return an error if +it occurs, so let us handle errors by returning them up the stack now. 
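+
+(A minimal sketch of the resulting pattern -- an editor's illustration,
+where check_request32() is a simplified hypothetical stand-in for the
+real bdrv_check_request32(), not the patched code itself:
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    /* Hypothetical checker: reject anything that does not fit a
+     * 32-bit request size. */
+    static int check_request32(int64_t offset, int64_t bytes)
+    {
+        if (offset < 0 || bytes < 0 || bytes > INT32_MAX) {
+            return -22; /* -EINVAL */
+        }
+        return 0;
+    }
+
+    static int pad_request(int64_t offset, int64_t bytes)
+    {
+        int ret = check_request32(offset, bytes);
+        if (ret < 0) {
+            return ret; /* propagate instead of assert(ret == 0) */
+        }
+        /* bytes <= INT32_MAX <= SIZE_MAX now holds by construction */
+        return 0;
+    }
+
+    int main(void)
+    {
+        printf("%d %d\n", pad_request(0, 4096),
+               pad_request(0, INT64_MAX)); /* prints "0 -22" */
+        return 0;
+    }
+)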
+ +Reported-by: Peter Maydell +Signed-off-by: Hanna Czenczek +Message-id: 20230714085938.202730-1-hreitz@redhat.com +Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a + ("block: Collapse padded I/O vecs exceeding IOV_MAX") +Signed-off-by: Hanna Czenczek +Signed-off-by: Stefan Hajnoczi +Signed-off-by: Hanna Czenczek +--- + block/io.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 0fe8f0dd40..8ae57728a6 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1893,7 +1893,11 @@ static int bdrv_pad_request(BlockDriverState *bs, + int sliced_niov; + size_t sliced_head, sliced_tail; + +- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); ++ /* Should have been checked by the caller already */ ++ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset); ++ if (ret < 0) { ++ return ret; ++ } + + if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { +@@ -1906,7 +1910,7 @@ static int bdrv_pad_request(BlockDriverState *bs, + &sliced_head, &sliced_tail, + &sliced_niov); + +- /* Guaranteed by bdrv_check_qiov_request() */ ++ /* Guaranteed by bdrv_check_request32() */ + assert(*bytes <= SIZE_MAX); + ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, + sliced_head, *bytes); +-- +2.39.3 + diff --git a/kvm-iotests-iov-padding-New-test.patch b/kvm-iotests-iov-padding-New-test.patch new file mode 100644 index 0000000..aa3db62 --- /dev/null +++ b/kvm-iotests-iov-padding-New-test.patch @@ -0,0 +1,187 @@ +From 084e211448f40c3e9d9b1907f6c98dca9f998bc3 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:18 +0200 +Subject: [PATCH 4/5] iotests/iov-padding: New test + +RH-Author: Hanna Czenczek +RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2141964 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [4/5] a80be9c26ebd5503745989cd6823cb4814264258 + +Test that even vectored IO requests with 1024 vector elements that are +not aligned to the device's request alignment will succeed. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-5-hreitz@redhat.com> +(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ + tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iov-padding + create mode 100644 tests/qemu-iotests/tests/iov-padding.out + +diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding +new file mode 100755 +index 0000000000..b9604900c7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding +@@ -0,0 +1,85 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Check the interaction of request padding (to fit alignment restrictions) with ++# vectored I/O from the guest ++# ++# Copyright Red Hat ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. ./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++ ++_make_test_img 1M ++ ++IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" ++ ++# Four combinations: ++# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k ++# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not ++# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned ++# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not ++for start_offset in 4096 512; do ++ for last_element_length in 512 4096; do ++ length=$((1023 * 512 + $last_element_length)) ++ ++ echo ++ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" ++ ++ # Fill with data for testing ++ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io ++ ++ # 1023 512-byte buffers, and then one with length $last_element_length ++ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "writev $cmd_params" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ ++ # Read all patterns -- read the part we just wrote with writev twice, ++ # once "normally", and once with a readv, so we see that that works, too ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "read -P 1 0 $start_offset" \ ++ -c "read -P 2 $start_offset $length" \ ++ -c "readv $cmd_params" \ ++ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ done ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out +new file mode 100644 +index 0000000000..e07a91fac7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding.out +@@ -0,0 +1,59 @@ ++QA output created by iov-padding ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 
527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 516608/516608 bytes at offset 531968 ++504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 512 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 523776/523776 bytes at offset 524800 ++511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 512/512 bytes at offset 0 ++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 512 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++*** done +-- +2.39.3 + diff --git a/kvm-util-iov-Make-qiov_slice-public.patch b/kvm-util-iov-Make-qiov_slice-public.patch new file mode 100644 index 0000000..807707a --- /dev/null +++ b/kvm-util-iov-Make-qiov_slice-public.patch @@ -0,0 +1,98 @@ +From 884e6dfecc8b0f155015f0a25888300d8e1707f8 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:15 +0200 +Subject: [PATCH 1/5] util/iov: Make qiov_slice() public + +RH-Author: Hanna Czenczek +RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2141964 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/5] 7f082982e49bacbcc21ca24e471b4399e64321a9 + +We want to inline qemu_iovec_init_extended() in block/io.c for padding +requests, and having access to qiov_slice() is useful for this. As a +public function, it is renamed to qemu_iovec_slice(). + +(We will need to count the number of I/O vector elements of a slice +there, and then later process this slice. Without qiov_slice(), we +would need to call qemu_iovec_subvec_niov(), and all further +IOV-processing functions may need to skip prefixing elements to +accomodate for a qiov_offset. Because qemu_iovec_subvec_niov() +internally calls qiov_slice(), we can just have the block/io.c code call +qiov_slice() itself, thus get the number of elements, and also create an +iovec array with the superfluous prefixing elements stripped, so the +following processing functions no longer need to skip them.) 
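+
+To make the @head/@tail semantics concrete, here is an editor's toy
+re-implementation (an illustration only, with no bounds checking and
+none of util/iov.c's helpers; the real function is qemu_iovec_slice()
+in the diff below):
+
+    #include <stdio.h>
+    #include <stddef.h>
+    #include <sys/uio.h>
+
+    /* Caller must ensure [offset, offset+len) lies within the vector. */
+    static struct iovec *iov_slice(struct iovec *iov,
+                                   size_t offset, size_t len,
+                                   size_t *head, size_t *tail, int *niov)
+    {
+        struct iovec *p = iov;
+
+        /* Skip elements lying entirely before the requested range */
+        while (offset >= p->iov_len) {
+            offset -= p->iov_len;
+            p++;
+        }
+        *head = offset;
+
+        /* Walk forward until the range is covered */
+        size_t covered = p->iov_len - offset;
+        struct iovec *q = p;
+        while (covered < len) {
+            q++;
+            covered += q->iov_len;
+        }
+        *tail = covered - len;
+        *niov = (int)(q - p) + 1;
+        return p;
+    }
+
+    int main(void)
+    {
+        char a[4], b[4], c[4];
+        struct iovec v[3] = { { a, 4 }, { b, 4 }, { c, 4 } };
+        size_t head, tail;
+        int n;
+
+        /* Bytes [6, 10) start 2 bytes into v[1], end 2 bytes into v[2]:
+         * prints "first=1 niov=2 head=2 tail=2" */
+        struct iovec *s = iov_slice(v, 6, 4, &head, &tail, &n);
+        printf("first=%td niov=%d head=%zu tail=%zu\n",
+               s - v, n, head, tail);
+        return 0;
+    }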
+ +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-2-hreitz@redhat.com> +(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 3 +++ + util/iov.c | 14 +++++++------- + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 9330746680..46fadfb27a 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( + void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov); + int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, +diff --git a/util/iov.c b/util/iov.c +index 58c7b3eeee..3ccb530b16 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -373,15 +373,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, + } + + /* +- * qiov_slice ++ * qemu_iovec_slice + * + * Find subarray of iovec's, containing requested range. @head would + * be offset in first iov (returned by the function), @tail would be + * count of extra bytes in last iovec (returned iov + @niov - 1). + */ +-static struct iovec *qiov_slice(QEMUIOVector *qiov, +- size_t offset, size_t len, +- size_t *head, size_t *tail, int *niov) ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) + { + struct iovec *iov, *end_iov; + +@@ -406,7 +406,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + size_t head, tail; + int niov; + +- qiov_slice(qiov, offset, len, &head, &tail, &niov); ++ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); + + return niov; + } +@@ -434,8 +434,8 @@ int qemu_iovec_init_extended( + } + + if (mid_len) { +- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); ++ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); + } + + total_niov = !!head_len + mid_niov + !!tail_len; +-- +2.39.3 + diff --git a/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/kvm-util-iov-Remove-qemu_iovec_init_extended.patch new file mode 100644 index 0000000..c49c3ac --- /dev/null +++ b/kvm-util-iov-Remove-qemu_iovec_init_extended.patch @@ -0,0 +1,157 @@ +From cc31f7eb1c362dc308a163b7364c96ed098a793a Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:17 +0200 +Subject: [PATCH 3/5] util/iov: Remove qemu_iovec_init_extended() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 291: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2141964 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [3/5] 19c8307ef1289f1991199d1d1f6ab6c89a4b59ce + +bdrv_pad_request() was the main user of qemu_iovec_init_extended(). +HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() +now. + +The only remaining user is qemu_iovec_init_slice(), which can easily +inline the small part it really needs. + +Note that qemu_iovec_init_extended() offered a memcpy() optimization to +initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used +to replace its functionality, does not, but calls qemu_iovec_add() for +every single element. If we decide this optimization was important, we +will need to re-implement it in qemu_iovec_concat_iov(), which might +also benefit its pre-existing users. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-4-hreitz@redhat.com> +(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 5 --- + util/iov.c | 79 +++++++--------------------------------------- + 2 files changed, 11 insertions(+), 73 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 46fadfb27a..63a1c01965 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); + struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, +diff --git a/util/iov.c b/util/iov.c +index 3ccb530b16..af3ccc2546 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -411,70 +411,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + return niov; + } + +-/* +- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, +- * and @tail_buf buffer into new qiov. +- */ +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len) +-{ +- size_t mid_head, mid_tail; +- int total_niov, mid_niov = 0; +- struct iovec *p, *mid_iov = NULL; +- +- assert(mid_qiov->niov <= IOV_MAX); +- +- if (SIZE_MAX - head_len < mid_len || +- SIZE_MAX - head_len - mid_len < tail_len) +- { +- return -EINVAL; +- } +- +- if (mid_len) { +- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); +- } +- +- total_niov = !!head_len + mid_niov + !!tail_len; +- if (total_niov > IOV_MAX) { +- return -EINVAL; +- } +- +- if (total_niov == 1) { +- qemu_iovec_init_buf(qiov, NULL, 0); +- p = &qiov->local_iov; +- } else { +- qiov->niov = qiov->nalloc = total_niov; +- qiov->size = head_len + mid_len + tail_len; +- p = qiov->iov = g_new(struct iovec, qiov->niov); +- } +- +- if (head_len) { +- p->iov_base = head_buf; +- p->iov_len = head_len; +- p++; +- } +- +- assert(!mid_niov == !mid_len); +- if (mid_niov) { +- memcpy(p, mid_iov, mid_niov * sizeof(*p)); +- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; +- p[0].iov_len -= mid_head; +- p[mid_niov - 1].iov_len -= mid_tail; +- p += mid_niov; +- } +- +- if (tail_len) { +- p->iov_base = tail_buf; +- p->iov_len = tail_len; +- } +- +- return 0; +-} +- + /* + * Check if the contents of subrange of qiov data is all zeroes. 
+ */ +@@ -506,14 +442,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len) + { +- int ret; ++ struct iovec *slice_iov; ++ int slice_niov; ++ size_t slice_head, slice_tail; + + assert(source->size >= len); + assert(source->size - len >= offset); + +- /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */ +- ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); +- assert(ret == 0); ++ slice_iov = qemu_iovec_slice(source, offset, len, ++ &slice_head, &slice_tail, &slice_niov); ++ if (slice_niov == 1) { ++ qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len); ++ } else { ++ qemu_iovec_init(qiov, slice_niov); ++ qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len); ++ } + } + + void qemu_iovec_destroy(QEMUIOVector *qiov) +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 091ed4d..6483966 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 36%{?rcrel}%{?dist} +Release: 37%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -759,6 +759,16 @@ Patch298: kvm-target-i386-add-support-for-FLUSH_L1D-feature.patch Patch299: kvm-target-i386-add-support-for-FB_CLEAR-feature.patch # For bz#2169733 - Qemu on destination host crashed if migrate with postcopy and multifd enabled Patch300: kvm-migration-Disable-postcopy-multifd-migration.patch +# For bz#2141964 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk +Patch301: kvm-util-iov-Make-qiov_slice-public.patch +# For bz#2141964 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk +Patch302: kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +# For bz#2141964 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk +Patch303: kvm-util-iov-Remove-qemu_iovec_init_extended.patch +# For bz#2141964 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk +Patch304: kvm-iotests-iov-padding-New-test.patch +# For bz#2141964 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk +Patch305: kvm-block-Fix-pad_request-s-request-restriction.patch BuildRequires: wget BuildRequires: rpm-build @@ -1928,6 +1938,15 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Tue Jul 25 2023 Miroslav Rezanina - 6.2.0-37 +- kvm-util-iov-Make-qiov_slice-public.patch [bz#2141964] +- kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch [bz#2141964] +- kvm-util-iov-Remove-qemu_iovec_init_extended.patch [bz#2141964] +- kvm-iotests-iov-padding-New-test.patch [bz#2141964] +- kvm-block-Fix-pad_request-s-request-restriction.patch [bz#2141964] +- Resolves: bz#2141964 + (Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk) + * Thu Jun 29 2023 Jon Maloy - 6.2.0-36 - kvm-memory-prevent-dma-reentracy-issues.patch [bz#1999236] - kvm-async-Add-an-optional-reentrancy-guard-to-the-BH-API.patch [bz#1999236]