diff --git a/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch b/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch new file mode 100644 index 0000000..6de5d65 --- /dev/null +++ b/kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch @@ -0,0 +1,354 @@ +From ff05c0b0d3414c0e5b3903048280accdc6c75ca0 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:16 +0200 +Subject: [PATCH 2/9] block: Collapse padded I/O vecs exceeding IOV_MAX + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/5] 84c56bd16f841a18cf2baa918dfeab3240e3944d (hreitz/qemu-kvm-c-9-s) + +When processing vectored guest requests that are not aligned to the +storage request alignment, we pad them by adding head and/or tail +buffers for a read-modify-write cycle. + +The guest can submit I/O vectors up to IOV_MAX (1024) in length, but +with this padding, the vector can exceed that limit. As of +4c002cef0e9abe7135d7916c51abce47f7fc1ee2 ("util/iov: make +qemu_iovec_init_extended() honest"), we refuse to pad vectors beyond the +limit, instead returning an error to the guest. + +To the guest, this appears as a random I/O error. We should not return +an I/O error to the guest when it issued a perfectly valid request. + +Before 4c002cef0e9abe7135d7916c51abce47f7fc1ee2, we just made the vector +longer than IOV_MAX, which generally seems to work (because the guest +assumes a smaller alignment than we really have, file-posix's +raw_co_prw() will generally see bdrv_qiov_is_aligned() return false, and +so emulate the request, so that the IOV_MAX does not matter). However, +that does not seem exactly great. + +I see two ways to fix this problem: +1. We split such long requests into two requests. +2. We join some elements of the vector into new buffers to make it + shorter. + +I am wary of (1), because it seems like it may have unintended side +effects. + +(2) on the other hand seems relatively simple to implement, with +hopefully few side effects, so this patch does that. + +To do this, the use of qemu_iovec_init_extended() in bdrv_pad_request() +is effectively replaced by the new function bdrv_create_padded_qiov(), +which not only wraps the request IOV with padding head/tail, but also +ensures that the resulting vector will not have more than IOV_MAX +elements. Putting that functionality into qemu_iovec_init_extended() is +infeasible because it requires allocating a bounce buffer; doing so +would require many more parameters (buffer alignment, how to initialize +the buffer, and out parameters like the buffer, its length, and the +original elements), which is not reasonable. + +Conversely, it is not difficult to move qemu_iovec_init_extended()'s +functionality into bdrv_create_padded_qiov() by using public +qemu_iovec_* functions, so that is what this patch does. + +Because bdrv_pad_request() was the only "serious" user of +qemu_iovec_init_extended(), the next patch will remove the latter +function, so the functionality is not implemented twice. 
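+
+(For illustration only, not part of the change itself: the core of
+approach (2) is merging a few leading vector elements into one bounce
+buffer. A minimal, self-contained sketch follows; all names here are
+hypothetical, and the real implementation is bdrv_create_padded_qiov()
+below.)
+
+    #include <stdbool.h>
+    #include <stdlib.h>      /* malloc */
+    #include <string.h>      /* memcpy */
+    #include <sys/uio.h>     /* struct iovec */
+
+    /*
+     * Merge the first @count elements of @iov into one bounce buffer.
+     * For a write, the buffer is filled with the elements' data up
+     * front; for a read, the data is instead copied back out of the
+     * buffer once the request completes. The caller replaces those
+     * @count elements with a single element referencing the returned
+     * buffer, shrinking the vector by @count - 1 entries.
+     */
+    static void *collapse_iov(const struct iovec *iov, int count,
+                              bool write, size_t *out_len)
+    {
+        size_t len = 0, off = 0;
+        for (int i = 0; i < count; i++) {
+            len += iov[i].iov_len;
+        }
+        char *bounce = malloc(len);
+        if (bounce && write) {
+            for (int i = 0; i < count; i++) {
+                memcpy(bounce + off, iov[i].iov_base, iov[i].iov_len);
+                off += iov[i].iov_len;
+            }
+        }
+        *out_len = len;
+        return bounce;
+    }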
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2141964 +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-3-hreitz@redhat.com> +Reviewed-by: Vladimir Sementsov-Ogievskiy +(cherry picked from commit 18743311b829cafc1737a5f20bc3248d5f91ee2a) +Signed-off-by: Hanna Czenczek +--- + block/io.c | 166 ++++++++++++++++++++++++++++++++++++++++++++++++----- + 1 file changed, 151 insertions(+), 15 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 2e267a85ab..4e8e90208b 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -1439,6 +1439,14 @@ out: + * @merge_reads is true for small requests, + * if @buf_len == @head + bytes + @tail. In this case it is possible that both + * head and tail exist but @buf_len == align and @tail_buf == @buf. ++ * ++ * @write is true for write requests, false for read requests. ++ * ++ * If padding makes the vector too long (exceeding IOV_MAX), then we need to ++ * merge existing vector elements into a single one. @collapse_bounce_buf acts ++ * as the bounce buffer in such cases. @pre_collapse_qiov has the pre-collapse ++ * I/O vector elements so for read requests, the data can be copied back after ++ * the read is done. + */ + typedef struct BdrvRequestPadding { + uint8_t *buf; +@@ -1447,11 +1455,17 @@ typedef struct BdrvRequestPadding { + size_t head; + size_t tail; + bool merge_reads; ++ bool write; + QEMUIOVector local_qiov; ++ ++ uint8_t *collapse_bounce_buf; ++ size_t collapse_len; ++ QEMUIOVector pre_collapse_qiov; + } BdrvRequestPadding; + + static bool bdrv_init_padding(BlockDriverState *bs, + int64_t offset, int64_t bytes, ++ bool write, + BdrvRequestPadding *pad) + { + int64_t align = bs->bl.request_alignment; +@@ -1483,6 +1497,8 @@ static bool bdrv_init_padding(BlockDriverState *bs, + pad->tail_buf = pad->buf + pad->buf_len - align; + } + ++ pad->write = write; ++ + return true; + } + +@@ -1547,8 +1563,23 @@ zero_mem: + return 0; + } + +-static void bdrv_padding_destroy(BdrvRequestPadding *pad) ++/** ++ * Free *pad's associated buffers, and perform any necessary finalization steps. ++ */ ++static void bdrv_padding_finalize(BdrvRequestPadding *pad) + { ++ if (pad->collapse_bounce_buf) { ++ if (!pad->write) { ++ /* ++ * If padding required elements in the vector to be collapsed into a ++ * bounce buffer, copy the bounce buffer content back ++ */ ++ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_vfree(pad->collapse_bounce_buf); ++ qemu_iovec_destroy(&pad->pre_collapse_qiov); ++ } + if (pad->buf) { + qemu_vfree(pad->buf); + qemu_iovec_destroy(&pad->local_qiov); +@@ -1556,6 +1587,101 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + memset(pad, 0, sizeof(*pad)); + } + ++/* ++ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while ++ * ensuring that the resulting vector will not exceed IOV_MAX elements. ++ * ++ * To ensure this, when necessary, the first two or three elements of @iov are ++ * merged into pad->collapse_bounce_buf and replaced by a reference to that ++ * bounce buffer in pad->local_qiov. ++ * ++ * After performing a read request, the data from the bounce buffer must be ++ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()). 
++ */ ++static int bdrv_create_padded_qiov(BlockDriverState *bs, ++ BdrvRequestPadding *pad, ++ struct iovec *iov, int niov, ++ size_t iov_offset, size_t bytes) ++{ ++ int padded_niov, surplus_count, collapse_count; ++ ++ /* Assert this invariant */ ++ assert(niov <= IOV_MAX); ++ ++ /* ++ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error ++ * to the guest is not ideal, but there is little else we can do. At least ++ * this will practically never happen on 64-bit systems. ++ */ ++ if (SIZE_MAX - pad->head < bytes || ++ SIZE_MAX - pad->head - bytes < pad->tail) ++ { ++ return -EINVAL; ++ } ++ ++ /* Length of the resulting IOV if we just concatenated everything */ ++ padded_niov = !!pad->head + niov + !!pad->tail; ++ ++ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX)); ++ ++ if (pad->head) { ++ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head); ++ } ++ ++ /* ++ * If padded_niov > IOV_MAX, we cannot just concatenate everything. ++ * Instead, merge the first two or three elements of @iov to reduce the ++ * number of vector elements as necessary. ++ */ ++ if (padded_niov > IOV_MAX) { ++ /* ++ * Only head and tail can have lead to the number of entries exceeding ++ * IOV_MAX, so we can exceed it by the head and tail at most. We need ++ * to reduce the number of elements by `surplus_count`, so we merge that ++ * many elements plus one into one element. ++ */ ++ surplus_count = padded_niov - IOV_MAX; ++ assert(surplus_count <= !!pad->head + !!pad->tail); ++ collapse_count = surplus_count + 1; ++ ++ /* ++ * Move the elements to collapse into `pad->pre_collapse_qiov`, then ++ * advance `iov` (and associated variables) by those elements. ++ */ ++ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count); ++ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov, ++ collapse_count, iov_offset, SIZE_MAX); ++ iov += collapse_count; ++ iov_offset = 0; ++ niov -= collapse_count; ++ bytes -= pad->pre_collapse_qiov.size; ++ ++ /* ++ * Construct the bounce buffer to match the length of the to-collapse ++ * vector elements, and for write requests, initialize it with the data ++ * from those elements. Then add it to `pad->local_qiov`. ++ */ ++ pad->collapse_len = pad->pre_collapse_qiov.size; ++ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len); ++ if (pad->write) { ++ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ qemu_iovec_add(&pad->local_qiov, ++ pad->collapse_bounce_buf, pad->collapse_len); ++ } ++ ++ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes); ++ ++ if (pad->tail) { ++ qemu_iovec_add(&pad->local_qiov, ++ pad->buf + pad->buf_len - pad->tail, pad->tail); ++ } ++ ++ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX)); ++ return 0; ++} ++ + /* + * bdrv_pad_request + * +@@ -1563,6 +1689,8 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + * read of padding, bdrv_padding_rmw_read() should be called separately if + * needed. + * ++ * @write is true for write requests, false for read requests. 
++ * + * Request parameters (@qiov, &qiov_offset, &offset, &bytes) are in-out: + * - on function start they represent original request + * - on failure or when padding is not needed they are unchanged +@@ -1571,26 +1699,34 @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) + static int bdrv_pad_request(BlockDriverState *bs, + QEMUIOVector **qiov, size_t *qiov_offset, + int64_t *offset, int64_t *bytes, ++ bool write, + BdrvRequestPadding *pad, bool *padded, + BdrvRequestFlags *flags) + { + int ret; ++ struct iovec *sliced_iov; ++ int sliced_niov; ++ size_t sliced_head, sliced_tail; + + bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort); + +- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { ++ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) { + if (padded) { + *padded = false; + } + return 0; + } + +- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, +- *qiov, *qiov_offset, *bytes, +- pad->buf + pad->buf_len - pad->tail, +- pad->tail); ++ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes, ++ &sliced_head, &sliced_tail, ++ &sliced_niov); ++ ++ /* Guaranteed by bdrv_check_qiov_request() */ ++ assert(*bytes <= SIZE_MAX); ++ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov, ++ sliced_head, *bytes); + if (ret < 0) { +- bdrv_padding_destroy(pad); ++ bdrv_padding_finalize(pad); + return ret; + } + *bytes += pad->head + pad->tail; +@@ -1657,8 +1793,8 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + flags |= BDRV_REQ_COPY_ON_READ; + } + +- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad, +- NULL, &flags); ++ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false, ++ &pad, NULL, &flags); + if (ret < 0) { + goto fail; + } +@@ -1668,7 +1804,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, + bs->bl.request_alignment, + qiov, qiov_offset, flags); + tracked_request_end(&req); +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + fail: + bdrv_dec_in_flight(bs); +@@ -2000,7 +2136,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + /* This flag doesn't make sense for padding or zero writes */ + flags &= ~BDRV_REQ_REGISTERED_BUF; + +- padding = bdrv_init_padding(bs, offset, bytes, &pad); ++ padding = bdrv_init_padding(bs, offset, bytes, true, &pad); + if (padding) { + assert(!(flags & BDRV_REQ_NO_WAIT)); + bdrv_make_request_serialising(req, align); +@@ -2048,7 +2184,7 @@ bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes, + } + + out: +- bdrv_padding_destroy(&pad); ++ bdrv_padding_finalize(&pad); + + return ret; + } +@@ -2116,8 +2252,8 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, + * bdrv_co_do_zero_pwritev() does aligning by itself, so, we do + * alignment only if there is no ZERO flag. 
+     */
+-    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
+-                           &padded, &flags);
++    ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
++                           &pad, &padded, &flags);
+     if (ret < 0) {
+         return ret;
+     }
+@@ -2147,7 +2283,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+         ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
+                                    qiov, qiov_offset, flags);
+ 
+-    bdrv_padding_destroy(&pad);
++    bdrv_padding_finalize(&pad);
+ 
+ out:
+     tracked_request_end(&req);
+-- 
+2.39.3
+
diff --git a/kvm-block-Fix-pad_request-s-request-restriction.patch b/kvm-block-Fix-pad_request-s-request-restriction.patch
new file mode 100644
index 0000000..c0ab8c2
--- /dev/null
+++ b/kvm-block-Fix-pad_request-s-request-restriction.patch
@@ -0,0 +1,73 @@
+From 547f6bf93734f7c13675eebb93273ef2273f7c31 Mon Sep 17 00:00:00 2001
+From: Hanna Czenczek
+Date: Fri, 14 Jul 2023 10:59:38 +0200
+Subject: [PATCH 5/9] block: Fix pad_request's request restriction
+
+RH-Author: Hanna Czenczek
+RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
+RH-Bugzilla: 2174676
+RH-Acked-by: Miroslav Rezanina
+RH-Commit: [5/5] e8abc0485f6e0608a1ec55143ff40a14d273dfc8 (hreitz/qemu-kvm-c-9-s)
+
+bdrv_pad_request() relies on requests' lengths not to exceed SIZE_MAX,
+which bdrv_check_qiov_request() does not guarantee.
+
+bdrv_check_request32() however will guarantee this, and both of
+bdrv_pad_request()'s callers (bdrv_co_preadv_part() and
+bdrv_co_pwritev_part()) already run it before calling
+bdrv_pad_request(). Therefore, bdrv_pad_request() can safely call
+bdrv_check_request32() without expecting an error, too.
+
+In effect, this patch will not change guest-visible behavior. It is a
+clean-up that tightens a condition to match what is guaranteed by our
+callers, and which exists purely to show clearly why the subsequent
+assertion (`assert(*bytes <= SIZE_MAX)`) is always true.
+
+Note there is a difference between the interfaces of
+bdrv_check_qiov_request() and bdrv_check_request32(): The former takes
+an errp, the latter does not, so we can no longer just pass
+&error_abort. Instead, we need to check the returned value. While we
+do expect success (because the callers have already run this function),
+an assert(ret == 0) is not much simpler than just returning an error if
+it occurs, so let us handle errors by returning them up the stack now.
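+
+(As a small worked illustration of why that assertion holds, with the
+bound stated as an assumption rather than the exact QEMU constant:
+bdrv_check_request32() limits request lengths to what fits in 32 bits,
+and size_t is at least 32 bits wide on the hosts we care about, so a
+checked length can never exceed SIZE_MAX.)
+
+    #include <assert.h>
+    #include <stdint.h>
+
+    int main(void)
+    {
+        int64_t bytes = INT32_MAX;            /* stand-in for a checked length */
+        assert((uint64_t)bytes <= SIZE_MAX);  /* cannot fire */
+        return 0;
+    }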
+
+Reported-by: Peter Maydell
+Signed-off-by: Hanna Czenczek
+Message-id: 20230714085938.202730-1-hreitz@redhat.com
+Fixes: 18743311b829cafc1737a5f20bc3248d5f91ee2a
+       ("block: Collapse padded I/O vecs exceeding IOV_MAX")
+Signed-off-by: Hanna Czenczek
+Signed-off-by: Stefan Hajnoczi
+---
+ block/io.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/block/io.c b/block/io.c
+index 4e8e90208b..807c9fb720 100644
+--- a/block/io.c
++++ b/block/io.c
+@@ -1708,7 +1708,11 @@ static int bdrv_pad_request(BlockDriverState *bs,
+     int sliced_niov;
+     size_t sliced_head, sliced_tail;
+ 
+-    bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
++    /* Should have been checked by the caller already */
++    ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
++    if (ret < 0) {
++        return ret;
++    }
+ 
+     if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
+         if (padded) {
+@@ -1721,7 +1725,7 @@ static int bdrv_pad_request(BlockDriverState *bs,
+                                   &sliced_head, &sliced_tail,
+                                   &sliced_niov);
+ 
+-    /* Guaranteed by bdrv_check_qiov_request() */
++    /* Guaranteed by bdrv_check_request32() */
+     assert(*bytes <= SIZE_MAX);
+     ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                   sliced_head, *bytes);
+-- 
+2.39.3
+
diff --git a/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch
new file mode 100644
index 0000000..fe9cd8c
--- /dev/null
+++ b/kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch
@@ -0,0 +1,44 @@
+From 491cf9e251026d135f315b7fe0d8771841f06e9f Mon Sep 17 00:00:00 2001
+From: Leonardo Bras
+Date: Tue, 25 Jul 2023 15:34:45 -0300
+Subject: [PATCH 8/9] hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type
+ <= pc-q35-rhel9.2.0
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Leonardo Brás
+RH-MergeRequest: 192: hw/pci: Disable PCI_ERR_UNCOR_MASK reg for machine type <= pc-q35-rhel9.2.0
+RH-Bugzilla: 2223691
+RH-Acked-by: Miroslav Rezanina
+RH-Acked-by: quintela1
+RH-Commit: [1/1] e57816f8ad15a9ce5f342b061c103ae011ec1223 (LeoBras/centos-qemu-kvm)
+
+This is a downstream-only patch that sets the property
+x-pcie-err-unc-mask to off for machine types <= pc-q35-rhel9.2.0,
+allowing live migrations to RHEL 9.2 to happen successfully.
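+
+(For context, a sketch of the mechanism, based on how compat arrays
+are commonly described rather than on code from this patch: each
+entry is a (driver, property, value) triple that QEMU applies as a
+global property when an older machine type is selected, roughly as if
+the user had passed "-global pci-device.x-pcie-err-unc-mask=off" on
+the command line. The struct shown here is trimmed to the fields used
+below; see QEMU's qdev headers for the full definition.)
+
+    /* Field subset of QEMU's GlobalProperty, as used by hw_compat
+     * arrays; the diff below adds one such entry. */
+    typedef struct GlobalProperty {
+        const char *driver;    /* device type to match */
+        const char *property;  /* property to force */
+        const char *value;     /* value used for old machine types */
+    } GlobalProperty;
+
+    static const GlobalProperty example_compat[] = {
+        { "pci-device", "x-pcie-err-unc-mask", "off" },
+    };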
+ +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2223691 +Fixes: 293a34b4be ("hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine +type < 8.0") +Signed-off-by: Leonardo Bras +--- + hw/core/machine.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/hw/core/machine.c b/hw/core/machine.c +index 5ea52317b9..6f5117669d 100644 +--- a/hw/core/machine.c ++++ b/hw/core/machine.c +@@ -62,6 +62,8 @@ GlobalProperty hw_compat_rhel_9_2[] = { + { "virtio-mem", "x-early-migration", "false" }, + /* hw_compat_rhel_9_2 from hw_compat_7_2 */ + { "migration", "x-preempt-pre-7-2", "true" }, ++ /* hw_compat_rhel_9_2 from hw_compat_7_2 */ ++ { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, + }; + const size_t hw_compat_rhel_9_2_len = G_N_ELEMENTS(hw_compat_rhel_9_2); + +-- +2.39.3 + diff --git a/kvm-iotests-iov-padding-New-test.patch b/kvm-iotests-iov-padding-New-test.patch new file mode 100644 index 0000000..9ef37a2 --- /dev/null +++ b/kvm-iotests-iov-padding-New-test.patch @@ -0,0 +1,186 @@ +From add833b5de202d6765dda56c8773985fbe7f40a6 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:18 +0200 +Subject: [PATCH 4/9] iotests/iov-padding: New test + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/5] b32715b5c2a3e2add39c5ed6e8f71df56e0b91a0 (hreitz/qemu-kvm-c-9-s) + +Test that even vectored IO requests with 1024 vector elements that are +not aligned to the device's request alignment will succeed. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-5-hreitz@redhat.com> +(cherry picked from commit d7e1905e3f54ff9512db4c7a946a8603b62b108d) +Signed-off-by: Hanna Czenczek +--- + tests/qemu-iotests/tests/iov-padding | 85 ++++++++++++++++++++++++ + tests/qemu-iotests/tests/iov-padding.out | 59 ++++++++++++++++ + 2 files changed, 144 insertions(+) + create mode 100755 tests/qemu-iotests/tests/iov-padding + create mode 100644 tests/qemu-iotests/tests/iov-padding.out + +diff --git a/tests/qemu-iotests/tests/iov-padding b/tests/qemu-iotests/tests/iov-padding +new file mode 100755 +index 0000000000..b9604900c7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding +@@ -0,0 +1,85 @@ ++#!/usr/bin/env bash ++# group: rw quick ++# ++# Check the interaction of request padding (to fit alignment restrictions) with ++# vectored I/O from the guest ++# ++# Copyright Red Hat ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . ++# ++ ++seq=$(basename $0) ++echo "QA output created by $seq" ++ ++status=1 # failure is the default! ++ ++_cleanup() ++{ ++ _cleanup_test_img ++} ++trap "_cleanup; exit \$status" 0 1 2 3 15 ++ ++# get standard environment, filters and checks ++cd .. ++. ./common.rc ++. 
./common.filter ++ ++_supported_fmt raw ++_supported_proto file ++ ++_make_test_img 1M ++ ++IMGSPEC="driver=blkdebug,align=4096,image.driver=file,image.filename=$TEST_IMG" ++ ++# Four combinations: ++# - Offset 4096, length 1023 * 512 + 512: Fully aligned to 4k ++# - Offset 4096, length 1023 * 512 + 4096: Head is aligned, tail is not ++# - Offset 512, length 1023 * 512 + 512: Neither head nor tail are aligned ++# - Offset 512, length 1023 * 512 + 4096: Tail is aligned, head is not ++for start_offset in 4096 512; do ++ for last_element_length in 512 4096; do ++ length=$((1023 * 512 + $last_element_length)) ++ ++ echo ++ echo "== performing 1024-element vectored requests to image (offset: $start_offset; length: $length) ==" ++ ++ # Fill with data for testing ++ $QEMU_IO -c 'write -P 1 0 1M' "$TEST_IMG" | _filter_qemu_io ++ ++ # 1023 512-byte buffers, and then one with length $last_element_length ++ cmd_params="-P 2 $start_offset $(yes 512 | head -n 1023 | tr '\n' ' ') $last_element_length" ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "writev $cmd_params" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ ++ # Read all patterns -- read the part we just wrote with writev twice, ++ # once "normally", and once with a readv, so we see that that works, too ++ QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS_NO_FMT" $QEMU_IO \ ++ -c "read -P 1 0 $start_offset" \ ++ -c "read -P 2 $start_offset $length" \ ++ -c "readv $cmd_params" \ ++ -c "read -P 1 $((start_offset + length)) $((1024 * 1024 - length - start_offset))" \ ++ --image-opts \ ++ "$IMGSPEC" \ ++ | _filter_qemu_io ++ done ++done ++ ++# success, all done ++echo "*** done" ++rm -f $seq.full ++status=0 +diff --git a/tests/qemu-iotests/tests/iov-padding.out b/tests/qemu-iotests/tests/iov-padding.out +new file mode 100644 +index 0000000000..e07a91fac7 +--- /dev/null ++++ b/tests/qemu-iotests/tests/iov-padding.out +@@ -0,0 +1,59 @@ ++QA output created by iov-padding ++Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 524288/524288 bytes at offset 4096 ++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 520192/520192 bytes at offset 528384 ++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 4096; length: 527872) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++wrote 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 4096/4096 bytes at offset 0 ++4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 527872/527872 bytes at offset 4096 ++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++read 516608/516608 bytes at offset 531968 ++504.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) ++ ++== performing 1024-element vectored requests to image (offset: 512; length: 524288) == ++wrote 1048576/1048576 bytes at offset 0 ++1 MiB, X 
ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++wrote 524288/524288 bytes at offset 512
++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 512/512 bytes at offset 0
++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 524288/524288 bytes at offset 512
++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 524288/524288 bytes at offset 512
++512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 523776/523776 bytes at offset 524800
++511.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++
++== performing 1024-element vectored requests to image (offset: 512; length: 527872) ==
++wrote 1048576/1048576 bytes at offset 0
++1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++wrote 527872/527872 bytes at offset 512
++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 512/512 bytes at offset 0
++512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 527872/527872 bytes at offset 512
++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 527872/527872 bytes at offset 512
++515.500 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++read 520192/520192 bytes at offset 528384
++508 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
++*** done
+-- 
+2.39.3
+
diff --git a/kvm-util-iov-Make-qiov_slice-public.patch b/kvm-util-iov-Make-qiov_slice-public.patch
new file mode 100644
index 0000000..fe68d18
--- /dev/null
+++ b/kvm-util-iov-Make-qiov_slice-public.patch
@@ -0,0 +1,97 @@
+From 64652225695c23855cfb1252cea2b55c24da2260 Mon Sep 17 00:00:00 2001
+From: Hanna Czenczek
+Date: Tue, 11 Apr 2023 19:34:15 +0200
+Subject: [PATCH 1/9] util/iov: Make qiov_slice() public
+
+RH-Author: Hanna Czenczek
+RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX
+RH-Bugzilla: 2174676
+RH-Acked-by: Miroslav Rezanina
+RH-Commit: [1/5] 9c3cd661f7139ce124ee4f4d5fcbeaf3dbb9c45c (hreitz/qemu-kvm-c-9-s)
+
+We want to inline qemu_iovec_init_extended() in block/io.c for padding
+requests, and having access to qiov_slice() is useful for this. As a
+public function, it is renamed to qemu_iovec_slice().
+
+(We will need to count the number of I/O vector elements of a slice
+there, and then later process this slice. Without qiov_slice(), we
+would need to call qemu_iovec_subvec_niov(), and all further
+IOV-processing functions may need to skip prefixing elements to
+accommodate a qiov_offset. Because qemu_iovec_subvec_niov()
+internally calls qiov_slice(), we can just have the block/io.c code call
+qiov_slice() itself, thus get the number of elements, and also create an
+iovec array with the superfluous prefixing elements stripped, so the
+following processing functions no longer need to skip them.)
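+
+(A usage sketch, derived from the doc comment visible in the diff
+below; this is a fragment with made-up variable names, not code from
+the patch:)
+
+    size_t head, tail;
+    int niov;
+    struct iovec *iov;
+
+    /* Get the iovec subarray covering [offset, offset + len) of @qiov.
+     * @head is how many bytes of iov[0] precede the range; @tail is
+     * how many extra bytes at the end of iov[niov - 1] lie past it. */
+    iov = qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov);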
+ +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-2-hreitz@redhat.com> +(cherry picked from commit 3d06cea8256d54a6b0238934c31012f7f17100f5) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 3 +++ + util/iov.c | 14 +++++++------- + 2 files changed, 10 insertions(+), 7 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 9330746680..46fadfb27a 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -229,6 +229,9 @@ int qemu_iovec_init_extended( + void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov); + int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); + void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); + void qemu_iovec_concat(QEMUIOVector *dst, +diff --git a/util/iov.c b/util/iov.c +index b4be580022..65a70449da 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -378,15 +378,15 @@ static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, + } + + /* +- * qiov_slice ++ * qemu_iovec_slice + * + * Find subarray of iovec's, containing requested range. @head would + * be offset in first iov (returned by the function), @tail would be + * count of extra bytes in last iovec (returned iov + @niov - 1). + */ +-static struct iovec *qiov_slice(QEMUIOVector *qiov, +- size_t offset, size_t len, +- size_t *head, size_t *tail, int *niov) ++struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, ++ size_t offset, size_t len, ++ size_t *head, size_t *tail, int *niov) + { + struct iovec *iov, *end_iov; + +@@ -411,7 +411,7 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + size_t head, tail; + int niov; + +- qiov_slice(qiov, offset, len, &head, &tail, &niov); ++ qemu_iovec_slice(qiov, offset, len, &head, &tail, &niov); + + return niov; + } +@@ -439,8 +439,8 @@ int qemu_iovec_init_extended( + } + + if (mid_len) { +- mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); ++ mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, ++ &mid_head, &mid_tail, &mid_niov); + } + + total_niov = !!head_len + mid_niov + !!tail_len; +-- +2.39.3 + diff --git a/kvm-util-iov-Remove-qemu_iovec_init_extended.patch b/kvm-util-iov-Remove-qemu_iovec_init_extended.patch new file mode 100644 index 0000000..fd21880 --- /dev/null +++ b/kvm-util-iov-Remove-qemu_iovec_init_extended.patch @@ -0,0 +1,156 @@ +From 8ff973985a04fec1a3cdf886976a03e0dca7b0ea Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Tue, 11 Apr 2023 19:34:17 +0200 +Subject: [PATCH 3/9] util/iov: Remove qemu_iovec_init_extended() + +RH-Author: Hanna Czenczek +RH-MergeRequest: 189: block: Split padded I/O vectors exceeding IOV_MAX +RH-Bugzilla: 2174676 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/5] 1740d7b15ea4fbfbe71e7adc122741e85e83fb8c (hreitz/qemu-kvm-c-9-s) + +bdrv_pad_request() was the main user of qemu_iovec_init_extended(). +HEAD^ has removed that use, so we can remove qemu_iovec_init_extended() +now. + +The only remaining user is qemu_iovec_init_slice(), which can easily +inline the small part it really needs. + +Note that qemu_iovec_init_extended() offered a memcpy() optimization to +initialize the new I/O vector. 
qemu_iovec_concat_iov(), which is used +to replace its functionality, does not, but calls qemu_iovec_add() for +every single element. If we decide this optimization was important, we +will need to re-implement it in qemu_iovec_concat_iov(), which might +also benefit its pre-existing users. + +Reviewed-by: Eric Blake +Reviewed-by: Vladimir Sementsov-Ogievskiy +Signed-off-by: Hanna Czenczek +Message-Id: <20230411173418.19549-4-hreitz@redhat.com> +(cherry picked from commit cc63f6f6fa1aaa4b6405dd69432c693e9c8d18ca) +Signed-off-by: Hanna Czenczek +--- + include/qemu/iov.h | 5 --- + util/iov.c | 79 +++++++--------------------------------------- + 2 files changed, 11 insertions(+), 73 deletions(-) + +diff --git a/include/qemu/iov.h b/include/qemu/iov.h +index 46fadfb27a..63a1c01965 100644 +--- a/include/qemu/iov.h ++++ b/include/qemu/iov.h +@@ -222,11 +222,6 @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) + + void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); + void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len); + void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, + size_t offset, size_t len); + struct iovec *qemu_iovec_slice(QEMUIOVector *qiov, +diff --git a/util/iov.c b/util/iov.c +index 65a70449da..866fb577f3 100644 +--- a/util/iov.c ++++ b/util/iov.c +@@ -416,70 +416,6 @@ int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) + return niov; + } + +-/* +- * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, +- * and @tail_buf buffer into new qiov. +- */ +-int qemu_iovec_init_extended( +- QEMUIOVector *qiov, +- void *head_buf, size_t head_len, +- QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, +- void *tail_buf, size_t tail_len) +-{ +- size_t mid_head, mid_tail; +- int total_niov, mid_niov = 0; +- struct iovec *p, *mid_iov = NULL; +- +- assert(mid_qiov->niov <= IOV_MAX); +- +- if (SIZE_MAX - head_len < mid_len || +- SIZE_MAX - head_len - mid_len < tail_len) +- { +- return -EINVAL; +- } +- +- if (mid_len) { +- mid_iov = qemu_iovec_slice(mid_qiov, mid_offset, mid_len, +- &mid_head, &mid_tail, &mid_niov); +- } +- +- total_niov = !!head_len + mid_niov + !!tail_len; +- if (total_niov > IOV_MAX) { +- return -EINVAL; +- } +- +- if (total_niov == 1) { +- qemu_iovec_init_buf(qiov, NULL, 0); +- p = &qiov->local_iov; +- } else { +- qiov->niov = qiov->nalloc = total_niov; +- qiov->size = head_len + mid_len + tail_len; +- p = qiov->iov = g_new(struct iovec, qiov->niov); +- } +- +- if (head_len) { +- p->iov_base = head_buf; +- p->iov_len = head_len; +- p++; +- } +- +- assert(!mid_niov == !mid_len); +- if (mid_niov) { +- memcpy(p, mid_iov, mid_niov * sizeof(*p)); +- p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; +- p[0].iov_len -= mid_head; +- p[mid_niov - 1].iov_len -= mid_tail; +- p += mid_niov; +- } +- +- if (tail_len) { +- p->iov_base = tail_buf; +- p->iov_len = tail_len; +- } +- +- return 0; +-} +- + /* + * Check if the contents of subrange of qiov data is all zeroes. 
+  */
+@@ -511,14 +447,21 @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes)
+ void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source,
+                            size_t offset, size_t len)
+ {
+-    int ret;
++    struct iovec *slice_iov;
++    int slice_niov;
++    size_t slice_head, slice_tail;
+ 
+     assert(source->size >= len);
+     assert(source->size - len >= offset);
+ 
+-    /* We shrink the request, so we can't overflow neither size_t nor MAX_IOV */
+-    ret = qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0);
+-    assert(ret == 0);
++    slice_iov = qemu_iovec_slice(source, offset, len,
++                                 &slice_head, &slice_tail, &slice_niov);
++    if (slice_niov == 1) {
++        qemu_iovec_init_buf(qiov, slice_iov[0].iov_base + slice_head, len);
++    } else {
++        qemu_iovec_init(qiov, slice_niov);
++        qemu_iovec_concat_iov(qiov, slice_iov, slice_niov, slice_head, len);
++    }
+ }
+ 
+ void qemu_iovec_destroy(QEMUIOVector *qiov)
+-- 
+2.39.3
+
diff --git a/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch b/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch
new file mode 100644
index 0000000..56b9aed
--- /dev/null
+++ b/kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch
@@ -0,0 +1,61 @@
+From 74c2f378bdf278a03c02ae48948b00b4431a3fd6 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Eugenio=20P=C3=A9rez?=
+Date: Fri, 2 Jun 2023 16:38:52 +0200
+Subject: [PATCH 6/9] vdpa: do not block migration if device has cvq and
+ x-svq=on
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Eugenio Pérez
+RH-MergeRequest: 190: vdpa: do not block migration if device has cvq and x-svq=on
+RH-Jira: RHEL-573
+RH-Acked-by: Jason Wang
+RH-Acked-by: Laurent Vivier
+RH-Commit: [1/1] b0e2ec3c9e5c17252cf6a043fe1374ddc3c37de7 (eperezmartin/qemu-kvm)
+
+It was a mistake to forbid migration in all cases, as SVQ is already
+able to send all the CVQ messages before it starts forwarding data vqs.
+This actually caused a regression, making it impossible to migrate
+devices that were previously migratable.
+
+Fixes: 36e4647247f2 ("vdpa: add vhost_vdpa_net_valid_svq_features")
+Signed-off-by: Eugenio Pérez
+Message-Id: <20230602143854.1879091-2-eperezma@redhat.com>
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S. Tsirkin
+Tested-by: Lei Yang
+(cherry picked from commit 8bc0049eadafb984d305c847cedff550b58e5fc0)
+Signed-off-by: Eugenio Pérez
+---
+ net/vhost-vdpa.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
+index 8c8900f0f4..1ae839da34 100644
+--- a/net/vhost-vdpa.c
++++ b/net/vhost-vdpa.c
+@@ -844,13 +844,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
+         s->vhost_vdpa.shadow_vq_ops_opaque = s;
+ 
+         /*
+-         * TODO: We cannot migrate devices with CVQ as there is no way to set
+-         * the device state (MAC, MQ, etc) before starting the datapath.
++         * TODO: We cannot migrate devices with CVQ and no x-svq enabled as
++         * there is no way to set the device state (MAC, MQ, etc) before
++         * starting the datapath.
+          *
+          * Migration blocker ownership now belongs to s->vhost_vdpa.
+          */
+-        error_setg(&s->vhost_vdpa.migration_blocker,
+-                   "net vdpa cannot migrate with CVQ feature");
++        if (!svq) {
++            error_setg(&s->vhost_vdpa.migration_blocker,
++                       "net vdpa cannot migrate with CVQ feature");
++        }
+     }
+     ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
+     if (ret) {
+-- 
+2.39.3
+
diff --git a/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch b/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch
new file mode 100644
index 0000000..3711949
--- /dev/null
+++ b/kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch
@@ -0,0 +1,86 @@
+From 3b51a7b84ea21360c6d551284aecb8b6f371e888 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier
+Date: Tue, 4 Jul 2023 09:19:31 +0200
+Subject: [PATCH 9/9] vhost-vdpa: mute unaligned memory error report
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+RH-Author: Laurent Vivier
+RH-MergeRequest: 193: vhost-vdpa: mute unaligned memory error report
+RH-Bugzilla: 2141965
+RH-Acked-by: Miroslav Rezanina
+RH-Acked-by: Eugenio Pérez
+RH-Commit: [1/1] 60f5385d41269ce9310e1e8e0a2f1106e3a16ada (lvivier/qemu-kvm-centos)
+
+BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141965
+
+With a TPM CRB device, vhost-vdpa reports an error when it tries
+to register a listener for a non-aligned memory region:
+
+  qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region
+  qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region
+
+This error can be confusing for the user, whereas we only need to skip
+the region (as is already done right after the error_report()).
+
+Rather than introducing a special case that suppresses the message for
+the TPM CRB memory section, simply replace the error_report() with a
+trace function (which logs more information, like the memory region
+name).
+
+Signed-off-by: Laurent Vivier
+Message-Id: <20230704071931.575888-2-lvivier@redhat.com>
+Reviewed-by: David Hildenbrand
+Acked-by: Jason Wang
+Reviewed-by: Michael S. Tsirkin
+Signed-off-by: Michael S.
Tsirkin +(cherry picked from commit 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db) +--- + hw/virtio/trace-events | 2 ++ + hw/virtio/vhost-vdpa.c | 8 ++++++-- + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events +index 68b752e304..300dec8d3e 100644 +--- a/hw/virtio/trace-events ++++ b/hw/virtio/trace-events +@@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ + vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 + vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 + vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 ++vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" ++vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 + vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 + vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 + vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" +diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c +index bc6bad23d5..c04f14420d 100644 +--- a/hw/virtio/vhost-vdpa.c ++++ b/hw/virtio/vhost-vdpa.c +@@ -202,7 +202,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +@@ -281,7 +283,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, + + if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != + (section->offset_within_region & ~TARGET_PAGE_MASK))) { +- error_report("%s received unaligned region", __func__); ++ trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, ++ section->offset_within_address_space & ~TARGET_PAGE_MASK, ++ section->offset_within_region & ~TARGET_PAGE_MASK); + return; + } + +-- +2.39.3 + diff --git a/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch b/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch new file mode 100644 index 0000000..119ea84 --- /dev/null +++ b/kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch @@ -0,0 +1,92 @@ +From 4fe096a6fad61ab721fd29324d48383c7f427ac9 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Mon, 5 Jun 2023 16:21:25 +0200 +Subject: [PATCH 7/9] virtio-net: correctly report maximum tx_queue_size value +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier 
+RH-MergeRequest: 191: virtio-net: correctly report maximum tx_queue_size value +RH-Bugzilla: 2040509 +RH-Acked-by: Stefano Garzarella +RH-Acked-by: Eugenio Pérez +RH-Commit: [1/1] afb944c6d75fe476ac86fe267b1cca5f272dfbbd (lvivier/qemu-kvm-centos) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040509 + +Maximum value for tx_queue_size depends on the backend type. +1024 for vDPA/vhost-user, 256 for all the others. + +The value is returned by virtio_net_max_tx_queue_size() to set the +parameter: + + n->net_conf.tx_queue_size = MIN(virtio_net_max_tx_queue_size(n), + n->net_conf.tx_queue_size); + +But the parameter checking uses VIRTQUEUE_MAX_SIZE (1024). + +So the parameter is silently ignored and ethtool reports a different +value than the one provided by the user. + + ... -netdev tap,... -device virtio-net,tx_queue_size=1024 + + # ethtool -g enp0s2 + Ring parameters for enp0s2: + Pre-set maximums: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + Current hardware settings: + RX: 256 + RX Mini: n/a + RX Jumbo: n/a + TX: 256 + + ... -netdev vhost-user,... -device virtio-net,tx_queue_size=2048 + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +With this patch the correct maximum value is checked and displayed. + +For vDPA/vhost-user: + + Invalid tx_queue_size (= 2048), must be a power of 2 between 256 and 1024 + +For all the others: + + Invalid tx_queue_size (= 512), must be a power of 2 between 256 and 256 + +Fixes: 2eef278b9e63 ("virtio-net: fix tx queue size for !vhost-user") +Cc: mst@redhat.com +Cc: qemu-stable@nongnu.org +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit 4271f4038372f174dbafffacca1a748d058a03ba) +--- + hw/net/virtio-net.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 447f669921..ae1e6a5e3d 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -3628,12 +3628,12 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + } + + if (n->net_conf.tx_queue_size < VIRTIO_NET_TX_QUEUE_MIN_SIZE || +- n->net_conf.tx_queue_size > VIRTQUEUE_MAX_SIZE || ++ n->net_conf.tx_queue_size > virtio_net_max_tx_queue_size(n) || + !is_power_of_2(n->net_conf.tx_queue_size)) { + error_setg(errp, "Invalid tx_queue_size (= %" PRIu16 "), " + "must be a power of 2 between %d and %d", + n->net_conf.tx_queue_size, VIRTIO_NET_TX_QUEUE_MIN_SIZE, +- VIRTQUEUE_MAX_SIZE); ++ virtio_net_max_tx_queue_size(n)); + virtio_cleanup(vdev); + return; + } +-- +2.39.3 + diff --git a/qemu.spec b/qemu.spec index 91cc8b9..b57517f 100644 --- a/qemu.spec +++ b/qemu.spec @@ -149,7 +149,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 8.0.0 -Release: 9%{?rcrel}%{?dist}%{?cc_suffix} +Release: 10%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -468,6 +468,24 @@ Patch155: kvm-net-socket-move-fd-type-checking-to-its-own-function.patch Patch156: kvm-net-socket-remove-net_init_socket.patch # For bz#2215819 - Migration test failed while guest with PCIe devices Patch157: kvm-pcie-Add-hotplug-detect-state-register-to-cmask.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch158: kvm-util-iov-Make-qiov_slice-public.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch159: kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch160: kvm-util-iov-Remove-qemu_iovec_init_extended.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch161: kvm-iotests-iov-padding-New-test.patch +# For bz#2174676 - Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9] +Patch162: kvm-block-Fix-pad_request-s-request-restriction.patch +# For RHEL-573 - [mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature" +Patch163: kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch +# For bz#2040509 - [RFE]:Add support for changing "tx_queue_size" to a setable value +Patch164: kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch +# For bz#2223691 - [machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2 +Patch165: kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch +# For bz#2141965 - [TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region +Patch166: kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch # ELN specific changes Patch1001: 1001-keymaps-use-ara-X11-keyboard-map-instead-of-ar.patch @@ -1532,6 +1550,27 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jul 31 2023 Miroslav Rezanina - 8.0.0-10 +- kvm-util-iov-Make-qiov_slice-public.patch [bz#2174676] +- kvm-block-Collapse-padded-I-O-vecs-exceeding-IOV_MAX.patch [bz#2174676] +- kvm-util-iov-Remove-qemu_iovec_init_extended.patch [bz#2174676] +- kvm-iotests-iov-padding-New-test.patch [bz#2174676] +- kvm-block-Fix-pad_request-s-request-restriction.patch [bz#2174676] +- kvm-vdpa-do-not-block-migration-if-device-has-cvq-and-x-.patch [RHEL-573] +- kvm-virtio-net-correctly-report-maximum-tx_queue_size-va.patch [bz#2040509] +- kvm-hw-pci-Disable-PCI_ERR_UNCOR_MASK-reg-for-machine-ty.patch [bz#2223691] +- kvm-vhost-vdpa-mute-unaligned-memory-error-report.patch [bz#2141965] +- Resolves: bz#2174676 + (Guest hit EXT4-fs error on host 4K disk when repeatedly hot-plug/unplug running IO disk [RHEL9]) +- Resolves: RHEL-573 + ([mlx vhost_vdpa][rhel 9.3]live migration fail with "net vdpa cannot migrate with CVQ feature") +- Resolves: bz#2040509 + ([RFE]:Add support for changing "tx_queue_size" to a setable value) +- Resolves: bz#2223691 + ([machine type 9.2]Failed to migrate VM from RHEL 9.3 to RHEL 9.2) +- Resolves: bz#2141965 + ([TPM][vhost-vdpa][rhel9.2]Boot a guest with "vhost-vdpa + TPM emulator", qemu output: qemu-kvm: vhost_vdpa_listener_region_add received unaligned region) + * Mon Jul 24 2023 
Miroslav Rezanina - 8.0.0-9 - kvm-scsi-fetch-unit-attention-when-creating-the-request.patch [bz#2176702] - kvm-scsi-cleanup-scsi_clear_unit_attention.patch [bz#2176702]