From af295c3a48f11073424cc5af8baf572720df1ee2 Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Mon, 2 Feb 2026 11:25:52 +0100 Subject: [PATCH] * Mon Feb 02 2026 Miroslav Rezanina - 10.1.0-12 - kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch [RHEL-79118] - kvm-curl-Fix-coroutine-waking.patch [RHEL-79118] - kvm-block-io-Take-reqs_lock-for-tracked_requests.patch [RHEL-79118] - kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch [RHEL-79118] - kvm-qcow2-Fix-cache_clean_timer.patch [RHEL-79118] - kvm-net-bundle-all-offloads-in-a-single-struct.patch [RHEL-143785] - kvm-linux-headers-deal-with-counted_by-annotation.patch [RHEL-143785] - kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch [RHEL-143785] - kvm-virtio-introduce-extended-features-type.patch [RHEL-143785] - kvm-virtio-serialize-extended-features-state.patch [RHEL-143785] - kvm-virtio-add-support-for-negotiating-extended-features.patch [RHEL-143785] - kvm-virtio-pci-implement-support-for-extended-features.patch [RHEL-143785] - kvm-vhost-add-support-for-negotiating-extended-features.patch [RHEL-143785] - kvm-qmp-update-virtio-features-map-to-support-extended-f.patch [RHEL-143785] - kvm-vhost-backend-implement-extended-features-support.patch [RHEL-143785] - kvm-vhost-net-implement-extended-features-support.patch [RHEL-143785] - kvm-virtio-net-implement-extended-features-support.patch [RHEL-143785] - kvm-net-implement-tunnel-probing.patch [RHEL-143785] - kvm-net-implement-UDP-tunnel-features-offloading.patch [RHEL-143785] - Resolves: RHEL-79118 ([network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10]) - Resolves: RHEL-143785 (backport support for GSO over UDP tunnel offload) --- ...-Take-reqs_lock-for-tracked_requests.patch | 66 ++ kvm-curl-Fix-coroutine-waking.patch | 172 ++++ ...ux-headers-Update-to-Linux-v6.17-rc1.patch | 941 ++++++++++++++++++ ...ders-deal-with-counted_by-annotation.patch | 47 + ...ndle-all-offloads-in-a-single-struct.patch | 366 
+++++++ ...ement-UDP-tunnel-features-offloading.patch | 209 ++++ kvm-net-implement-tunnel-probing.patch | 317 ++++++ kvm-qcow2-Fix-cache_clean_timer.patch | 338 +++++++ ...-initialize-lock-in-invalidate_cache.patch | 45 + ...o-features-map-to-support-extended-f.patch | 329 ++++++ ...-BH-CB-in-the-coroutine-s-AioContext.patch | 127 +++ ...rt-for-negotiating-extended-features.patch | 292 ++++++ ...-implement-extended-features-support.patch | 133 +++ ...-implement-extended-features-support.patch | 237 +++++ ...rt-for-negotiating-extended-features.patch | 133 +++ ...tio-introduce-extended-features-type.patch | 201 ++++ ...-implement-extended-features-support.patch | 331 ++++++ ...lement-support-for-extended-features.patch | 196 ++++ ...io-serialize-extended-features-state.patch | 222 +++++ qemu-kvm.spec | 65 +- 20 files changed, 4766 insertions(+), 1 deletion(-) create mode 100644 kvm-block-io-Take-reqs_lock-for-tracked_requests.patch create mode 100644 kvm-curl-Fix-coroutine-waking.patch create mode 100644 kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch create mode 100644 kvm-linux-headers-deal-with-counted_by-annotation.patch create mode 100644 kvm-net-bundle-all-offloads-in-a-single-struct.patch create mode 100644 kvm-net-implement-UDP-tunnel-features-offloading.patch create mode 100644 kvm-net-implement-tunnel-probing.patch create mode 100644 kvm-qcow2-Fix-cache_clean_timer.patch create mode 100644 kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch create mode 100644 kvm-qmp-update-virtio-features-map-to-support-extended-f.patch create mode 100644 kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch create mode 100644 kvm-vhost-add-support-for-negotiating-extended-features.patch create mode 100644 kvm-vhost-backend-implement-extended-features-support.patch create mode 100644 kvm-vhost-net-implement-extended-features-support.patch create mode 100644 kvm-virtio-add-support-for-negotiating-extended-features.patch create mode 100644 
kvm-virtio-introduce-extended-features-type.patch create mode 100644 kvm-virtio-net-implement-extended-features-support.patch create mode 100644 kvm-virtio-pci-implement-support-for-extended-features.patch create mode 100644 kvm-virtio-serialize-extended-features-state.patch diff --git a/kvm-block-io-Take-reqs_lock-for-tracked_requests.patch b/kvm-block-io-Take-reqs_lock-for-tracked_requests.patch new file mode 100644 index 0000000..d5a053a --- /dev/null +++ b/kvm-block-io-Take-reqs_lock-for-tracked_requests.patch @@ -0,0 +1,66 @@ +From c2e109dd6504a8fb06fddf3b5e86956a4699bdef Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Mon, 10 Nov 2025 16:48:45 +0100 +Subject: [PATCH 03/19] block/io: Take reqs_lock for tracked_requests + +RH-Author: Hanna Czenczek +RH-MergeRequest: 454: Multithreading fixes for rbd, curl, qcow2 +RH-Jira: RHEL-79118 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/5] 91512b6569546b6e82eca0034608fb5c7b426916 (hreitz/qemu-kvm-c-9-s) + +bdrv_co_get_self_request() does not take a lock around iterating through +bs->tracked_requests. With multiqueue, it may thus iterate over a list +that is in the process of being modified, producing an assertion +failure: + +../block/file-posix.c:3702: raw_do_pwrite_zeroes: Assertion `req' failed. + +[0] abort() at /lib64/libc.so.6 +[1] __assert_fail_base.cold() at /lib64/libc.so.6 +[2] raw_do_pwrite_zeroes() at ../block/file-posix.c:3702 +[3] bdrv_co_do_pwrite_zeroes() at ../block/io.c:1910 +[4] bdrv_aligned_pwritev() at ../block/io.c:2109 +[5] bdrv_co_do_zero_pwritev() at ../block/io.c:2192 +[6] bdrv_co_pwritev_part() at ../block/io.c:2292 +[7] bdrv_co_pwritev() at ../block/io.c:2225 +[8] handle_alloc_space() at ../block/qcow2.c:2573 +[9] qcow2_co_pwritev_task() at ../block/qcow2.c:2625 + +Fix this by taking reqs_lock. 
+ +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Czenczek +Message-ID: <20251110154854.151484-11-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 9b9ee60c07f52009f9bb659f54c42afae95c1d94) +Signed-off-by: Hanna Czenczek +--- + block/io.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/block/io.c b/block/io.c +index 9bd8ba8431..37df1e0253 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -721,11 +721,14 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs) + Coroutine *self = qemu_coroutine_self(); + IO_CODE(); + ++ qemu_mutex_lock(&bs->reqs_lock); + QLIST_FOREACH(req, &bs->tracked_requests, list) { + if (req->co == self) { ++ qemu_mutex_unlock(&bs->reqs_lock); + return req; + } + } ++ qemu_mutex_unlock(&bs->reqs_lock); + + return NULL; + } +-- +2.47.3 + diff --git a/kvm-curl-Fix-coroutine-waking.patch b/kvm-curl-Fix-coroutine-waking.patch new file mode 100644 index 0000000..5dbf7ce --- /dev/null +++ b/kvm-curl-Fix-coroutine-waking.patch @@ -0,0 +1,172 @@ +From 14d1e6aa70f97aa75c8f3f78e3c730e286e3b683 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Mon, 10 Nov 2025 16:48:40 +0100 +Subject: [PATCH 02/19] curl: Fix coroutine waking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Czenczek +RH-MergeRequest: 454: Multithreading fixes for rbd, curl, qcow2 +RH-Jira: RHEL-79118 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/5] 20268ed1de88de45a7525d01dbb64899b8c4e443 (hreitz/qemu-kvm-c-9-s) + +If we wake a coroutine from a different context, we must ensure that it +will yield exactly once (now or later), awaiting that wake. + +curl’s current .ret == -EINPROGRESS loop may lead to the coroutine not +yielding if the request finishes before the loop gets run. To fix it, +we must drop the loop and yield exactly once, if we need to yield. 
+ +Finding out that latter part ("if we need to yield") makes it a bit +complicated: Requests may be served from a cache internal to the curl +block driver, or fail before being submitted. In these cases, we must +not yield. However, if we find a matching but still ongoing request in +the cache, we will have to await that, i.e. still yield. + +To address this, move the yield inside of the respective functions: +- Inside of curl_find_buf() when awaiting ongoing concurrent requests, +- Inside of curl_setup_preadv() when having created a new request. + +Rename curl_setup_preadv() to curl_do_preadv() to reflect this. + +(Can be reproduced with multiqueue by adding a usleep(100000) before the +`while (acb.ret == -EINPROGRESS)` loop.) + +Also, add a comment why aio_co_wake() is safe regardless of whether the +coroutine and curl_multi_check_completion() run in the same context. + +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Czenczek +Message-ID: <20251110154854.151484-6-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 53d5c7ffac7bd4e0d12174432ebb2b3e88614b15) +Signed-off-by: Hanna Czenczek +--- + block/curl.c | 45 +++++++++++++++++++++++++++++++-------------- + 1 file changed, 31 insertions(+), 14 deletions(-) + +diff --git a/block/curl.c b/block/curl.c +index 5467678024..d69bcdff79 100644 +--- a/block/curl.c ++++ b/block/curl.c +@@ -262,8 +262,8 @@ read_end: + } + + /* Called with s->mutex held. 
*/ +-static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len, +- CURLAIOCB *acb) ++static bool coroutine_fn ++curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len, CURLAIOCB *acb) + { + int i; + uint64_t end = start + len; +@@ -311,6 +311,10 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len, + for (j=0; jacb[j]) { + state->acb[j] = acb; ++ /* Await ongoing request */ ++ qemu_mutex_unlock(&s->mutex); ++ qemu_coroutine_yield(); ++ qemu_mutex_lock(&s->mutex); + return true; + } + } +@@ -382,6 +386,16 @@ static void curl_multi_check_completion(BDRVCURLState *s) + acb->ret = error ? -EIO : 0; + state->acb[i] = NULL; + qemu_mutex_unlock(&s->mutex); ++ /* ++ * Current AioContext is the BDS context, which may or may not ++ * be the request (coroutine) context. ++ * - If it is, the coroutine must have yielded or the FD handler ++ * (curl_multi_do()/curl_multi_timeout_do()) could not have ++ * been called and we would not be here ++ * - If it is not, it doesn't matter whether it has already ++ * yielded or not; it will be scheduled once it does yield ++ * So aio_co_wake() is safe to call. ++ */ + aio_co_wake(acb->co); + qemu_mutex_lock(&s->mutex); + } +@@ -882,7 +896,7 @@ out_noclean: + return -EINVAL; + } + +-static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) ++static void coroutine_fn curl_do_preadv(BlockDriverState *bs, CURLAIOCB *acb) + { + CURLState *state; + int running; +@@ -894,10 +908,13 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) + + qemu_mutex_lock(&s->mutex); + +- // In case we have the requested data already (e.g. read-ahead), +- // we can just call the callback and be done. ++ /* ++ * In case we have the requested data already (e.g. read-ahead), ++ * we can just call the callback and be done. This may have to ++ * await an ongoing request, in which case it itself will yield. 
++ */ + if (curl_find_buf(s, start, acb->bytes, acb)) { +- goto out; ++ goto dont_yield; + } + + // No cache found, so let's start a new request +@@ -912,7 +929,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) + if (curl_init_state(s, state) < 0) { + curl_clean_state(state); + acb->ret = -EIO; +- goto out; ++ goto dont_yield; + } + + acb->start = 0; +@@ -927,7 +944,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) + if (state->buf_len && state->orig_buf == NULL) { + curl_clean_state(state); + acb->ret = -ENOMEM; +- goto out; ++ goto dont_yield; + } + state->acb[0] = acb; + +@@ -939,13 +956,16 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb) + acb->ret = -EIO; + + curl_clean_state(state); +- goto out; ++ goto dont_yield; + } + + /* Tell curl it needs to kick things off */ + curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running); ++ qemu_mutex_unlock(&s->mutex); ++ qemu_coroutine_yield(); ++ return; + +-out: ++dont_yield: + qemu_mutex_unlock(&s->mutex); + } + +@@ -961,10 +981,7 @@ static int coroutine_fn curl_co_preadv(BlockDriverState *bs, + .bytes = bytes + }; + +- curl_setup_preadv(bs, &acb); +- while (acb.ret == -EINPROGRESS) { +- qemu_coroutine_yield(); +- } ++ curl_do_preadv(bs, &acb); + return acb.ret; + } + +-- +2.47.3 + diff --git a/kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch b/kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch new file mode 100644 index 0000000..efc0d20 --- /dev/null +++ b/kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch @@ -0,0 +1,941 @@ +From b5bbab573e01c1225e7368dba469086a78bc1570 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:17 +0200 +Subject: [PATCH 08/19] linux-headers: Update to Linux v6.17-rc1 + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [3/14] 
5b5aab965427deef98614bcfc99c4b3b9818b46d (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Update headers to include the virtio GSO over UDP tunnel features + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Acked-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Message-ID: <0b1f3c011f90583ab52aa4fef04df6db35cc4a69.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 8de6cd5452eb9c58c0d105dbc9718bd0e83cc70f) +Signed-off-by: Laurent Vivier +--- + include/standard-headers/drm/drm_fourcc.h | 56 ++++++- + include/standard-headers/linux/ethtool.h | 4 +- + .../linux/input-event-codes.h | 8 + + include/standard-headers/linux/input.h | 1 + + include/standard-headers/linux/pci_regs.h | 9 + + include/standard-headers/linux/vhost_types.h | 5 + + include/standard-headers/linux/virtio_net.h | 33 ++++ + linux-headers/LICENSES/preferred/GPL-2.0 | 10 +- + linux-headers/asm-arm64/unistd_64.h | 2 + + linux-headers/asm-generic/unistd.h | 8 +- + linux-headers/asm-loongarch/unistd_64.h | 2 + + linux-headers/asm-mips/unistd_n32.h | 2 + + linux-headers/asm-mips/unistd_n64.h | 2 + + linux-headers/asm-mips/unistd_o32.h | 2 + + linux-headers/asm-powerpc/kvm.h | 13 -- + linux-headers/asm-powerpc/unistd_32.h | 2 + + linux-headers/asm-powerpc/unistd_64.h | 2 + + linux-headers/asm-riscv/kvm.h | 1 + + linux-headers/asm-riscv/unistd_32.h | 2 + + linux-headers/asm-riscv/unistd_64.h | 2 + + linux-headers/asm-s390/unistd_32.h | 2 + + linux-headers/asm-s390/unistd_64.h | 2 + + linux-headers/asm-x86/unistd_32.h | 2 + + linux-headers/asm-x86/unistd_64.h | 2 + + linux-headers/asm-x86/unistd_x32.h | 2 + + linux-headers/linux/iommufd.h | 154 +++++++++++++++++- + linux-headers/linux/kvm.h | 2 + + linux-headers/linux/vfio.h | 12 +- + linux-headers/linux/vhost.h | 35 ++++ + 29 files changed, 352 insertions(+), 27 deletions(-) + +diff --git 
a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index c8309d378b..cef077dfb3 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -209,6 +209,10 @@ extern "C" { + #define DRM_FORMAT_RGBA1010102 fourcc_code('R', 'A', '3', '0') /* [31:0] R:G:B:A 10:10:10:2 little endian */ + #define DRM_FORMAT_BGRA1010102 fourcc_code('B', 'A', '3', '0') /* [31:0] B:G:R:A 10:10:10:2 little endian */ + ++/* 48 bpp RGB */ ++#define DRM_FORMAT_RGB161616 fourcc_code('R', 'G', '4', '8') /* [47:0] R:G:B 16:16:16 little endian */ ++#define DRM_FORMAT_BGR161616 fourcc_code('B', 'G', '4', '8') /* [47:0] B:G:R 16:16:16 little endian */ ++ + /* 64 bpp RGB */ + #define DRM_FORMAT_XRGB16161616 fourcc_code('X', 'R', '4', '8') /* [63:0] x:R:G:B 16:16:16:16 little endian */ + #define DRM_FORMAT_XBGR16161616 fourcc_code('X', 'B', '4', '8') /* [63:0] x:B:G:R 16:16:16:16 little endian */ +@@ -217,7 +221,7 @@ extern "C" { + #define DRM_FORMAT_ABGR16161616 fourcc_code('A', 'B', '4', '8') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + + /* +- * Floating point 64bpp RGB ++ * Half-Floating point - 16b/component + * IEEE 754-2008 binary16 half-precision float + * [15:0] sign:exponent:mantissa 1:5:10 + */ +@@ -227,6 +231,20 @@ extern "C" { + #define DRM_FORMAT_ARGB16161616F fourcc_code('A', 'R', '4', 'H') /* [63:0] A:R:G:B 16:16:16:16 little endian */ + #define DRM_FORMAT_ABGR16161616F fourcc_code('A', 'B', '4', 'H') /* [63:0] A:B:G:R 16:16:16:16 little endian */ + ++#define DRM_FORMAT_R16F fourcc_code('R', ' ', ' ', 'H') /* [15:0] R 16 little endian */ ++#define DRM_FORMAT_GR1616F fourcc_code('G', 'R', ' ', 'H') /* [31:0] G:R 16:16 little endian */ ++#define DRM_FORMAT_BGR161616F fourcc_code('B', 'G', 'R', 'H') /* [47:0] B:G:R 16:16:16 little endian */ ++ ++/* ++ * Floating point - 32b/component ++ * IEEE 754-2008 binary32 float ++ * [31:0] sign:exponent:mantissa 1:8:23 ++ */ ++#define DRM_FORMAT_R32F 
fourcc_code('R', ' ', ' ', 'F') /* [31:0] R 32 little endian */ ++#define DRM_FORMAT_GR3232F fourcc_code('G', 'R', ' ', 'F') /* [63:0] R:G 32:32 little endian */ ++#define DRM_FORMAT_BGR323232F fourcc_code('B', 'G', 'R', 'F') /* [95:0] R:G:B 32:32:32 little endian */ ++#define DRM_FORMAT_ABGR32323232F fourcc_code('A', 'B', '8', 'F') /* [127:0] R:G:B:A 32:32:32:32 little endian */ ++ + /* + * RGBA format with 10-bit components packed in 64-bit per pixel, with 6 bits + * of unused padding per component: +@@ -376,6 +394,42 @@ extern "C" { + */ + #define DRM_FORMAT_Q401 fourcc_code('Q', '4', '0', '1') + ++/* ++ * 3 plane YCbCr LSB aligned ++ * In order to use these formats in a similar fashion to MSB aligned ones ++ * implementation can multiply the values by 2^6=64. For that reason the padding ++ * must only contain zeros. ++ * index 0 = Y plane, [15:0] z:Y [6:10] little endian ++ * index 1 = Cr plane, [15:0] z:Cr [6:10] little endian ++ * index 2 = Cb plane, [15:0] z:Cb [6:10] little endian ++ */ ++#define DRM_FORMAT_S010 fourcc_code('S', '0', '1', '0') /* 2x2 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ ++#define DRM_FORMAT_S210 fourcc_code('S', '2', '1', '0') /* 2x1 subsampled Cb (1) and Cr (2) planes 10 bits per channel */ ++#define DRM_FORMAT_S410 fourcc_code('S', '4', '1', '0') /* non-subsampled Cb (1) and Cr (2) planes 10 bits per channel */ ++ ++/* ++ * 3 plane YCbCr LSB aligned ++ * In order to use these formats in a similar fashion to MSB aligned ones ++ * implementation can multiply the values by 2^4=16. For that reason the padding ++ * must only contain zeros. 
++ * index 0 = Y plane, [15:0] z:Y [4:12] little endian ++ * index 1 = Cr plane, [15:0] z:Cr [4:12] little endian ++ * index 2 = Cb plane, [15:0] z:Cb [4:12] little endian ++ */ ++#define DRM_FORMAT_S012 fourcc_code('S', '0', '1', '2') /* 2x2 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ ++#define DRM_FORMAT_S212 fourcc_code('S', '2', '1', '2') /* 2x1 subsampled Cb (1) and Cr (2) planes 12 bits per channel */ ++#define DRM_FORMAT_S412 fourcc_code('S', '4', '1', '2') /* non-subsampled Cb (1) and Cr (2) planes 12 bits per channel */ ++ ++/* ++ * 3 plane YCbCr ++ * index 0 = Y plane, [15:0] Y little endian ++ * index 1 = Cr plane, [15:0] Cr little endian ++ * index 2 = Cb plane, [15:0] Cb little endian ++ */ ++#define DRM_FORMAT_S016 fourcc_code('S', '0', '1', '6') /* 2x2 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ ++#define DRM_FORMAT_S216 fourcc_code('S', '2', '1', '6') /* 2x1 subsampled Cb (1) and Cr (2) planes 16 bits per channel */ ++#define DRM_FORMAT_S416 fourcc_code('S', '4', '1', '6') /* non-subsampled Cb (1) and Cr (2) planes 16 bits per channel */ ++ + /* + * 3 plane YCbCr + * index 0: Y plane, [7:0] Y +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index cef0d207a6..eb80314028 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -2314,7 +2314,7 @@ enum { + IPV6_USER_FLOW = 0x0e, /* spec only (usr_ip6_spec; nfc only) */ + IPV4_FLOW = 0x10, /* hash only */ + IPV6_FLOW = 0x11, /* hash only */ +- ETHER_FLOW = 0x12, /* spec only (ether_spec) */ ++ ETHER_FLOW = 0x12, /* hash or spec (ether_spec) */ + + /* Used for GTP-U IPv4 and IPv6. 
+ * The format of GTP packets only includes +@@ -2371,7 +2371,7 @@ enum { + /* Flag to enable RSS spreading of traffic matching rule (nfc only) */ + #define FLOW_RSS 0x20000000 + +-/* L3-L4 network traffic flow hash options */ ++/* L2-L4 network traffic flow hash options */ + #define RXH_L2DA (1 << 1) + #define RXH_VLAN (1 << 2) + #define RXH_L3_PROTO (1 << 3) +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index a82ff795e0..00dc9caac9 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -601,6 +601,11 @@ + #define BTN_DPAD_LEFT 0x222 + #define BTN_DPAD_RIGHT 0x223 + ++#define BTN_GRIPL 0x224 ++#define BTN_GRIPR 0x225 ++#define BTN_GRIPL2 0x226 ++#define BTN_GRIPR2 0x227 ++ + #define KEY_ALS_TOGGLE 0x230 /* Ambient light sensor */ + #define KEY_ROTATE_LOCK_TOGGLE 0x231 /* Display rotation lock */ + #define KEY_REFRESH_RATE_TOGGLE 0x232 /* Display refresh rate toggle */ +@@ -765,6 +770,9 @@ + #define KEY_KBD_LCD_MENU4 0x2bb + #define KEY_KBD_LCD_MENU5 0x2bc + ++/* Performance Boost key (Alienware)/G-Mode key (Dell) */ ++#define KEY_PERFORMANCE 0x2bd ++ + #define BTN_TRIGGER_HAPPY 0x2c0 + #define BTN_TRIGGER_HAPPY1 0x2c0 + #define BTN_TRIGGER_HAPPY2 0x2c1 +diff --git a/include/standard-headers/linux/input.h b/include/standard-headers/linux/input.h +index 942ea6aaa9..d4512c20b5 100644 +--- a/include/standard-headers/linux/input.h ++++ b/include/standard-headers/linux/input.h +@@ -272,6 +272,7 @@ struct input_mask { + #define BUS_CEC 0x1E + #define BUS_INTEL_ISHTP 0x1F + #define BUS_AMD_SFH 0x20 ++#define BUS_SDW 0x21 + + /* + * MT_TOOL types +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index a3a3e942de..f5b17745de 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -745,6 +745,7 @@ + #define PCI_EXT_CAP_ID_L1SS 
0x1E /* L1 PM Substates */ + #define PCI_EXT_CAP_ID_PTM 0x1F /* Precision Time Measurement */ + #define PCI_EXT_CAP_ID_DVSEC 0x23 /* Designated Vendor-Specific */ ++#define PCI_EXT_CAP_ID_VF_REBAR 0x24 /* VF Resizable BAR */ + #define PCI_EXT_CAP_ID_DLF 0x25 /* Data Link Feature */ + #define PCI_EXT_CAP_ID_PL_16GT 0x26 /* Physical Layer 16.0 GT/s */ + #define PCI_EXT_CAP_ID_NPEM 0x29 /* Native PCIe Enclosure Management */ +@@ -1141,6 +1142,14 @@ + #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ + #define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) + ++/* VF Resizable BARs, same layout as PCI_REBAR */ ++#define PCI_VF_REBAR_CAP PCI_REBAR_CAP ++#define PCI_VF_REBAR_CAP_SIZES PCI_REBAR_CAP_SIZES ++#define PCI_VF_REBAR_CTRL PCI_REBAR_CTRL ++#define PCI_VF_REBAR_CTRL_BAR_IDX PCI_REBAR_CTRL_BAR_IDX ++#define PCI_VF_REBAR_CTRL_NBAR_MASK PCI_REBAR_CTRL_NBAR_MASK ++#define PCI_VF_REBAR_CTRL_BAR_SIZE PCI_REBAR_CTRL_BAR_SIZE ++ + /* Data Link Feature */ + #define PCI_DLF_CAP 0x04 /* Capabilities Register */ + #define PCI_DLF_EXCHANGE_ENABLE 0x80000000 /* Data Link Feature Exchange Enable */ +diff --git a/include/standard-headers/linux/vhost_types.h b/include/standard-headers/linux/vhost_types.h +index fd54044936..79b53a931a 100644 +--- a/include/standard-headers/linux/vhost_types.h ++++ b/include/standard-headers/linux/vhost_types.h +@@ -110,6 +110,11 @@ struct vhost_msg_v2 { + }; + }; + ++struct vhost_features_array { ++ uint64_t count; /* number of entries present in features array */ ++ uint64_t features[] ; ++}; ++ + struct vhost_memory_region { + uint64_t guest_phys_addr; + uint64_t memory_size; /* bytes */ +diff --git a/include/standard-headers/linux/virtio_net.h b/include/standard-headers/linux/virtio_net.h +index 982e854f14..93abaae0b9 100644 +--- a/include/standard-headers/linux/virtio_net.h ++++ b/include/standard-headers/linux/virtio_net.h +@@ -70,6 +70,28 @@ + * with the same MAC. 
+ */ + #define VIRTIO_NET_F_SPEED_DUPLEX 63 /* Device set linkspeed and duplex */ ++#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO 65 /* Driver can receive ++ * GSO-over-UDP-tunnel packets ++ */ ++#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM 66 /* Driver handles ++ * GSO-over-UDP-tunnel ++ * packets with partial csum ++ * for the outer header ++ */ ++#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO 67 /* Device can receive ++ * GSO-over-UDP-tunnel packets ++ */ ++#define VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM 68 /* Device handles ++ * GSO-over-UDP-tunnel ++ * packets with partial csum ++ * for the outer header ++ */ ++ ++/* Offloads bits corresponding to VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO{,_CSUM} ++ * features ++ */ ++#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED 46 ++#define VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED 47 + + #ifndef VIRTIO_NET_NO_LEGACY + #define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */ +@@ -131,12 +153,17 @@ struct virtio_net_hdr_v1 { + #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ + #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ + #define VIRTIO_NET_HDR_F_RSC_INFO 4 /* rsc info in csum_ fields */ ++#define VIRTIO_NET_HDR_F_UDP_TUNNEL_CSUM 8 /* UDP tunnel csum offload */ + uint8_t flags; + #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ + #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ + #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ + #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* GSO frame, IPv6 TCP */ + #define VIRTIO_NET_HDR_GSO_UDP_L4 5 /* GSO frame, IPv4& IPv6 UDP (USO) */ ++#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 0x20 /* UDPv4 tunnel present */ ++#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6 0x40 /* UDPv6 tunnel present */ ++#define VIRTIO_NET_HDR_GSO_UDP_TUNNEL (VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV4 | \ ++ VIRTIO_NET_HDR_GSO_UDP_TUNNEL_IPV6) + #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* TCP has ECN set */ + uint8_t gso_type; + __virtio16 hdr_len; /* Ethernet + IP + 
tcp/udp hdrs */ +@@ -181,6 +208,12 @@ struct virtio_net_hdr_v1_hash { + uint16_t padding; + }; + ++struct virtio_net_hdr_v1_hash_tunnel { ++ struct virtio_net_hdr_v1_hash hash_hdr; ++ uint16_t outer_th_offset; ++ uint16_t inner_nh_offset; ++}; ++ + #ifndef VIRTIO_NET_NO_LEGACY + /* This header comes first in the scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must +diff --git a/linux-headers/LICENSES/preferred/GPL-2.0 b/linux-headers/LICENSES/preferred/GPL-2.0 +index ff0812fd89..ea8e93dc44 100644 +--- a/linux-headers/LICENSES/preferred/GPL-2.0 ++++ b/linux-headers/LICENSES/preferred/GPL-2.0 +@@ -20,8 +20,8 @@ License-Text: + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + +- Copyright (C) 1989, 1991 Free Software Foundation, Inc. +- 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ Copyright (C) 1989, 1991 Free Software Foundation, Inc., ++ + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +@@ -322,10 +322,8 @@ the "copyright" line and a pointer to where the full notice is found. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + +- You should have received a copy of the GNU General Public License +- along with this program; if not, write to the Free Software +- Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +- ++ You should have received a copy of the GNU General Public License along ++ with this program; if not, see . + + Also add information on how to contact you by electronic and paper mail. 
+ +diff --git a/linux-headers/asm-arm64/unistd_64.h b/linux-headers/asm-arm64/unistd_64.h +index ee9aaebdf3..4ae25c2b91 100644 +--- a/linux-headers/asm-arm64/unistd_64.h ++++ b/linux-headers/asm-arm64/unistd_64.h +@@ -324,6 +324,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index 2892a45023..04e0077fb4 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat) + #define __NR_open_tree_attr 467 + __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) + ++/* fs/inode.c */ ++#define __NR_file_getattr 468 ++__SYSCALL(__NR_file_getattr, sys_file_getattr) ++#define __NR_file_setattr 469 ++__SYSCALL(__NR_file_setattr, sys_file_setattr) ++ + #undef __NR_syscalls +-#define __NR_syscalls 468 ++#define __NR_syscalls 470 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-loongarch/unistd_64.h b/linux-headers/asm-loongarch/unistd_64.h +index 50d22df8f7..5033fc8f2f 100644 +--- a/linux-headers/asm-loongarch/unistd_64.h ++++ b/linux-headers/asm-loongarch/unistd_64.h +@@ -320,6 +320,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index bdcc2f460b..c99c10e5bf 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -396,5 +396,7 @@ + #define __NR_listxattrat (__NR_Linux + 465) + #define __NR_removexattrat (__NR_Linux + 466) + #define __NR_open_tree_attr (__NR_Linux + 467) ++#define __NR_file_getattr (__NR_Linux + 468) ++#define 
__NR_file_setattr (__NR_Linux + 469) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 3b6b0193b6..0d975bb185 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -372,5 +372,7 @@ + #define __NR_listxattrat (__NR_Linux + 465) + #define __NR_removexattrat (__NR_Linux + 466) + #define __NR_open_tree_attr (__NR_Linux + 467) ++#define __NR_file_getattr (__NR_Linux + 468) ++#define __NR_file_setattr (__NR_Linux + 469) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index 4609a4b4d3..86ac0ac84b 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -442,5 +442,7 @@ + #define __NR_listxattrat (__NR_Linux + 465) + #define __NR_removexattrat (__NR_Linux + 466) + #define __NR_open_tree_attr (__NR_Linux + 467) ++#define __NR_file_getattr (__NR_Linux + 468) ++#define __NR_file_setattr (__NR_Linux + 469) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h +index eaeda00178..077c5437f5 100644 +--- a/linux-headers/asm-powerpc/kvm.h ++++ b/linux-headers/asm-powerpc/kvm.h +@@ -1,18 +1,5 @@ + /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + /* +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License, version 2, as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. 
+- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +- * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 5d38a427e0..d7a32c5e06 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -449,6 +449,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 860a488e4d..ff35c51fc6 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -421,6 +421,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 5f59fd226c..ef27d4289d 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -18,6 +18,7 @@ + #define __KVM_HAVE_IRQ_LINE + + #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 ++#define KVM_DIRTY_LOG_PAGE_OFFSET 64 + + #define KVM_INTERRUPT_SET -1U + #define KVM_INTERRUPT_UNSET -2U +diff --git a/linux-headers/asm-riscv/unistd_32.h b/linux-headers/asm-riscv/unistd_32.h +index a5e769f1d9..6083373e88 100644 +--- a/linux-headers/asm-riscv/unistd_32.h ++++ b/linux-headers/asm-riscv/unistd_32.h +@@ -315,6 +315,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git 
a/linux-headers/asm-riscv/unistd_64.h b/linux-headers/asm-riscv/unistd_64.h +index 8df4d64841..f0c7585c60 100644 +--- a/linux-headers/asm-riscv/unistd_64.h ++++ b/linux-headers/asm-riscv/unistd_64.h +@@ -325,6 +325,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 85eedbd18e..37b8f6f358 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -440,5 +440,7 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index c03b1b9701..0652ba6331 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -388,5 +388,7 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 491d6b4eb6..8f784a5634 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -458,6 +458,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define __NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index 7cf88bf9bd..2f55bebb81 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -381,6 +381,8 @@ + #define __NR_listxattrat 465 + #define __NR_removexattrat 466 + #define 
__NR_open_tree_attr 467 ++#define __NR_file_getattr 468 ++#define __NR_file_setattr 469 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 82959111e6..8cc8673f15 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -334,6 +334,8 @@ + #define __NR_listxattrat (__X32_SYSCALL_BIT + 465) + #define __NR_removexattrat (__X32_SYSCALL_BIT + 466) + #define __NR_open_tree_attr (__X32_SYSCALL_BIT + 467) ++#define __NR_file_getattr (__X32_SYSCALL_BIT + 468) ++#define __NR_file_setattr (__X32_SYSCALL_BIT + 469) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/iommufd.h b/linux-headers/linux/iommufd.h +index cb0f7d6b4d..2105a03955 100644 +--- a/linux-headers/linux/iommufd.h ++++ b/linux-headers/linux/iommufd.h +@@ -56,6 +56,7 @@ enum { + IOMMUFD_CMD_VDEVICE_ALLOC = 0x91, + IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92, + IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93, ++ IOMMUFD_CMD_HW_QUEUE_ALLOC = 0x94, + }; + + /** +@@ -590,17 +591,44 @@ struct iommu_hw_info_arm_smmuv3 { + __u32 aidr; + }; + ++/** ++ * struct iommu_hw_info_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Hardware ++ * Information (IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV) ++ * ++ * @flags: Must be 0 ++ * @version: Version number for the CMDQ-V HW for PARAM bits[03:00] ++ * @log2vcmdqs: Log2 of the total number of VCMDQs for PARAM bits[07:04] ++ * @log2vsids: Log2 of the total number of SID replacements for PARAM bits[15:12] ++ * @__reserved: Must be 0 ++ * ++ * VMM can use these fields directly in its emulated global PARAM register. Note ++ * that only one Virtual Interface (VINTF) should be exposed to a VM, i.e. PARAM ++ * bits[11:08] should be set to 0 for log2 of the total number of VINTFs. 
++ */ ++struct iommu_hw_info_tegra241_cmdqv { ++ __u32 flags; ++ __u8 version; ++ __u8 log2vcmdqs; ++ __u8 log2vsids; ++ __u8 __reserved; ++}; ++ + /** + * enum iommu_hw_info_type - IOMMU Hardware Info Types +- * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware ++ * @IOMMU_HW_INFO_TYPE_NONE: Output by the drivers that do not report hardware + * info ++ * @IOMMU_HW_INFO_TYPE_DEFAULT: Input to request for a default type + * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type + * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type ++ * @IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM ++ * SMMUv3) info type + */ + enum iommu_hw_info_type { + IOMMU_HW_INFO_TYPE_NONE = 0, ++ IOMMU_HW_INFO_TYPE_DEFAULT = 0, + IOMMU_HW_INFO_TYPE_INTEL_VTD = 1, + IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2, ++ IOMMU_HW_INFO_TYPE_TEGRA241_CMDQV = 3, + }; + + /** +@@ -625,6 +653,15 @@ enum iommufd_hw_capabilities { + IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2, + }; + ++/** ++ * enum iommufd_hw_info_flags - Flags for iommu_hw_info ++ * @IOMMU_HW_INFO_FLAG_INPUT_TYPE: If set, @in_data_type carries an input type ++ * for user space to request for a specific info ++ */ ++enum iommufd_hw_info_flags { ++ IOMMU_HW_INFO_FLAG_INPUT_TYPE = 1 << 0, ++}; ++ + /** + * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO) + * @size: sizeof(struct iommu_hw_info) +@@ -634,6 +671,12 @@ enum iommufd_hw_capabilities { + * data that kernel supports + * @data_uptr: User pointer to a user-space buffer used by the kernel to fill + * the iommu type specific hardware information data ++ * @in_data_type: This shares the same field with @out_data_type, making it be ++ * a bidirectional field. When IOMMU_HW_INFO_FLAG_INPUT_TYPE is ++ * set, an input type carried via this @in_data_type field will ++ * be valid, requesting for the info data to the given type. 
If ++ * IOMMU_HW_INFO_FLAG_INPUT_TYPE is unset, any input value will ++ * be seen as IOMMU_HW_INFO_TYPE_DEFAULT + * @out_data_type: Output the iommu hardware info type as defined in the enum + * iommu_hw_info_type. + * @out_capabilities: Output the generic iommu capability info type as defined +@@ -663,7 +706,10 @@ struct iommu_hw_info { + __u32 dev_id; + __u32 data_len; + __aligned_u64 data_uptr; +- __u32 out_data_type; ++ union { ++ __u32 in_data_type; ++ __u32 out_data_type; ++ }; + __u8 out_max_pasid_log2; + __u8 __reserved[3]; + __aligned_u64 out_capabilities; +@@ -951,10 +997,29 @@ struct iommu_fault_alloc { + * enum iommu_viommu_type - Virtual IOMMU Type + * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type ++ * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM ++ * SMMUv3) enabled ARM SMMUv3 type + */ + enum iommu_viommu_type { + IOMMU_VIOMMU_TYPE_DEFAULT = 0, + IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1, ++ IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2, ++}; ++ ++/** ++ * struct iommu_viommu_tegra241_cmdqv - NVIDIA Tegra241 CMDQV Virtual Interface ++ * (IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV) ++ * @out_vintf_mmap_offset: mmap offset argument for VINTF's page0 ++ * @out_vintf_mmap_length: mmap length argument for VINTF's page0 ++ * ++ * Both @out_vintf_mmap_offset and @out_vintf_mmap_length are reported by kernel ++ * for user space to mmap the VINTF page0 from the host physical address space ++ * to the guest physical address space so that a guest kernel can directly R/W ++ * access to the VINTF page0 in order to control its virtual command queues. 
++ */ ++struct iommu_viommu_tegra241_cmdqv { ++ __aligned_u64 out_vintf_mmap_offset; ++ __aligned_u64 out_vintf_mmap_length; + }; + + /** +@@ -965,6 +1030,9 @@ enum iommu_viommu_type { + * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU + * @hwpt_id: ID of a nesting parent HWPT to associate to + * @out_viommu_id: Output virtual IOMMU ID for the allocated object ++ * @data_len: Length of the type specific data ++ * @__reserved: Must be 0 ++ * @data_uptr: User pointer to a driver-specific virtual IOMMU data + * + * Allocate a virtual IOMMU object, representing the underlying physical IOMMU's + * virtualization support that is a security-isolated slice of the real IOMMU HW +@@ -985,6 +1053,9 @@ struct iommu_viommu_alloc { + __u32 dev_id; + __u32 hwpt_id; + __u32 out_viommu_id; ++ __u32 data_len; ++ __u32 __reserved; ++ __aligned_u64 data_uptr; + }; + #define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC) + +@@ -995,10 +1066,15 @@ struct iommu_viommu_alloc { + * @dev_id: The physical device to allocate a virtual instance on the vIOMMU + * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTORY + * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID +- * of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table ++ * of AMD IOMMU, and vRID of Intel VT-d + * + * Allocate a virtual device instance (for a physical device) against a vIOMMU. + * This instance holds the device's information (related to its vIOMMU) in a VM. ++ * User should use IOMMU_DESTROY to destroy the virtual device before ++ * destroying the physical device (by closing vfio_cdev fd). Otherwise the ++ * virtual device would be forcibly destroyed on physical device destruction, ++ * its vdevice_id would be permanently leaked (unremovable & unreusable) until ++ * iommu fd closed. 
+ */ + struct iommu_vdevice_alloc { + __u32 size; +@@ -1075,10 +1151,12 @@ struct iommufd_vevent_header { + * enum iommu_veventq_type - Virtual Event Queue Type + * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use + * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue ++ * @IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV Extension IRQ + */ + enum iommu_veventq_type { + IOMMU_VEVENTQ_TYPE_DEFAULT = 0, + IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1, ++ IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV = 2, + }; + + /** +@@ -1102,6 +1180,19 @@ struct iommu_vevent_arm_smmuv3 { + __aligned_le64 evt[4]; + }; + ++/** ++ * struct iommu_vevent_tegra241_cmdqv - Tegra241 CMDQV IRQ ++ * (IOMMU_VEVENTQ_TYPE_TEGRA241_CMDQV) ++ * @lvcmdq_err_map: 128-bit logical vcmdq error map, little-endian. ++ * (Refer to register LVCMDQ_ERR_MAPs per VINTF ) ++ * ++ * The 128-bit register value from HW exclusively reflect the error bits for a ++ * Virtual Interface represented by a vIOMMU object. Read and report directly. 
++ */ ++struct iommu_vevent_tegra241_cmdqv { ++ __aligned_le64 lvcmdq_err_map[2]; ++}; ++ + /** + * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC) + * @size: sizeof(struct iommu_veventq_alloc) +@@ -1141,4 +1232,61 @@ struct iommu_veventq_alloc { + __u32 __reserved; + }; + #define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC) ++ ++/** ++ * enum iommu_hw_queue_type - HW Queue Type ++ * @IOMMU_HW_QUEUE_TYPE_DEFAULT: Reserved for future use ++ * @IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM ++ * SMMUv3) Virtual Command Queue (VCMDQ) ++ */ ++enum iommu_hw_queue_type { ++ IOMMU_HW_QUEUE_TYPE_DEFAULT = 0, ++ /* ++ * TEGRA241_CMDQV requirements (otherwise, allocation will fail) ++ * - alloc starts from the lowest @index=0 in ascending order ++ * - destroy starts from the last allocated @index in descending order ++ * - @base_addr must be aligned to @length in bytes and mapped in IOAS ++ * - @length must be a power of 2, with a minimum 32 bytes and a maximum ++ * 2 ^ idr[1].CMDQS * 16 bytes (use GET_HW_INFO call to read idr[1] ++ * from struct iommu_hw_info_arm_smmuv3) ++ * - suggest to back the queue memory with contiguous physical pages or ++ * a single huge page with alignment of the queue size, and limit the ++ * emulated vSMMU's IDR1.CMDQS to log2(huge page size / 16 bytes) ++ */ ++ IOMMU_HW_QUEUE_TYPE_TEGRA241_CMDQV = 1, ++}; ++ ++/** ++ * struct iommu_hw_queue_alloc - ioctl(IOMMU_HW_QUEUE_ALLOC) ++ * @size: sizeof(struct iommu_hw_queue_alloc) ++ * @flags: Must be 0 ++ * @viommu_id: Virtual IOMMU ID to associate the HW queue with ++ * @type: One of enum iommu_hw_queue_type ++ * @index: The logical index to the HW queue per virtual IOMMU for a multi-queue ++ * model ++ * @out_hw_queue_id: The ID of the new HW queue ++ * @nesting_parent_iova: Base address of the queue memory in the guest physical ++ * address space ++ * @length: Length of the queue memory ++ * ++ * Allocate a HW queue object for a 
vIOMMU-specific HW-accelerated queue, which ++ * allows HW to access a guest queue memory described using @nesting_parent_iova ++ * and @length. ++ * ++ * A vIOMMU can allocate multiple queues, but it must use a different @index per ++ * type to separate each allocation, e.g:: ++ * ++ * Type1 HW queue0, Type1 HW queue1, Type2 HW queue0, ... ++ */ ++struct iommu_hw_queue_alloc { ++ __u32 size; ++ __u32 flags; ++ __u32 viommu_id; ++ __u32 type; ++ __u32 index; ++ __u32 out_hw_queue_id; ++ __aligned_u64 nesting_parent_iova; ++ __aligned_u64 length; ++}; ++#define IOMMU_HW_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HW_QUEUE_ALLOC) + #endif +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 32c5885a3c..be704965d8 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -636,6 +636,7 @@ struct kvm_ioeventfd { + #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) + #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) + #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) ++#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) + + /* for KVM_ENABLE_CAP */ + struct kvm_enable_cap { +@@ -952,6 +953,7 @@ struct kvm_enable_cap { + #define KVM_CAP_ARM_EL2 240 + #define KVM_CAP_ARM_EL2_E2H0 241 + #define KVM_CAP_RISCV_MP_STATE_RESET 242 ++#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243 + + struct kvm_irq_routing_irqchip { + __u32 irqchip; +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index 79bf8c0cc5..4d96d1fc12 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -905,10 +905,12 @@ struct vfio_device_feature { + * VFIO_DEVICE_BIND_IOMMUFD - _IOR(VFIO_TYPE, VFIO_BASE + 18, + * struct vfio_device_bind_iommufd) + * @argsz: User filled size of this data. +- * @flags: Must be 0. ++ * @flags: Must be 0 or a bit flags of VFIO_DEVICE_BIND_* + * @iommufd: iommufd to bind. + * @out_devid: The device id generated by this bind. devid is a handle for + * this device/iommufd bond and can be used in IOMMUFD commands. 
++ * @token_uuid_ptr: Valid if VFIO_DEVICE_BIND_FLAG_TOKEN. Points to a 16 byte ++ * UUID in the same format as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN. + * + * Bind a vfio_device to the specified iommufd. + * +@@ -917,13 +919,21 @@ struct vfio_device_feature { + * + * Unbind is automatically conducted when device fd is closed. + * ++ * A token is sometimes required to open the device, unless this is known to be ++ * needed VFIO_DEVICE_BIND_FLAG_TOKEN should not be set and token_uuid_ptr is ++ * ignored. The only case today is a PF/VF relationship where the VF bind must ++ * be provided the same token as VFIO_DEVICE_FEATURE_PCI_VF_TOKEN provided to ++ * the PF. ++ * + * Return: 0 on success, -errno on failure. + */ + struct vfio_device_bind_iommufd { + __u32 argsz; + __u32 flags; ++#define VFIO_DEVICE_BIND_FLAG_TOKEN (1 << 0) + __s32 iommufd; + __u32 out_devid; ++ __aligned_u64 token_uuid_ptr; + }; + + #define VFIO_DEVICE_BIND_IOMMUFD _IO(VFIO_TYPE, VFIO_BASE + 18) +diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h +index d4b3e2ae13..283348b64a 100644 +--- a/linux-headers/linux/vhost.h ++++ b/linux-headers/linux/vhost.h +@@ -235,4 +235,39 @@ + */ + #define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ + struct vhost_vring_state) ++ ++/* Extended features manipulation */ ++#define VHOST_GET_FEATURES_ARRAY _IOR(VHOST_VIRTIO, 0x83, \ ++ struct vhost_features_array) ++#define VHOST_SET_FEATURES_ARRAY _IOW(VHOST_VIRTIO, 0x83, \ ++ struct vhost_features_array) ++ ++/* fork_owner values for vhost */ ++#define VHOST_FORK_OWNER_KTHREAD 0 ++#define VHOST_FORK_OWNER_TASK 1 ++ ++/** ++ * VHOST_SET_FORK_FROM_OWNER - Set the fork_owner flag for the vhost device, ++ * This ioctl must called before VHOST_SET_OWNER. 
++ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y ++ * ++ * @param fork_owner: An 8-bit value that determines the vhost thread mode ++ * ++ * When fork_owner is set to VHOST_FORK_OWNER_TASK(default value): ++ * - Vhost will create vhost worker as tasks forked from the owner, ++ * inheriting all of the owner's attributes. ++ * ++ * When fork_owner is set to VHOST_FORK_OWNER_KTHREAD: ++ * - Vhost will create vhost workers as kernel threads. ++ */ ++#define VHOST_SET_FORK_FROM_OWNER _IOW(VHOST_VIRTIO, 0x83, __u8) ++ ++/** ++ * VHOST_GET_FORK_OWNER - Get the current fork_owner flag for the vhost device. ++ * Only available when CONFIG_VHOST_ENABLE_FORK_OWNER_CONTROL=y ++ * ++ * @return: An 8-bit value indicating the current thread mode. ++ */ ++#define VHOST_GET_FORK_FROM_OWNER _IOR(VHOST_VIRTIO, 0x84, __u8) ++ + #endif +-- +2.47.3 + diff --git a/kvm-linux-headers-deal-with-counted_by-annotation.patch b/kvm-linux-headers-deal-with-counted_by-annotation.patch new file mode 100644 index 0000000..eb0be84 --- /dev/null +++ b/kvm-linux-headers-deal-with-counted_by-annotation.patch @@ -0,0 +1,47 @@ +From aa56db1a8eb6ede62e58ab4827fc8a7ba8a2ec4a Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:16 +0200 +Subject: [PATCH 07/19] linux-headers: deal with counted_by annotation + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [2/14] dacb8059a0de06a0b9665152e0725da7da08467e (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Such annotation is present into the kernel uAPI headers since +v6.7, and will be used soon by the vhost_type.h. Deal with it +just stripping it. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit c3d9dcd87f0d228ea3ac5a42076da829cff401f0) +Signed-off-by: Laurent Vivier +--- + scripts/update-linux-headers.sh | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh +index 828a7809f7..844d9cb9f5 100755 +--- a/scripts/update-linux-headers.sh ++++ b/scripts/update-linux-headers.sh +@@ -90,6 +90,7 @@ cp_portable() { + -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ + -e "$arch_cmd" \ + -e 's/__bitwise//' \ ++ -e 's/__counted_by(\w*)//' \ + -e 's/__attribute__((packed))/QEMU_PACKED/' \ + -e 's/__inline__/inline/' \ + -e 's/__BITS_PER_LONG/HOST_LONG_BITS/' \ +-- +2.47.3 + diff --git a/kvm-net-bundle-all-offloads-in-a-single-struct.patch b/kvm-net-bundle-all-offloads-in-a-single-struct.patch new file mode 100644 index 0000000..7d62dd7 --- /dev/null +++ b/kvm-net-bundle-all-offloads-in-a-single-struct.patch @@ -0,0 +1,366 @@ +From e98a5779344e632fab5cce16508fb6fbdb847332 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:15 +0200 +Subject: [PATCH 06/19] net: bundle all offloads in a single struct + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [1/14] 6895db45d4c3490df93d5156a72fa11f706fa5d2 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +The set_offload() argument list is already pretty long and +we are going to introduce soon a bunch of additional offloads. + +Replace the offload arguments with a single struct and update +all the relevant call-sites. + +No functional changes intended. + +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit e5fd02d8253abdc25c0eb145765734890c256b71) +Signed-off-by: Laurent Vivier +--- + hw/net/e1000e_core.c | 5 +++-- + hw/net/igb_core.c | 5 +++-- + hw/net/virtio-net.c | 19 +++++++++++-------- + hw/net/vmxnet3.c | 13 +++++-------- + include/net/net.h | 15 ++++++++++++--- + net/net.c | 5 ++--- + net/netmap.c | 3 +-- + net/tap-bsd.c | 3 +-- + net/tap-linux.c | 21 ++++++++++++--------- + net/tap-solaris.c | 4 ++-- + net/tap-stub.c | 3 +-- + net/tap.c | 8 ++++---- + net/tap_int.h | 4 ++-- + 13 files changed, 59 insertions(+), 49 deletions(-) + +diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c +index 06657bb3ac..8fef598b49 100644 +--- a/hw/net/e1000e_core.c ++++ b/hw/net/e1000e_core.c +@@ -2822,8 +2822,9 @@ e1000e_update_rx_offloads(E1000ECore *core) + trace_e1000e_rx_set_cso(cso_state); + + if (core->has_vnet) { +- qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, +- cso_state, 0, 0, 0, 0, 0, 0); ++ NetOffloads ol = { .csum = cso_state }; ++ ++ qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, &ol); + } + } + +diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c +index 39e3ce1c8f..45d8fd795b 100644 +--- a/hw/net/igb_core.c ++++ b/hw/net/igb_core.c +@@ -3058,8 +3058,9 @@ igb_update_rx_offloads(IGBCore *core) + trace_e1000e_rx_set_cso(cso_state); + + if (core->has_vnet) { +- qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, +- cso_state, 0, 0, 0, 0, 0, 0); ++ NetOffloads ol = {.csum = cso_state }; ++ ++ qemu_set_offload(qemu_get_queue(core->owner_nic)->peer, &ol); + } + } + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 6b5b5dace3..b86ba1fd27 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -773,14 +773,17 @@ static uint64_t virtio_net_bad_features(VirtIODevice *vdev) + + static void virtio_net_apply_guest_offloads(VirtIONet *n) + { +- qemu_set_offload(qemu_get_queue(n->nic)->peer, +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), +- !!(n->curr_guest_offloads & (1ULL 
<< VIRTIO_NET_F_GUEST_TSO4)), +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), +- !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6))); ++ NetOffloads ol = { ++ .csum = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)), ++ .tso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)), ++ .tso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)), ++ .ecn = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)), ++ .ufo = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), ++ .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), ++ .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)), ++ }; ++ ++ qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol); + } + + static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) +diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c +index af73aa8ef2..03732375a7 100644 +--- a/hw/net/vmxnet3.c ++++ b/hw/net/vmxnet3.c +@@ -1322,14 +1322,11 @@ static void vmxnet3_update_features(VMXNET3State *s) + s->lro_supported, rxcso_supported, + s->rx_vlan_stripping); + if (s->peer_has_vhdr) { +- qemu_set_offload(qemu_get_queue(s->nic)->peer, +- rxcso_supported, +- s->lro_supported, +- s->lro_supported, +- 0, +- 0, +- 0, +- 0); ++ NetOffloads ol = { .csum = rxcso_supported, ++ .tso4 = s->lro_supported, ++ .tso6 = s->lro_supported }; ++ ++ qemu_set_offload(qemu_get_queue(s->nic)->peer, &ol); + } + } + +diff --git a/include/net/net.h b/include/net/net.h +index 84ee18e0f9..48ba333d02 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -35,6 +35,16 @@ typedef struct NICConf { + int32_t bootindex; + } NICConf; + ++typedef struct NetOffloads { ++ bool csum; ++ bool tso4; ++ bool tso6; ++ bool ecn; ++ bool ufo; ++ bool uso4; 
++ bool uso6; ++} NetOffloads; ++ + #define DEFINE_NIC_PROPERTIES(_state, _conf) \ + DEFINE_PROP_MACADDR("mac", _state, _conf.macaddr), \ + DEFINE_PROP_NETDEV("netdev", _state, _conf.peers) +@@ -57,7 +67,7 @@ typedef bool (HasUfo)(NetClientState *); + typedef bool (HasUso)(NetClientState *); + typedef bool (HasVnetHdr)(NetClientState *); + typedef bool (HasVnetHdrLen)(NetClientState *, int); +-typedef void (SetOffload)(NetClientState *, int, int, int, int, int, int, int); ++typedef void (SetOffload)(NetClientState *, const NetOffloads *); + typedef int (GetVnetHdrLen)(NetClientState *); + typedef void (SetVnetHdrLen)(NetClientState *, int); + typedef bool (GetVnetHashSupportedTypes)(NetClientState *, uint32_t *); +@@ -189,8 +199,7 @@ bool qemu_has_ufo(NetClientState *nc); + bool qemu_has_uso(NetClientState *nc); + bool qemu_has_vnet_hdr(NetClientState *nc); + bool qemu_has_vnet_hdr_len(NetClientState *nc, int len); +-void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, +- int ecn, int ufo, int uso4, int uso6); ++void qemu_set_offload(NetClientState *nc, const NetOffloads *ol); + int qemu_get_vnet_hdr_len(NetClientState *nc); + void qemu_set_vnet_hdr_len(NetClientState *nc, int len); + bool qemu_get_vnet_hash_supported_types(NetClientState *nc, uint32_t *types); +diff --git a/net/net.c b/net/net.c +index da275db86e..63872b6855 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -540,14 +540,13 @@ bool qemu_has_vnet_hdr_len(NetClientState *nc, int len) + return nc->info->has_vnet_hdr_len(nc, len); + } + +-void qemu_set_offload(NetClientState *nc, int csum, int tso4, int tso6, +- int ecn, int ufo, int uso4, int uso6) ++void qemu_set_offload(NetClientState *nc, const NetOffloads *ol) + { + if (!nc || !nc->info->set_offload) { + return; + } + +- nc->info->set_offload(nc, csum, tso4, tso6, ecn, ufo, uso4, uso6); ++ nc->info->set_offload(nc, ol); + } + + int qemu_get_vnet_hdr_len(NetClientState *nc) +diff --git a/net/netmap.c b/net/netmap.c +index 
297510e190..6cd8f2bdc5 100644 +--- a/net/netmap.c ++++ b/net/netmap.c +@@ -366,8 +366,7 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, int len) + } + } + +-static void netmap_set_offload(NetClientState *nc, int csum, int tso4, int tso6, +- int ecn, int ufo, int uso4, int uso6) ++static void netmap_set_offload(NetClientState *nc, const NetOffloads *ol) + { + NetmapState *s = DO_UPCAST(NetmapState, nc, nc); + +diff --git a/net/tap-bsd.c b/net/tap-bsd.c +index b4c84441ba..86b6edee94 100644 +--- a/net/tap-bsd.c ++++ b/net/tap-bsd.c +@@ -231,8 +231,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) + return -EINVAL; + } + +-void tap_fd_set_offload(int fd, int csum, int tso4, +- int tso6, int ecn, int ufo, int uso4, int uso6) ++void tap_fd_set_offload(int fd, const NetOffloads *ol) + { + } + +diff --git a/net/tap-linux.c b/net/tap-linux.c +index 22ec2f45d2..a1c58f74f5 100644 +--- a/net/tap-linux.c ++++ b/net/tap-linux.c +@@ -239,8 +239,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) + abort(); + } + +-void tap_fd_set_offload(int fd, int csum, int tso4, +- int tso6, int ecn, int ufo, int uso4, int uso6) ++void tap_fd_set_offload(int fd, const NetOffloads *ol) + { + unsigned int offload = 0; + +@@ -249,20 +248,24 @@ void tap_fd_set_offload(int fd, int csum, int tso4, + return; + } + +- if (csum) { ++ if (ol->csum) { + offload |= TUN_F_CSUM; +- if (tso4) ++ if (ol->tso4) { + offload |= TUN_F_TSO4; +- if (tso6) ++ } ++ if (ol->tso6) { + offload |= TUN_F_TSO6; +- if ((tso4 || tso6) && ecn) ++ } ++ if ((ol->tso4 || ol->tso6) && ol->ecn) { + offload |= TUN_F_TSO_ECN; +- if (ufo) ++ } ++ if (ol->ufo) { + offload |= TUN_F_UFO; +- if (uso4) { ++ } ++ if (ol->uso4) { + offload |= TUN_F_USO4; + } +- if (uso6) { ++ if (ol->uso6) { + offload |= TUN_F_USO6; + } + } +diff --git a/net/tap-solaris.c b/net/tap-solaris.c +index 51b7830bef..833c066bee 100644 +--- a/net/tap-solaris.c ++++ b/net/tap-solaris.c +@@ -27,6 +27,7 @@ + #include "tap_int.h" + #include "qemu/ctype.h" + #include 
"qemu/cutils.h" ++#include "net/net.h" + + #include + #include +@@ -235,8 +236,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) + return -EINVAL; + } + +-void tap_fd_set_offload(int fd, int csum, int tso4, +- int tso6, int ecn, int ufo, int uso4, int uso6) ++void tap_fd_set_offload(int fd, const NetOffloads *ol) + { + } + +diff --git a/net/tap-stub.c b/net/tap-stub.c +index 38673434cb..67d14ad4d5 100644 +--- a/net/tap-stub.c ++++ b/net/tap-stub.c +@@ -66,8 +66,7 @@ int tap_fd_set_vnet_be(int fd, int is_be) + return -EINVAL; + } + +-void tap_fd_set_offload(int fd, int csum, int tso4, +- int tso6, int ecn, int ufo, int uso4, int uso6) ++void tap_fd_set_offload(int fd, const NetOffloads *ol) + { + } + +diff --git a/net/tap.c b/net/tap.c +index f7df702f97..72046a43aa 100644 +--- a/net/tap.c ++++ b/net/tap.c +@@ -285,15 +285,14 @@ static int tap_set_vnet_be(NetClientState *nc, bool is_be) + return tap_fd_set_vnet_be(s->fd, is_be); + } + +-static void tap_set_offload(NetClientState *nc, int csum, int tso4, +- int tso6, int ecn, int ufo, int uso4, int uso6) ++static void tap_set_offload(NetClientState *nc, const NetOffloads *ol) + { + TAPState *s = DO_UPCAST(TAPState, nc, nc); + if (s->fd < 0) { + return; + } + +- tap_fd_set_offload(s->fd, csum, tso4, tso6, ecn, ufo, uso4, uso6); ++ tap_fd_set_offload(s->fd, ol); + } + + static void tap_exit_notify(Notifier *notifier, void *data) +@@ -391,6 +390,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer, + int fd, + int vnet_hdr) + { ++ NetOffloads ol = {}; + NetClientState *nc; + TAPState *s; + +@@ -404,7 +404,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer, + s->has_ufo = tap_probe_has_ufo(s->fd); + s->has_uso = tap_probe_has_uso(s->fd); + s->enabled = true; +- tap_set_offload(&s->nc, 0, 0, 0, 0, 0, 0, 0); ++ tap_set_offload(&s->nc, &ol); + /* + * Make sure host header length is set correctly in tap: + * it might have been modified by another instance of qemu. 
+diff --git a/net/tap_int.h b/net/tap_int.h +index 8857ff299d..f8bbe1cb0c 100644 +--- a/net/tap_int.h ++++ b/net/tap_int.h +@@ -27,6 +27,7 @@ + #define NET_TAP_INT_H + + #include "qapi/qapi-types-net.h" ++#include "net/net.h" + + int tap_open(char *ifname, int ifname_size, int *vnet_hdr, + int vnet_hdr_required, int mq_required, Error **errp); +@@ -37,8 +38,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); + int tap_probe_vnet_hdr(int fd, Error **errp); + int tap_probe_has_ufo(int fd); + int tap_probe_has_uso(int fd); +-void tap_fd_set_offload(int fd, int csum, int tso4, int tso6, int ecn, int ufo, +- int uso4, int uso6); ++void tap_fd_set_offload(int fd, const NetOffloads *ol); + void tap_fd_set_vnet_hdr_len(int fd, int len); + int tap_fd_set_vnet_le(int fd, int vnet_is_le); + int tap_fd_set_vnet_be(int fd, int vnet_is_be); +-- +2.47.3 + diff --git a/kvm-net-implement-UDP-tunnel-features-offloading.patch b/kvm-net-implement-UDP-tunnel-features-offloading.patch new file mode 100644 index 0000000..f7ecee1 --- /dev/null +++ b/kvm-net-implement-UDP-tunnel-features-offloading.patch @@ -0,0 +1,209 @@ +From 97f8e0bbaddf37dcf147f6405a8a96921469d60d Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:28 +0200 +Subject: [PATCH 19/19] net: implement UDP tunnel features offloading + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [14/14] 5ff3488d210020730a71b944519a9938f1628dcc (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +When any host or guest GSO over UDP tunnel offload is enabled the +virtio net header includes the additional tunnel-related fields, +update the size accordingly. + +Push the GSO over UDP tunnel offloads all the way down to the tap +device extending the newly introduced NetFeatures struct, and +eventually enable the associated features. 
+ +As per virtio specification, to convert features bit to offload bit, +map the extended features into the reserved range. + +Finally, make the vhost backend aware of the exact header layout, to +copy it correctly. The tunnel-related field are present if either +the guest or the host negotiated any UDP tunnel related feature: +add them to the kernel supported features list, to allow qemu +transfer to the backend the needed information. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Acked-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Message-ID: <093b4bc68368046bffbcab2202227632d6e4e83b.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a5289563ad74a2a37e8d2101d82935454c71fef4) +Signed-off-by: Laurent Vivier +--- + hw/net/virtio-net.c | 34 ++++++++++++++++++++++++++-------- + include/net/net.h | 2 ++ + net/net.c | 3 ++- + net/tap-linux.c | 6 ++++++ + net/tap.c | 2 ++ + 5 files changed, 38 insertions(+), 9 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 0abb8c8a62..f021663f92 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -103,6 +103,12 @@ + #define VIRTIO_NET_F2O_SHIFT (VIRTIO_NET_OFFLOAD_MAP_MIN - \ + VIRTIO_NET_FEATURES_MAP_MIN + 64) + ++static bool virtio_has_tunnel_hdr(const uint64_t *features) ++{ ++ return virtio_has_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO) || ++ virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO); ++} ++ + static const VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_NET_F_MAC, + .end = endof(struct virtio_net_config, mac)}, +@@ -659,7 +665,8 @@ static bool peer_has_tunnel(VirtIONet *n) + } + + static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, +- int version_1, int hash_report) ++ int version_1, int hash_report, ++ int tunnel) + { + int i; + NetClientState *nc; +@@ -667,9 +674,11 @@ static void 
virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + n->mergeable_rx_bufs = mergeable_rx_bufs; + + if (version_1) { +- n->guest_hdr_len = hash_report ? +- sizeof(struct virtio_net_hdr_v1_hash) : +- sizeof(struct virtio_net_hdr_mrg_rxbuf); ++ n->guest_hdr_len = tunnel ? ++ sizeof(struct virtio_net_hdr_v1_hash_tunnel) : ++ (hash_report ? ++ sizeof(struct virtio_net_hdr_v1_hash) : ++ sizeof(struct virtio_net_hdr_mrg_rxbuf)); + n->rss_data.populate_hash = !!hash_report; + } else { + n->guest_hdr_len = n->mergeable_rx_bufs ? +@@ -803,6 +812,10 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n) + .ufo = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)), + .uso4 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO4)), + .uso6 = !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_USO6)), ++ .tnl = !!(n->curr_guest_offloads & ++ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED)), ++ .tnl_csum = !!(n->curr_guest_offloads & ++ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED)), + }; + + qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol); +@@ -824,7 +837,9 @@ virtio_net_guest_offloads_by_features(const uint64_t *features) + (1ULL << VIRTIO_NET_F_GUEST_ECN) | + (1ULL << VIRTIO_NET_F_GUEST_UFO) | + (1ULL << VIRTIO_NET_F_GUEST_USO4) | +- (1ULL << VIRTIO_NET_F_GUEST_USO6); ++ (1ULL << VIRTIO_NET_F_GUEST_USO6) | ++ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_MAPPED) | ++ (1ULL << VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM_MAPPED); + + return guest_offloads_mask & virtio_net_features_to_offload(features); + } +@@ -937,7 +952,8 @@ static void virtio_net_set_features(VirtIODevice *vdev, + virtio_has_feature_ex(features, + VIRTIO_F_VERSION_1), + virtio_has_feature_ex(features, +- VIRTIO_NET_F_HASH_REPORT)); ++ VIRTIO_NET_F_HASH_REPORT), ++ virtio_has_tunnel_hdr(features)); + + n->rsc4_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) && + virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4); +@@ -3163,13 +3179,15 @@ 
static int virtio_net_post_load_device(void *opaque, int version_id) + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + int i, link_down; ++ bool has_tunnel_hdr = virtio_has_tunnel_hdr(vdev->guest_features_ex); + + trace_virtio_net_post_load_device(); + virtio_net_set_mrg_rx_bufs(n, n->mergeable_rx_bufs, + virtio_vdev_has_feature(vdev, + VIRTIO_F_VERSION_1), + virtio_vdev_has_feature(vdev, +- VIRTIO_NET_F_HASH_REPORT)); ++ VIRTIO_NET_F_HASH_REPORT), ++ has_tunnel_hdr); + + /* MAC_TABLE_ENTRIES may be different from the saved image */ + if (n->mac_table.in_use > MAC_TABLE_ENTRIES) { +@@ -3989,7 +4007,7 @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) + + n->vqs[0].tx_waiting = 0; + n->tx_burst = n->net_conf.txburst; +- virtio_net_set_mrg_rx_bufs(n, 0, 0, 0); ++ virtio_net_set_mrg_rx_bufs(n, 0, 0, 0, 0); + n->promisc = 1; /* for compatibility */ + + n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN); +diff --git a/include/net/net.h b/include/net/net.h +index 9a9084690d..72b476ee1d 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -43,6 +43,8 @@ typedef struct NetOffloads { + bool ufo; + bool uso4; + bool uso6; ++ bool tnl; ++ bool tnl_csum; + } NetOffloads; + + #define DEFINE_NIC_PROPERTIES(_state, _conf) \ +diff --git a/net/net.c b/net/net.c +index 9536184a0c..27e0d27807 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -575,7 +575,8 @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) + + assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) || + len == sizeof(struct virtio_net_hdr) || +- len == sizeof(struct virtio_net_hdr_v1_hash)); ++ len == sizeof(struct virtio_net_hdr_v1_hash) || ++ len == sizeof(struct virtio_net_hdr_v1_hash_tunnel)); + + nc->vnet_hdr_len = len; + nc->info->set_vnet_hdr_len(nc, len); +diff --git a/net/tap-linux.c b/net/tap-linux.c +index e2628be798..8e275d2ea4 100644 +--- a/net/tap-linux.c ++++ b/net/tap-linux.c +@@ -279,6 +279,12 @@ void tap_fd_set_offload(int fd, const 
NetOffloads *ol) + if (ol->uso6) { + offload |= TUN_F_USO6; + } ++ if (ol->tnl) { ++ offload |= TUN_F_UDP_TUNNEL_GSO; ++ } ++ if (ol->tnl_csum) { ++ offload |= TUN_F_UDP_TUNNEL_GSO_CSUM; ++ } + } + + if (ioctl(fd, TUNSETOFFLOAD, offload) != 0) { +diff --git a/net/tap.c b/net/tap.c +index 9f65e3fb3d..dc2a2859ec 100644 +--- a/net/tap.c ++++ b/net/tap.c +@@ -62,6 +62,8 @@ static const int kernel_feature_bits[] = { + VIRTIO_F_NOTIFICATION_DATA, + VIRTIO_NET_F_RSC_EXT, + VIRTIO_NET_F_HASH_REPORT, ++ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, ++ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, + VHOST_INVALID_FEATURE_BIT + }; + +-- +2.47.3 + diff --git a/kvm-net-implement-tunnel-probing.patch b/kvm-net-implement-tunnel-probing.patch new file mode 100644 index 0000000..24305cb --- /dev/null +++ b/kvm-net-implement-tunnel-probing.patch @@ -0,0 +1,317 @@ +From 0cad25c7acac95a4af0d46f4d0207de4ac973370 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:27 +0200 +Subject: [PATCH 18/19] net: implement tunnel probing + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [13/14] 3cd000a61b90ffc176f950686fb9d36d93bbee3f (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Tap devices support GSO over UDP tunnel offload. Probe for this +feature in a similar manner to other offloads. + +GSO over UDP tunnel needs to be enabled in addition to a "plain" +offload (TSO or USO). + +No need to check separately for the outer header checksum offload: +the kernel is going to support both of them or none. + +The new features are disabled by default to avoid compat issues, +and can be enabled, once hw_compat_10_1 has been added, +together with the related compat entries. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Acked-by: Stefano Garzarella +Reviewed-by: Michael S.
Tsirkin +Message-ID: +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit fffac046282c99801b62fa7fa1032cdc261bca6d) +Signed-off-by: Laurent Vivier +--- + hw/net/virtio-net.c | 41 +++++++++++++++++++++++++++++++++++++++++ + include/net/net.h | 3 +++ + net/net.c | 9 +++++++++ + net/tap-bsd.c | 5 +++++ + net/tap-linux.c | 11 +++++++++++ + net/tap-linux.h | 9 +++++++++ + net/tap-solaris.c | 5 +++++ + net/tap-stub.c | 5 +++++ + net/tap.c | 11 +++++++++++ + net/tap_int.h | 1 + + 10 files changed, 100 insertions(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index 89cf008401..0abb8c8a62 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -649,6 +649,15 @@ static int peer_has_uso(VirtIONet *n) + return qemu_has_uso(qemu_get_queue(n->nic)->peer); + } + ++static bool peer_has_tunnel(VirtIONet *n) ++{ ++ if (!peer_has_vnet_hdr(n)) { ++ return false; ++ } ++ ++ return qemu_has_tunnel(qemu_get_queue(n->nic)->peer); ++} ++ + static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs, + int version_1, int hash_report) + { +@@ -3073,6 +3082,13 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features, + virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO4); + virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6); + ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO); ++ virtio_clear_feature_ex(features, ++ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM); ++ virtio_clear_feature_ex(features, ++ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM); ++ + virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT); + } + +@@ -3086,6 +3102,15 @@ static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features, + virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6); + } + ++ if (!peer_has_tunnel(n)) { ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO); ++ virtio_clear_feature_ex(features, 
VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO); ++ virtio_clear_feature_ex(features, ++ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM); ++ virtio_clear_feature_ex(features, ++ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM); ++ } ++ + if (!get_vhost_net(nc->peer)) { + if (!use_own_hash) { + virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT); +@@ -4248,6 +4273,22 @@ static const Property virtio_net_properties[] = { + rss_data.specified_hash_types, + VIRTIO_NET_HASH_REPORT_UDPv6_EX - 1, + ON_OFF_AUTO_AUTO), ++ VIRTIO_DEFINE_PROP_FEATURE("host_tunnel", VirtIONet, ++ host_features_ex, ++ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, ++ false), ++ VIRTIO_DEFINE_PROP_FEATURE("host_tunnel_csum", VirtIONet, ++ host_features_ex, ++ VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, ++ false), ++ VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel", VirtIONet, ++ host_features_ex, ++ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, ++ false), ++ VIRTIO_DEFINE_PROP_FEATURE("guest_tunnel_csum", VirtIONet, ++ host_features_ex, ++ VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, ++ false), + }; + + static void virtio_net_class_init(ObjectClass *klass, const void *data) +diff --git a/include/net/net.h b/include/net/net.h +index 48ba333d02..9a9084690d 100644 +--- a/include/net/net.h ++++ b/include/net/net.h +@@ -65,6 +65,7 @@ typedef void (NetClientDestructor)(NetClientState *); + typedef RxFilterInfo *(QueryRxFilter)(NetClientState *); + typedef bool (HasUfo)(NetClientState *); + typedef bool (HasUso)(NetClientState *); ++typedef bool (HasTunnel)(NetClientState *); + typedef bool (HasVnetHdr)(NetClientState *); + typedef bool (HasVnetHdrLen)(NetClientState *, int); + typedef void (SetOffload)(NetClientState *, const NetOffloads *); +@@ -95,6 +96,7 @@ typedef struct NetClientInfo { + NetPoll *poll; + HasUfo *has_ufo; + HasUso *has_uso; ++ HasTunnel *has_tunnel; + HasVnetHdr *has_vnet_hdr; + HasVnetHdrLen *has_vnet_hdr_len; + SetOffload *set_offload; +@@ -197,6 +199,7 @@ void qemu_set_info_str(NetClientState *nc, + void 
qemu_format_nic_info_str(NetClientState *nc, uint8_t macaddr[6]); + bool qemu_has_ufo(NetClientState *nc); + bool qemu_has_uso(NetClientState *nc); ++bool qemu_has_tunnel(NetClientState *nc); + bool qemu_has_vnet_hdr(NetClientState *nc); + bool qemu_has_vnet_hdr_len(NetClientState *nc, int len); + void qemu_set_offload(NetClientState *nc, const NetOffloads *ol); +diff --git a/net/net.c b/net/net.c +index 63872b6855..9536184a0c 100644 +--- a/net/net.c ++++ b/net/net.c +@@ -522,6 +522,15 @@ bool qemu_has_uso(NetClientState *nc) + return nc->info->has_uso(nc); + } + ++bool qemu_has_tunnel(NetClientState *nc) ++{ ++ if (!nc || !nc->info->has_tunnel) { ++ return false; ++ } ++ ++ return nc->info->has_tunnel(nc); ++} ++ + bool qemu_has_vnet_hdr(NetClientState *nc) + { + if (!nc || !nc->info->has_vnet_hdr) { +diff --git a/net/tap-bsd.c b/net/tap-bsd.c +index 86b6edee94..751d4c819c 100644 +--- a/net/tap-bsd.c ++++ b/net/tap-bsd.c +@@ -217,6 +217,11 @@ int tap_probe_has_uso(int fd) + return 0; + } + ++bool tap_probe_has_tunnel(int fd) ++{ ++ return false; ++} ++ + void tap_fd_set_vnet_hdr_len(int fd, int len) + { + } +diff --git a/net/tap-linux.c b/net/tap-linux.c +index a1c58f74f5..e2628be798 100644 +--- a/net/tap-linux.c ++++ b/net/tap-linux.c +@@ -196,6 +196,17 @@ int tap_probe_has_uso(int fd) + return 1; + } + ++bool tap_probe_has_tunnel(int fd) ++{ ++ unsigned offload; ++ ++ offload = TUN_F_CSUM | TUN_F_TSO4 | TUN_F_UDP_TUNNEL_GSO; ++ if (ioctl(fd, TUNSETOFFLOAD, offload) < 0) { ++ return false; ++ } ++ return true; ++} ++ + void tap_fd_set_vnet_hdr_len(int fd, int len) + { + if (ioctl(fd, TUNSETVNETHDRSZ, &len) == -1) { +diff --git a/net/tap-linux.h b/net/tap-linux.h +index 9a58cecb7f..8cd6b5874b 100644 +--- a/net/tap-linux.h ++++ b/net/tap-linux.h +@@ -53,4 +53,13 @@ + #define TUN_F_USO4 0x20 /* I can handle USO for IPv4 packets */ + #define TUN_F_USO6 0x40 /* I can handle USO for IPv6 packets */ + ++/* I can handle TSO/USO for UDP tunneled packets */ ++#define 
TUN_F_UDP_TUNNEL_GSO 0x080 ++ ++/* ++ * I can handle TSO/USO for UDP tunneled packets requiring csum offload for ++ * the outer header ++ */ ++#define TUN_F_UDP_TUNNEL_GSO_CSUM 0x100 ++ + #endif /* QEMU_TAP_LINUX_H */ +diff --git a/net/tap-solaris.c b/net/tap-solaris.c +index 833c066bee..ac1ae25761 100644 +--- a/net/tap-solaris.c ++++ b/net/tap-solaris.c +@@ -222,6 +222,11 @@ int tap_probe_has_uso(int fd) + return 0; + } + ++bool tap_probe_has_tunnel(int fd) ++{ ++ return false; ++} ++ + void tap_fd_set_vnet_hdr_len(int fd, int len) + { + } +diff --git a/net/tap-stub.c b/net/tap-stub.c +index 67d14ad4d5..f7a5e0c163 100644 +--- a/net/tap-stub.c ++++ b/net/tap-stub.c +@@ -52,6 +52,11 @@ int tap_probe_has_uso(int fd) + return 0; + } + ++bool tap_probe_has_tunnel(int fd) ++{ ++ return false; ++} ++ + void tap_fd_set_vnet_hdr_len(int fd, int len) + { + } +diff --git a/net/tap.c b/net/tap.c +index 72046a43aa..9f65e3fb3d 100644 +--- a/net/tap.c ++++ b/net/tap.c +@@ -76,6 +76,7 @@ typedef struct TAPState { + bool using_vnet_hdr; + bool has_ufo; + bool has_uso; ++ bool has_tunnel; + bool enabled; + VHostNetState *vhost_net; + unsigned host_vnet_hdr_len; +@@ -246,6 +247,14 @@ static bool tap_has_uso(NetClientState *nc) + return s->has_uso; + } + ++static bool tap_has_tunnel(NetClientState *nc) ++{ ++ TAPState *s = DO_UPCAST(TAPState, nc, nc); ++ ++ assert(nc->info->type == NET_CLIENT_DRIVER_TAP); ++ return s->has_tunnel; ++} ++ + static bool tap_has_vnet_hdr(NetClientState *nc) + { + TAPState *s = DO_UPCAST(TAPState, nc, nc); +@@ -374,6 +383,7 @@ static NetClientInfo net_tap_info = { + .cleanup = tap_cleanup, + .has_ufo = tap_has_ufo, + .has_uso = tap_has_uso, ++ .has_tunnel = tap_has_tunnel, + .has_vnet_hdr = tap_has_vnet_hdr, + .has_vnet_hdr_len = tap_has_vnet_hdr_len, + .set_offload = tap_set_offload, +@@ -403,6 +413,7 @@ static TAPState *net_tap_fd_init(NetClientState *peer, + s->using_vnet_hdr = false; + s->has_ufo = tap_probe_has_ufo(s->fd); + s->has_uso = 
tap_probe_has_uso(s->fd); ++ s->has_tunnel = tap_probe_has_tunnel(s->fd); + s->enabled = true; + tap_set_offload(&s->nc, &ol); + /* +diff --git a/net/tap_int.h b/net/tap_int.h +index f8bbe1cb0c..b76a05044b 100644 +--- a/net/tap_int.h ++++ b/net/tap_int.h +@@ -38,6 +38,7 @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp); + int tap_probe_vnet_hdr(int fd, Error **errp); + int tap_probe_has_ufo(int fd); + int tap_probe_has_uso(int fd); ++bool tap_probe_has_tunnel(int fd); + void tap_fd_set_offload(int fd, const NetOffloads *ol); + void tap_fd_set_vnet_hdr_len(int fd, int len); + int tap_fd_set_vnet_le(int fd, int vnet_is_le); +-- +2.47.3 + diff --git a/kvm-qcow2-Fix-cache_clean_timer.patch b/kvm-qcow2-Fix-cache_clean_timer.patch new file mode 100644 index 0000000..00befca --- /dev/null +++ b/kvm-qcow2-Fix-cache_clean_timer.patch @@ -0,0 +1,338 @@ +From ebcb7dbc060ca295d2ec1daba67ba84fe2ede3bc Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Mon, 10 Nov 2025 16:48:47 +0100 +Subject: [PATCH 05/19] qcow2: Fix cache_clean_timer +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Czenczek +RH-MergeRequest: 454: Multithreading fixes for rbd, curl, qcow2 +RH-Jira: RHEL-79118 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/5] ff4bbc4f7c7cebeb8ba34a146854aca1f8aed86f (hreitz/qemu-kvm-c-9-s) + +The cache-cleaner runs as a timer CB in the BDS AioContext. With +multiqueue, it can run concurrently to I/O requests, and because it does +not take any lock, this can break concurrent cache accesses, corrupting +the image. While the chances of this happening are low, it can be +reproduced e.g. 
by modifying the code to schedule the timer CB every +5 ms (instead of at most once per second) and modifying the last (inner) +while loop of qcow2_cache_clean_unused() like so: + + while (i < c->size && can_clean_entry(c, i)) { + for (int j = 0; j < 1000 && can_clean_entry(c, i); j++) { + usleep(100); + } + c->entries[i].offset = 0; + c->entries[i].lru_counter = 0; + i++; + to_clean++; + } + +i.e. making it wait on purpose for the point in time where the cache is +in use by something else. + +The solution chosen for this in this patch is not the best solution, I +hope, but I admittedly can’t come up with anything strictly better. + +We can protect from concurrent cache accesses either by taking the +existing s->lock, or we introduce a new (non-coroutine) mutex +specifically for cache accesses. I would prefer to avoid the latter so +as not to introduce additional (very slight) overhead. + +Using s->lock, which is a coroutine mutex, however means that we need to +take it in a coroutine, so the timer must run in a coroutine. We can +transform it from the current timer CB style into a coroutine that +sleeps for the set interval. As a result, however, we can no longer +just deschedule the timer to instantly guarantee it won’t run anymore, +but have to await the coroutine’s exit. + +(Note even before this patch there were places that may not have been so +guaranteed after all: Anything calling cache_clean_timer_del() from the +QEMU main AioContext could have been running concurrently to an existing +timer CB invocation.) + +Polling to await the timer to actually settle seems very complicated for +something that’s rather a minor problem, but I can’t come up with any +better solution that doesn’t again just overlook potential problems. + +(Not Cc-ing qemu-stable, as the issue is quite unlikely to be hit, and +I’m not too fond of this solution.) 
+ +Signed-off-by: Hanna Czenczek +Message-ID: <20251110154854.151484-13-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit f86dde9a1524f0d7fca0f4037f560e6131d31f7f) +Signed-off-by: Hanna Czenczek +--- + block/qcow2.c | 141 ++++++++++++++++++++++++++++++++++++++++---------- + block/qcow2.h | 5 +- + 2 files changed, 117 insertions(+), 29 deletions(-) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 374ca53829..a6adfa9f84 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -835,41 +835,113 @@ static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = { + [QCOW2_OL_BITMAP_DIRECTORY_BITNR] = QCOW2_OPT_OVERLAP_BITMAP_DIRECTORY, + }; + +-static void cache_clean_timer_cb(void *opaque) ++static void coroutine_fn cache_clean_timer(void *opaque) + { +- BlockDriverState *bs = opaque; +- BDRVQcow2State *s = bs->opaque; +- qcow2_cache_clean_unused(s->l2_table_cache); +- qcow2_cache_clean_unused(s->refcount_block_cache); +- timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + +- (int64_t) s->cache_clean_interval * 1000); ++ BDRVQcow2State *s = opaque; ++ uint64_t wait_ns; ++ ++ WITH_QEMU_LOCK_GUARD(&s->lock) { ++ wait_ns = s->cache_clean_interval * NANOSECONDS_PER_SECOND; ++ } ++ ++ while (wait_ns > 0) { ++ qemu_co_sleep_ns_wakeable(&s->cache_clean_timer_wake, ++ QEMU_CLOCK_VIRTUAL, wait_ns); ++ ++ WITH_QEMU_LOCK_GUARD(&s->lock) { ++ if (s->cache_clean_interval > 0) { ++ qcow2_cache_clean_unused(s->l2_table_cache); ++ qcow2_cache_clean_unused(s->refcount_block_cache); ++ } ++ ++ wait_ns = s->cache_clean_interval * NANOSECONDS_PER_SECOND; ++ } ++ } ++ ++ WITH_QEMU_LOCK_GUARD(&s->lock) { ++ s->cache_clean_timer_co = NULL; ++ qemu_co_queue_restart_all(&s->cache_clean_timer_exit); ++ } + } + + static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context) + { + BDRVQcow2State *s = bs->opaque; + if (s->cache_clean_interval > 0) { +- s->cache_clean_timer = +- aio_timer_new_with_attrs(context, 
QEMU_CLOCK_VIRTUAL, +- SCALE_MS, QEMU_TIMER_ATTR_EXTERNAL, +- cache_clean_timer_cb, bs); +- timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + +- (int64_t) s->cache_clean_interval * 1000); ++ assert(!s->cache_clean_timer_co); ++ s->cache_clean_timer_co = qemu_coroutine_create(cache_clean_timer, s); ++ aio_co_enter(context, s->cache_clean_timer_co); + } + } + +-static void cache_clean_timer_del(BlockDriverState *bs) ++/** ++ * Delete the cache clean timer and await any yet running instance. ++ * Called holding s->lock. ++ */ ++static void coroutine_fn ++cache_clean_timer_co_locked_del_and_wait(BlockDriverState *bs) ++{ ++ BDRVQcow2State *s = bs->opaque; ++ ++ if (s->cache_clean_timer_co) { ++ s->cache_clean_interval = 0; ++ qemu_co_sleep_wake(&s->cache_clean_timer_wake); ++ qemu_co_queue_wait(&s->cache_clean_timer_exit, &s->lock); ++ } ++} ++ ++/** ++ * Same as cache_clean_timer_co_locked_del_and_wait(), but takes s->lock. ++ */ ++static void coroutine_fn ++cache_clean_timer_co_del_and_wait(BlockDriverState *bs) + { + BDRVQcow2State *s = bs->opaque; +- if (s->cache_clean_timer) { +- timer_free(s->cache_clean_timer); +- s->cache_clean_timer = NULL; ++ ++ WITH_QEMU_LOCK_GUARD(&s->lock) { ++ cache_clean_timer_co_locked_del_and_wait(bs); ++ } ++} ++ ++struct CacheCleanTimerDelAndWaitCoParams { ++ BlockDriverState *bs; ++ bool done; ++}; ++ ++static void coroutine_fn cache_clean_timer_del_and_wait_co_entry(void *opaque) ++{ ++ struct CacheCleanTimerDelAndWaitCoParams *p = opaque; ++ ++ cache_clean_timer_co_del_and_wait(p->bs); ++ p->done = true; ++ aio_wait_kick(); ++} ++ ++/** ++ * Delete the cache clean timer and await any yet running instance. ++ * Must be called from the main or BDS AioContext without s->lock held. 
++ */ ++static void coroutine_mixed_fn ++cache_clean_timer_del_and_wait(BlockDriverState *bs) ++{ ++ IO_OR_GS_CODE(); ++ ++ if (qemu_in_coroutine()) { ++ cache_clean_timer_co_del_and_wait(bs); ++ } else { ++ struct CacheCleanTimerDelAndWaitCoParams p = { .bs = bs }; ++ Coroutine *co; ++ ++ co = qemu_coroutine_create(cache_clean_timer_del_and_wait_co_entry, &p); ++ qemu_coroutine_enter(co); ++ ++ BDRV_POLL_WHILE(bs, !p.done); + } + } + + static void qcow2_detach_aio_context(BlockDriverState *bs) + { +- cache_clean_timer_del(bs); ++ cache_clean_timer_del_and_wait(bs); + } + + static void qcow2_attach_aio_context(BlockDriverState *bs, +@@ -1214,12 +1286,24 @@ fail: + return ret; + } + ++/* s_locked specifies whether s->lock is held or not */ + static void qcow2_update_options_commit(BlockDriverState *bs, +- Qcow2ReopenState *r) ++ Qcow2ReopenState *r, ++ bool s_locked) + { + BDRVQcow2State *s = bs->opaque; + int i; + ++ /* ++ * We need to stop the cache-clean-timer before destroying the metadata ++ * table caches ++ */ ++ if (s_locked) { ++ cache_clean_timer_co_locked_del_and_wait(bs); ++ } else { ++ cache_clean_timer_del_and_wait(bs); ++ } ++ + if (s->l2_table_cache) { + qcow2_cache_destroy(s->l2_table_cache); + } +@@ -1228,6 +1312,7 @@ static void qcow2_update_options_commit(BlockDriverState *bs, + } + s->l2_table_cache = r->l2_table_cache; + s->refcount_block_cache = r->refcount_block_cache; ++ + s->l2_slice_size = r->l2_slice_size; + + s->overlap_check = r->overlap_check; +@@ -1239,11 +1324,8 @@ static void qcow2_update_options_commit(BlockDriverState *bs, + + s->discard_no_unref = r->discard_no_unref; + +- if (s->cache_clean_interval != r->cache_clean_interval) { +- cache_clean_timer_del(bs); +- s->cache_clean_interval = r->cache_clean_interval; +- cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); +- } ++ s->cache_clean_interval = r->cache_clean_interval; ++ cache_clean_timer_init(bs, bdrv_get_aio_context(bs)); + + 
qapi_free_QCryptoBlockOpenOptions(s->crypto_opts); + s->crypto_opts = r->crypto_opts; +@@ -1261,6 +1343,7 @@ static void qcow2_update_options_abort(BlockDriverState *bs, + qapi_free_QCryptoBlockOpenOptions(r->crypto_opts); + } + ++/* Called with s->lock held */ + static int coroutine_fn GRAPH_RDLOCK + qcow2_update_options(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +@@ -1270,7 +1353,7 @@ qcow2_update_options(BlockDriverState *bs, QDict *options, int flags, + + ret = qcow2_update_options_prepare(bs, &r, options, flags, errp); + if (ret >= 0) { +- qcow2_update_options_commit(bs, &r); ++ qcow2_update_options_commit(bs, &r, true); + } else { + qcow2_update_options_abort(bs, &r); + } +@@ -1914,7 +1997,7 @@ qcow2_do_open(BlockDriverState *bs, QDict *options, int flags, + qemu_vfree(s->l1_table); + /* else pre-write overlap checks in cache_destroy may crash */ + s->l1_table = NULL; +- cache_clean_timer_del(bs); ++ cache_clean_timer_co_locked_del_and_wait(bs); + if (s->l2_table_cache) { + qcow2_cache_destroy(s->l2_table_cache); + } +@@ -1969,6 +2052,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, + + /* Initialise locks */ + qemu_co_mutex_init(&s->lock); ++ qemu_co_queue_init(&s->cache_clean_timer_exit); + + assert(!qemu_in_coroutine()); + assert(qemu_get_current_aio_context() == qemu_get_aio_context()); +@@ -2054,7 +2138,7 @@ static void qcow2_reopen_commit(BDRVReopenState *state) + + GRAPH_RDLOCK_GUARD_MAINLOOP(); + +- qcow2_update_options_commit(state->bs, state->opaque); ++ qcow2_update_options_commit(state->bs, state->opaque, false); + if (!s->data_file) { + /* + * If we don't have an external data file, s->data_file was cleared by +@@ -2811,7 +2895,7 @@ qcow2_do_close(BlockDriverState *bs, bool close_data_file) + qcow2_inactivate(bs); + } + +- cache_clean_timer_del(bs); ++ cache_clean_timer_del_and_wait(bs); + qcow2_cache_destroy(s->l2_table_cache); + qcow2_cache_destroy(s->refcount_block_cache); + +@@ -2881,6 +2965,7 
@@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp) + s->data_file = data_file; + /* Re-initialize objects initialized in qcow2_open() */ + qemu_co_mutex_init(&s->lock); ++ qemu_co_queue_init(&s->cache_clean_timer_exit); + + options = qdict_clone_shallow(bs->options); + +diff --git a/block/qcow2.h b/block/qcow2.h +index a9e3481c6e..3e38bccd87 100644 +--- a/block/qcow2.h ++++ b/block/qcow2.h +@@ -345,8 +345,11 @@ typedef struct BDRVQcow2State { + + Qcow2Cache *l2_table_cache; + Qcow2Cache *refcount_block_cache; +- QEMUTimer *cache_clean_timer; ++ /* Non-NULL while the timer is running */ ++ Coroutine *cache_clean_timer_co; + unsigned cache_clean_interval; ++ QemuCoSleep cache_clean_timer_wake; ++ CoQueue cache_clean_timer_exit; + + QLIST_HEAD(, QCowL2Meta) cluster_allocs; + +-- +2.47.3 + diff --git a/kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch b/kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch new file mode 100644 index 0000000..738ae4a --- /dev/null +++ b/kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch @@ -0,0 +1,45 @@ +From cfe4b6407a08b56490c3cadd4e01f810fb22070c Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Mon, 10 Nov 2025 16:48:46 +0100 +Subject: [PATCH 04/19] qcow2: Re-initialize lock in invalidate_cache + +RH-Author: Hanna Czenczek +RH-MergeRequest: 454: Multithreading fixes for rbd, curl, qcow2 +RH-Jira: RHEL-79118 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/5] 48ead2f35d8558fda48a2098240c0203f4c547b0 (hreitz/qemu-kvm-c-9-s) + +After clearing our state (memset()-ing it to 0), we should +re-initialize objects that need it. Specifically, that applies to +s->lock, which is originally initialized in qcow2_open(). + +Given qemu_co_mutex_init() is just a memset() to 0, this is functionally +a no-op, but still seems like the right thing to do. 
+ +Signed-off-by: Hanna Czenczek +Message-ID: <20251110154854.151484-12-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 90db3a1721b30daf839901813616c0854f383cc5) +Signed-off-by: Hanna Czenczek +--- + block/qcow2.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/block/qcow2.c b/block/qcow2.c +index 6df65aab93..374ca53829 100644 +--- a/block/qcow2.c ++++ b/block/qcow2.c +@@ -2879,6 +2879,8 @@ qcow2_co_invalidate_cache(BlockDriverState *bs, Error **errp) + data_file = s->data_file; + memset(s, 0, sizeof(BDRVQcow2State)); + s->data_file = data_file; ++ /* Re-initialize objects initialized in qcow2_open() */ ++ qemu_co_mutex_init(&s->lock); + + options = qdict_clone_shallow(bs->options); + +-- +2.47.3 + diff --git a/kvm-qmp-update-virtio-features-map-to-support-extended-f.patch b/kvm-qmp-update-virtio-features-map-to-support-extended-f.patch new file mode 100644 index 0000000..279bf95 --- /dev/null +++ b/kvm-qmp-update-virtio-features-map-to-support-extended-f.patch @@ -0,0 +1,329 @@ +From af8775763edabeeb175f72d3db0b23a20fc177e0 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:23 +0200 +Subject: [PATCH 14/19] qmp: update virtio features map to support extended + features + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [9/14] c4ebb3187b6b7aa0c0e631363110a05f538c827b (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Extend the VirtioDeviceFeatures struct with an additional u64 +to track unknown features in the 64-127 bit range and decode +the full virtio feature space for vhost and virtio devices. + +Also add entries for the soon-to-be-supported virtio net GSO over +UDP tunnel features.
+ +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Markus Armbruster +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit a76f5b795cab8ea0654e4813caa694470d7250a9) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-hmp-cmds.c | 3 +- + hw/virtio/virtio-qmp.c | 91 +++++++++++++++++++++++++------------ + hw/virtio/virtio-qmp.h | 3 +- + qapi/virtio.json | 9 +++- + 4 files changed, 74 insertions(+), 32 deletions(-) + +diff --git a/hw/virtio/virtio-hmp-cmds.c b/hw/virtio/virtio-hmp-cmds.c +index 7d8677bcf0..1daae482d3 100644 +--- a/hw/virtio/virtio-hmp-cmds.c ++++ b/hw/virtio/virtio-hmp-cmds.c +@@ -74,7 +74,8 @@ static void hmp_virtio_dump_features(Monitor *mon, + } + + if (features->has_unknown_dev_features) { +- monitor_printf(mon, " unknown-features(0x%016"PRIx64")\n", ++ monitor_printf(mon, " unknown-features(0x%016"PRIx64"%016"PRIx64")\n", ++ features->unknown_dev_features2, + features->unknown_dev_features); + } + } +diff --git a/hw/virtio/virtio-qmp.c b/hw/virtio/virtio-qmp.c +index 3b6377cf0d..b338344c6c 100644 +--- a/hw/virtio/virtio-qmp.c ++++ b/hw/virtio/virtio-qmp.c +@@ -325,6 +325,20 @@ static const qmp_virtio_feature_map_t virtio_net_feature_map[] = { + FEATURE_ENTRY(VHOST_USER_F_PROTOCOL_FEATURES, \ + "VHOST_USER_F_PROTOCOL_FEATURES: Vhost-user protocol features " + "negotiation supported"), ++ FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO, \ ++ "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over " ++ "UDP tunnel packets"), ++ FEATURE_ENTRY(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO_CSUM, \ ++ "VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO: Driver can receive GSO over " ++ "UDP tunnel packets requiring checksum offload for the outer " ++ "header"), ++ FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO, \ ++ "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO: Device can receive GSO over " ++ "UDP tunnel packets"), ++ 
FEATURE_ENTRY(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM, \ ++ "VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO_CSUM: Device can receive GSO over " ++ "UDP tunnel packets requiring checksum offload for the outer " ++ "header"), + { -1, "" } + }; + #endif +@@ -510,6 +524,24 @@ static const qmp_virtio_feature_map_t virtio_gpio_feature_map[] = { + list; \ + }) + ++#define CONVERT_FEATURES_EX(type, map, bitmap) \ ++ ({ \ ++ type *list = NULL; \ ++ type *node; \ ++ for (i = 0; map[i].virtio_bit != -1; i++) { \ ++ bit = map[i].virtio_bit; \ ++ if (!virtio_has_feature_ex(bitmap, bit)) { \ ++ continue; \ ++ } \ ++ node = g_new0(type, 1); \ ++ node->value = g_strdup(map[i].feature_desc); \ ++ node->next = list; \ ++ list = node; \ ++ virtio_clear_feature_ex(bitmap, bit); \ ++ } \ ++ list; \ ++ }) ++ + VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap) + { + VirtioDeviceStatus *status; +@@ -545,109 +577,112 @@ VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap) + return vhu_protocols; + } + +-VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) ++VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, ++ const uint64_t *bmap) + { ++ uint64_t bitmap[VIRTIO_FEATURES_NU64S]; + VirtioDeviceFeatures *features; + uint64_t bit; + int i; + ++ virtio_features_copy(bitmap, bmap); + features = g_new0(VirtioDeviceFeatures, 1); + features->has_dev_features = true; + + /* transport features */ +- features->transports = CONVERT_FEATURES(strList, virtio_transport_map, 0, +- bitmap); ++ features->transports = CONVERT_FEATURES_EX(strList, virtio_transport_map, ++ bitmap); + + /* device features */ + switch (device_id) { + #ifdef CONFIG_VIRTIO_SERIAL + case VIRTIO_ID_CONSOLE: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_serial_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_serial_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_BLK + case VIRTIO_ID_BLOCK: + features->dev_features = +- CONVERT_FEATURES(strList, 
virtio_blk_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_blk_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_GPU + case VIRTIO_ID_GPU: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_gpu_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_gpu_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_NET + case VIRTIO_ID_NET: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_net_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_net_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_SCSI + case VIRTIO_ID_SCSI: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_scsi_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_scsi_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_BALLOON + case VIRTIO_ID_BALLOON: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_balloon_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_balloon_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_IOMMU + case VIRTIO_ID_IOMMU: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_iommu_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_iommu_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_INPUT + case VIRTIO_ID_INPUT: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_input_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_input_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VHOST_USER_FS + case VIRTIO_ID_FS: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_fs_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_fs_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VHOST_VSOCK + case VIRTIO_ID_VSOCK: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_vsock_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_vsock_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_CRYPTO + case 
VIRTIO_ID_CRYPTO: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_crypto_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_crypto_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_MEM + case VIRTIO_ID_MEM: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_mem_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_mem_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_I2C_ADAPTER + case VIRTIO_ID_I2C_ADAPTER: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_i2c_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_i2c_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VIRTIO_RNG + case VIRTIO_ID_RNG: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_rng_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_rng_feature_map, bitmap); + break; + #endif + #ifdef CONFIG_VHOST_USER_GPIO + case VIRTIO_ID_GPIO: + features->dev_features = +- CONVERT_FEATURES(strList, virtio_gpio_feature_map, 0, bitmap); ++ CONVERT_FEATURES_EX(strList, virtio_gpio_feature_map, bitmap); + break; + #endif + /* No features */ +@@ -680,10 +715,9 @@ VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap) + g_assert_not_reached(); + } + +- features->has_unknown_dev_features = bitmap != 0; +- if (features->has_unknown_dev_features) { +- features->unknown_dev_features = bitmap; +- } ++ features->has_unknown_dev_features = !virtio_features_empty(bitmap); ++ features->unknown_dev_features = bitmap[0]; ++ features->unknown_dev_features2 = bitmap[1]; + + return features; + } +@@ -743,11 +777,11 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) + status->device_id = vdev->device_id; + status->vhost_started = vdev->vhost_started; + status->guest_features = qmp_decode_features(vdev->device_id, +- vdev->guest_features); ++ vdev->guest_features_ex); + status->host_features = qmp_decode_features(vdev->device_id, +- vdev->host_features); 
++ vdev->host_features_ex); + status->backend_features = qmp_decode_features(vdev->device_id, +- vdev->backend_features); ++ vdev->backend_features_ex); + + switch (vdev->device_endian) { + case VIRTIO_DEVICE_ENDIAN_LITTLE: +@@ -785,11 +819,12 @@ VirtioStatus *qmp_x_query_virtio_status(const char *path, Error **errp) + status->vhost_dev->nvqs = hdev->nvqs; + status->vhost_dev->vq_index = hdev->vq_index; + status->vhost_dev->features = +- qmp_decode_features(vdev->device_id, hdev->features); ++ qmp_decode_features(vdev->device_id, hdev->features_ex); + status->vhost_dev->acked_features = +- qmp_decode_features(vdev->device_id, hdev->acked_features); ++ qmp_decode_features(vdev->device_id, hdev->acked_features_ex); + status->vhost_dev->backend_features = +- qmp_decode_features(vdev->device_id, hdev->backend_features); ++ qmp_decode_features(vdev->device_id, hdev->backend_features_ex); ++ + status->vhost_dev->protocol_features = + qmp_decode_protocols(hdev->protocol_features); + status->vhost_dev->max_queues = hdev->max_queues; +diff --git a/hw/virtio/virtio-qmp.h b/hw/virtio/virtio-qmp.h +index 245a446a56..e0a1e49035 100644 +--- a/hw/virtio/virtio-qmp.h ++++ b/hw/virtio/virtio-qmp.h +@@ -18,6 +18,7 @@ + VirtIODevice *qmp_find_virtio_device(const char *path); + VirtioDeviceStatus *qmp_decode_status(uint8_t bitmap); + VhostDeviceProtocols *qmp_decode_protocols(uint64_t bitmap); +-VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, uint64_t bitmap); ++VirtioDeviceFeatures *qmp_decode_features(uint16_t device_id, ++ const uint64_t *bitmap); + + #endif +diff --git a/qapi/virtio.json b/qapi/virtio.json +index 9d652fe4a8..05295ab665 100644 +--- a/qapi/virtio.json ++++ b/qapi/virtio.json +@@ -247,6 +247,7 @@ + # }, + # "host-features": { + # "unknown-dev-features": 1073741824, ++# "unknown-dev-features2": 0, + # "dev-features": [], + # "transports": [ + # "VIRTIO_RING_F_EVENT_IDX: Used & avail. 
event fields enabled", +@@ -490,14 +491,18 @@ + # unique features) + # + # @unknown-dev-features: Virtio device features bitmap that have not +-# been decoded ++# been decoded (bits 0-63) ++# ++# @unknown-dev-features2: Virtio device features bitmap that have not ++# been decoded (bits 64-127) (since 10.2) + # + # Since: 7.2 + ## + { 'struct': 'VirtioDeviceFeatures', + 'data': { 'transports': [ 'str' ], + '*dev-features': [ 'str' ], +- '*unknown-dev-features': 'uint64' } } ++ '*unknown-dev-features': 'uint64', ++ '*unknown-dev-features2': 'uint64' } } + + ## + # @VirtQueueStatus: +-- +2.47.3 + diff --git a/kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch b/kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch new file mode 100644 index 0000000..789185b --- /dev/null +++ b/kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch @@ -0,0 +1,127 @@ +From 339f5506d877c9e32852aff3cbe165a0aa2964c5 Mon Sep 17 00:00:00 2001 +From: Hanna Czenczek +Date: Mon, 10 Nov 2025 16:48:37 +0100 +Subject: [PATCH 01/19] =?UTF-8?q?rbd:=20Run=20co=20BH=20CB=20in=20the=20co?= + =?UTF-8?q?routine=E2=80=99s=20AioContext?= +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Czenczek +RH-MergeRequest: 454: Multithreading fixes for rbd, curl, qcow2 +RH-Jira: RHEL-79118 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/5] 6d858f7d65d6358bf7b8b58dc6e5a2ea9e8599fe (hreitz/qemu-kvm-c-9-s) + +qemu_rbd_completion_cb() schedules the request completion code +(qemu_rbd_finish_bh()) to run in the BDS’s AioContext, assuming that +this is the same thread in which qemu_rbd_start_co() runs. 
+ +To explain, this is how both latter functions interact: + +In qemu_rbd_start_co(): + + while (!task.complete) + qemu_coroutine_yield(); + +In qemu_rbd_finish_bh(): + + task->complete = true; + aio_co_wake(task->co); // task->co is qemu_rbd_start_co() + +For this interaction to work reliably, both must run in the same thread +so that qemu_rbd_finish_bh() can only run once the coroutine yields. +Otherwise, finish_bh() may run before start_co() checks task.complete, +which will result in the latter seeing .complete as true immediately and +skipping the yield altogether, even though finish_bh() still wakes it. + +With multiqueue, the BDS’s AioContext is not necessarily the thread +start_co() runs in, and so finish_bh() may be scheduled to run in a +different thread than start_co(). With the right timing, this will +cause the problems described above; waking a non-yielding coroutine is +not good, as can be reproduced by putting e.g. a usleep(100000) above +the while loop in start_co() (and using multiqueue), giving finish_bh() +a much better chance at exiting before start_co() can yield. + +So instead of scheduling finish_bh() in the BDS’s AioContext, schedule +finish_bh() in task->co’s AioContext. + +In addition, we can get rid of task.complete altogether because we will +get woken exactly once, when the task is indeed complete, no need to +check. + +(We could go further and drop the BH, running aio_co_wake() directly in +qemu_rbd_completion_cb() because we are allowed to do that even if the +coroutine isn’t yet yielding and we’re in a different thread – but the +doc comment on qemu_rbd_completion_cb() says to be careful, so I decided +not to go so far here.) 
+ +Buglink: https://issues.redhat.com/browse/RHEL-67115 +Reported-by: Junyao Zhao +Cc: qemu-stable@nongnu.org +Signed-off-by: Hanna Czenczek +Message-ID: <20251110154854.151484-3-hreitz@redhat.com> +Reviewed-by: Kevin Wolf +Signed-off-by: Kevin Wolf +(cherry picked from commit 89d22536d1a1715083ef8118fe7e6e9239f900c1) +Signed-off-by: Hanna Czenczek +--- + block/rbd.c | 12 ++++-------- + 1 file changed, 4 insertions(+), 8 deletions(-) + +diff --git a/block/rbd.c b/block/rbd.c +index 3611dc81cf..2a70b5a983 100644 +--- a/block/rbd.c ++++ b/block/rbd.c +@@ -110,9 +110,7 @@ typedef struct BDRVRBDState { + } BDRVRBDState; + + typedef struct RBDTask { +- BlockDriverState *bs; + Coroutine *co; +- bool complete; + int64_t ret; + } RBDTask; + +@@ -1309,7 +1307,6 @@ static int qemu_rbd_resize(BlockDriverState *bs, uint64_t size) + static void qemu_rbd_finish_bh(void *opaque) + { + RBDTask *task = opaque; +- task->complete = true; + aio_co_wake(task->co); + } + +@@ -1326,7 +1323,7 @@ static void qemu_rbd_completion_cb(rbd_completion_t c, RBDTask *task) + { + task->ret = rbd_aio_get_return_value(c); + rbd_aio_release(c); +- aio_bh_schedule_oneshot(bdrv_get_aio_context(task->bs), ++ aio_bh_schedule_oneshot(qemu_coroutine_get_aio_context(task->co), + qemu_rbd_finish_bh, task); + } + +@@ -1338,7 +1335,7 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, + RBDAIOCmd cmd) + { + BDRVRBDState *s = bs->opaque; +- RBDTask task = { .bs = bs, .co = qemu_coroutine_self() }; ++ RBDTask task = { .co = qemu_coroutine_self() }; + rbd_completion_t c; + int r; + +@@ -1401,9 +1398,8 @@ static int coroutine_fn qemu_rbd_start_co(BlockDriverState *bs, + return r; + } + +- while (!task.complete) { +- qemu_coroutine_yield(); +- } ++ /* Expect exactly a single wake from qemu_rbd_finish_bh() */ ++ qemu_coroutine_yield(); + + if (task.ret < 0) { + error_report("rbd request failed: cmd %d offset %" PRIu64 " bytes %" +-- +2.47.3 + diff --git 
a/kvm-vhost-add-support-for-negotiating-extended-features.patch b/kvm-vhost-add-support-for-negotiating-extended-features.patch new file mode 100644 index 0000000..d3e50e5 --- /dev/null +++ b/kvm-vhost-add-support-for-negotiating-extended-features.patch @@ -0,0 +1,292 @@ +From 32cf8e1f3e289c67f49240e8301979d78801258b Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:22 +0200 +Subject: [PATCH 13/19] vhost: add support for negotiating extended features + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [8/14] 7546d4a0222e49da3a0536a703b89a015a80932c (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Similar to virtio infra, vhost core maintains the features status +in the full extended format and allows the devices to implement +extended version of the getter/setter. + +Note that 'protocol_features' are not extended: they are only +used by vhost-user, and the latter device is not going to implement +extended features soon. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 9f979ef0e01a2dd47d167c482a9e2d1dcdff2d3f) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost.c | 68 ++++++++++++++++++++++--------- + include/hw/virtio/vhost-backend.h | 6 +++ + include/hw/virtio/vhost.h | 56 +++++++++++++++++++++---- + 3 files changed, 103 insertions(+), 27 deletions(-) + +diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c +index 6557c58d12..5f485ad6cb 100644 +--- a/hw/virtio/vhost.c ++++ b/hw/virtio/vhost.c +@@ -972,20 +972,34 @@ static int vhost_virtqueue_set_addr(struct vhost_dev *dev, + static int vhost_dev_set_features(struct vhost_dev *dev, + bool enable_log) + { +- uint64_t features = dev->acked_features; ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + int r; ++ ++ virtio_features_copy(features, dev->acked_features_ex); + if (enable_log) { +- features |= 0x1ULL << VHOST_F_LOG_ALL; ++ virtio_add_feature_ex(features, VHOST_F_LOG_ALL); + } + if (!vhost_dev_has_iommu(dev)) { +- features &= ~(0x1ULL << VIRTIO_F_IOMMU_PLATFORM); ++ virtio_clear_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM); + } + if (dev->vhost_ops->vhost_force_iommu) { + if (dev->vhost_ops->vhost_force_iommu(dev) == true) { +- features |= 0x1ULL << VIRTIO_F_IOMMU_PLATFORM; ++ virtio_add_feature_ex(features, VIRTIO_F_IOMMU_PLATFORM); + } + } +- r = dev->vhost_ops->vhost_set_features(dev, features); ++ ++ if (virtio_features_use_ex(features) && ++ !dev->vhost_ops->vhost_set_features_ex) { ++ r = -EINVAL; ++ VHOST_OPS_DEBUG(r, "extended features without device support"); ++ goto out; ++ } ++ ++ if (dev->vhost_ops->vhost_set_features_ex) { ++ r = dev->vhost_ops->vhost_set_features_ex(dev, features); ++ } else { ++ r = dev->vhost_ops->vhost_set_features(dev, features[0]); ++ } + if (r < 0) { + VHOST_OPS_DEBUG(r, "vhost_set_features failed"); + goto out; +@@ -1508,12 +1522,27 @@ static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq) + } + } + ++static int vhost_dev_get_features(struct vhost_dev *hdev, ++ uint64_t *features) ++{ ++ 
uint64_t features64; ++ int r; ++ ++ if (hdev->vhost_ops->vhost_get_features_ex) { ++ return hdev->vhost_ops->vhost_get_features_ex(hdev, features); ++ } ++ ++ r = hdev->vhost_ops->vhost_get_features(hdev, &features64); ++ virtio_features_from_u64(features, features64); ++ return r; ++} ++ + int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + VhostBackendType backend_type, uint32_t busyloop_timeout, + Error **errp) + { ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + unsigned int used, reserved, limit; +- uint64_t features; + int i, r, n_initialized_vqs = 0; + + hdev->vdev = NULL; +@@ -1533,7 +1562,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + goto fail; + } + +- r = hdev->vhost_ops->vhost_get_features(hdev, &features); ++ r = vhost_dev_get_features(hdev, features); + if (r < 0) { + error_setg_errno(errp, -r, "vhost_get_features failed"); + goto fail; +@@ -1571,7 +1600,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + } + } + +- hdev->features = features; ++ virtio_features_copy(hdev->features_ex, features); + + hdev->memory_listener = (MemoryListener) { + .name = "vhost", +@@ -1594,7 +1623,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, + }; + + if (hdev->migration_blocker == NULL) { +- if (!(hdev->features & (0x1ULL << VHOST_F_LOG_ALL))) { ++ if (!virtio_has_feature_ex(hdev->features_ex, VHOST_F_LOG_ALL)) { + error_setg(&hdev->migration_blocker, + "Migration disabled: vhost lacks VHOST_F_LOG_ALL feature."); + } else if (vhost_dev_log_is_shared(hdev) && !qemu_memfd_alloc_check()) { +@@ -1859,28 +1888,27 @@ static void vhost_start_config_intr(struct vhost_dev *dev) + } + } + +-uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, +- uint64_t features) ++void vhost_get_features_ex(struct vhost_dev *hdev, ++ const int *feature_bits, ++ uint64_t *features) + { + const int *bit = feature_bits; ++ + while (*bit != VHOST_INVALID_FEATURE_BIT) { +- uint64_t bit_mask = (1ULL << *bit); +- if 
(!(hdev->features & bit_mask)) { +- features &= ~bit_mask; ++ if (!virtio_has_feature_ex(hdev->features_ex, *bit)) { ++ virtio_clear_feature_ex(features, *bit); + } + bit++; + } +- return features; + } + +-void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, +- uint64_t features) ++void vhost_ack_features_ex(struct vhost_dev *hdev, const int *feature_bits, ++ const uint64_t *features) + { + const int *bit = feature_bits; + while (*bit != VHOST_INVALID_FEATURE_BIT) { +- uint64_t bit_mask = (1ULL << *bit); +- if (features & bit_mask) { +- hdev->acked_features |= bit_mask; ++ if (virtio_has_feature_ex(features, *bit)) { ++ virtio_add_feature_ex(hdev->acked_features_ex, *bit); + } + bit++; + } +diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h +index d6df209a2f..ff94fa1734 100644 +--- a/include/hw/virtio/vhost-backend.h ++++ b/include/hw/virtio/vhost-backend.h +@@ -95,6 +95,10 @@ typedef int (*vhost_new_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); + typedef int (*vhost_free_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); ++typedef int (*vhost_set_features_ex_op)(struct vhost_dev *dev, ++ const uint64_t *features); ++typedef int (*vhost_get_features_ex_op)(struct vhost_dev *dev, ++ uint64_t *features); + typedef int (*vhost_set_features_op)(struct vhost_dev *dev, + uint64_t features); + typedef int (*vhost_get_features_op)(struct vhost_dev *dev, +@@ -186,6 +190,8 @@ typedef struct VhostOps { + vhost_free_worker_op vhost_free_worker; + vhost_get_vring_worker_op vhost_get_vring_worker; + vhost_attach_vring_worker_op vhost_attach_vring_worker; ++ vhost_set_features_ex_op vhost_set_features_ex; ++ vhost_get_features_ex_op vhost_get_features_ex; + vhost_set_features_op vhost_set_features; + vhost_get_features_op vhost_get_features; + vhost_set_backend_cap_op vhost_set_backend_cap; +diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h +index 
66be6afc88..08bbb4dfe9 100644 +--- a/include/hw/virtio/vhost.h ++++ b/include/hw/virtio/vhost.h +@@ -107,9 +107,9 @@ struct vhost_dev { + * future use should be discouraged and the variable retired as + * its easy to confuse with the VirtIO backend_features. + */ +- uint64_t features; +- uint64_t acked_features; +- uint64_t backend_features; ++ VIRTIO_DECLARE_FEATURES(features); ++ VIRTIO_DECLARE_FEATURES(acked_features); ++ VIRTIO_DECLARE_FEATURES(backend_features); + + /** + * @protocol_features: is the vhost-user only feature set by +@@ -320,6 +320,20 @@ bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n); + void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + bool mask); + ++/** ++ * vhost_get_features_ex() - sanitize the extended features set ++ * @hdev: common vhost_dev structure ++ * @feature_bits: pointer to terminated table of feature bits ++ * @features: original features set, filtered out on return ++ * ++ * This is the extended variant of vhost_get_features(), supporting the ++ * the extended features set. Filter it with the intersection of what is ++ * supported by the vhost backend (hdev->features) and the supported ++ * feature_bits. ++ */ ++void vhost_get_features_ex(struct vhost_dev *hdev, ++ const int *feature_bits, ++ uint64_t *features); + /** + * vhost_get_features() - return a sanitised set of feature bits + * @hdev: common vhost_dev structure +@@ -330,8 +344,28 @@ void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n, + * is supported by the vhost backend (hdev->features), the supported + * feature_bits and the requested feature set. 
+ */ +-uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, +- uint64_t features); ++static inline uint64_t vhost_get_features(struct vhost_dev *hdev, ++ const int *feature_bits, ++ uint64_t features) ++{ ++ uint64_t features_ex[VIRTIO_FEATURES_NU64S]; ++ ++ virtio_features_from_u64(features_ex, features); ++ vhost_get_features_ex(hdev, feature_bits, features_ex); ++ return features_ex[0]; ++} ++ ++/** ++ * vhost_ack_features_ex() - set vhost full set of acked_features ++ * @hdev: common vhost_dev structure ++ * @feature_bits: pointer to terminated table of feature bits ++ * @features: requested feature set ++ * ++ * This sets the internal hdev->acked_features to the intersection of ++ * the backends advertised features and the supported feature_bits. ++ */ ++void vhost_ack_features_ex(struct vhost_dev *hdev, const int *feature_bits, ++ const uint64_t *features); + + /** + * vhost_ack_features() - set vhost acked_features +@@ -342,8 +376,16 @@ uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits, + * This sets the internal hdev->acked_features to the intersection of + * the backends advertised features and the supported feature_bits. 
+ */ +-void vhost_ack_features(struct vhost_dev *hdev, const int *feature_bits, +- uint64_t features); ++static inline void vhost_ack_features(struct vhost_dev *hdev, ++ const int *feature_bits, ++ uint64_t features) ++{ ++ uint64_t features_ex[VIRTIO_FEATURES_NU64S]; ++ ++ virtio_features_from_u64(features_ex, features); ++ vhost_ack_features_ex(hdev, feature_bits, features_ex); ++} ++ + unsigned int vhost_get_max_memslots(void); + unsigned int vhost_get_free_memslots(void); + +-- +2.47.3 + diff --git a/kvm-vhost-backend-implement-extended-features-support.patch b/kvm-vhost-backend-implement-extended-features-support.patch new file mode 100644 index 0000000..fe43faa --- /dev/null +++ b/kvm-vhost-backend-implement-extended-features-support.patch @@ -0,0 +1,133 @@ +From d0e31af77cd51df8c17fa4d304571f8b1eb5ce29 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:24 +0200 +Subject: [PATCH 15/19] vhost-backend: implement extended features support + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [10/14] bd12c2f08dad502a7359d4344a2220b10ac96a48 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Leverage the kernel extended features manipulation ioctls(), if +available, and fallback to old ops otherwise. Error out when setting +extended features but kernel support is not available. + +Note that extended support for get/set backend features is not needed, +as the only feature that can be changed belongs to the 64 bit range. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: <150daade3d59e77629276920e014ee8e5fc12121.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit f412c1f57ab58fd595efee26264193759220ca6f) +Signed-off-by: Laurent Vivier +--- + hw/virtio/vhost-backend.c | 62 ++++++++++++++++++++++++++++++++------- + 1 file changed, 51 insertions(+), 11 deletions(-) + +diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c +index 833804dd40..4367db0d95 100644 +--- a/hw/virtio/vhost-backend.c ++++ b/hw/virtio/vhost-backend.c +@@ -20,6 +20,11 @@ + #include + #include + ++struct vhost_features { ++ uint64_t count; ++ uint64_t features[VIRTIO_FEATURES_NU64S]; ++}; ++ + static int vhost_kernel_call(struct vhost_dev *dev, unsigned long int request, + void *arg) + { +@@ -182,12 +187,6 @@ static int vhost_kernel_get_vring_worker(struct vhost_dev *dev, + return vhost_kernel_call(dev, VHOST_GET_VRING_WORKER, worker); + } + +-static int vhost_kernel_set_features(struct vhost_dev *dev, +- uint64_t features) +-{ +- return vhost_kernel_call(dev, VHOST_SET_FEATURES, &features); +-} +- + static int vhost_kernel_set_backend_cap(struct vhost_dev *dev) + { + uint64_t features; +@@ -210,10 +209,51 @@ static int vhost_kernel_set_backend_cap(struct vhost_dev *dev) + return 0; + } + +-static int vhost_kernel_get_features(struct vhost_dev *dev, +- uint64_t *features) ++static int vhost_kernel_set_features(struct vhost_dev *dev, ++ const uint64_t *features) + { +- return vhost_kernel_call(dev, VHOST_GET_FEATURES, features); ++ struct vhost_features farray; ++ bool extended_in_use; ++ int r; ++ ++ farray.count = VIRTIO_FEATURES_NU64S; ++ virtio_features_copy(farray.features, features); ++ extended_in_use = virtio_features_use_ex(farray.features); ++ ++ /* ++ * Can't check for ENOTTY: for unknown ioctls the kernel interprets ++ * the argument as a virtio queue id and most likely errors out validating ++ * such id, instead of reporting an unknown operation. 
++ */ ++ r = vhost_kernel_call(dev, VHOST_SET_FEATURES_ARRAY, &farray); ++ if (!r) { ++ return 0; ++ } ++ ++ if (extended_in_use) { ++ error_report("Trying to set extended features without kernel support"); ++ return -EINVAL; ++ } ++ return vhost_kernel_call(dev, VHOST_SET_FEATURES, &farray.features[0]); ++} ++ ++static int vhost_kernel_get_features(struct vhost_dev *dev, uint64_t *features) ++{ ++ struct vhost_features farray; ++ int r; ++ ++ farray.count = VIRTIO_FEATURES_NU64S; ++ r = vhost_kernel_call(dev, VHOST_GET_FEATURES_ARRAY, &farray); ++ if (r) { ++ memset(&farray, 0, sizeof(farray)); ++ r = vhost_kernel_call(dev, VHOST_GET_FEATURES, &farray.features[0]); ++ } ++ if (r) { ++ return r; ++ } ++ ++ virtio_features_copy(features, farray.features); ++ return 0; + } + + static int vhost_kernel_set_owner(struct vhost_dev *dev) +@@ -341,8 +381,8 @@ const VhostOps kernel_ops = { + .vhost_attach_vring_worker = vhost_kernel_attach_vring_worker, + .vhost_new_worker = vhost_kernel_new_worker, + .vhost_free_worker = vhost_kernel_free_worker, +- .vhost_set_features = vhost_kernel_set_features, +- .vhost_get_features = vhost_kernel_get_features, ++ .vhost_set_features_ex = vhost_kernel_set_features, ++ .vhost_get_features_ex = vhost_kernel_get_features, + .vhost_set_backend_cap = vhost_kernel_set_backend_cap, + .vhost_set_owner = vhost_kernel_set_owner, + .vhost_get_vq_index = vhost_kernel_get_vq_index, +-- +2.47.3 + diff --git a/kvm-vhost-net-implement-extended-features-support.patch b/kvm-vhost-net-implement-extended-features-support.patch new file mode 100644 index 0000000..bf04384 --- /dev/null +++ b/kvm-vhost-net-implement-extended-features-support.patch @@ -0,0 +1,237 @@ +From 7eb0a4d5d5a03fd631b87c59dc531080f93a0640 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:25 +0200 +Subject: [PATCH 16/19] vhost-net: implement extended features support + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel 
offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [11/14] 48cf0458bd30eeca10dace74ca2a8871dc0c8bca (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Provide extended version of the features manipulation helpers, +and let the device initialization deal with the full features space, +adjusting the relevant format strings accordingly. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Acked-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Message-ID: <69c78c432e28e146a8874b2a7d00e9cbd111b1ba.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit d55ad8c9a9a5edd8152f13fc97879d66972f103e) +Signed-off-by: Laurent Vivier +--- + hw/net/vhost_net-stub.c | 8 +++----- + hw/net/vhost_net.c | 45 +++++++++++++++++++++++------------------ + include/net/vhost_net.h | 33 +++++++++++++++++++++++++++--- + 3 files changed, 58 insertions(+), 28 deletions(-) + +diff --git a/hw/net/vhost_net-stub.c b/hw/net/vhost_net-stub.c +index 7d49f82906..0740d5a2eb 100644 +--- a/hw/net/vhost_net-stub.c ++++ b/hw/net/vhost_net-stub.c +@@ -46,9 +46,8 @@ void vhost_net_cleanup(struct vhost_net *net) + { + } + +-uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) ++void vhost_net_get_features_ex(struct vhost_net *net, uint64_t *features) + { +- return features; + } + + int vhost_net_get_config(struct vhost_net *net, uint8_t *config, +@@ -62,13 +61,12 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + return 0; + } + +-void vhost_net_ack_features(struct vhost_net *net, uint64_t features) ++void vhost_net_ack_features_ex(struct vhost_net *net, const uint64_t *features) + { + } + +-uint64_t vhost_net_get_acked_features(VHostNetState *net) ++void vhost_net_get_acked_features_ex(VHostNetState *net, uint64_t *features) + { +- return 0; + } + + bool vhost_net_virtqueue_pending(VHostNetState *net, int 
idx) +diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c +index 540492b37d..a8ee18a912 100644 +--- a/hw/net/vhost_net.c ++++ b/hw/net/vhost_net.c +@@ -35,10 +35,9 @@ + #include "hw/virtio/virtio-bus.h" + #include "linux-headers/linux/vhost.h" + +-uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features) ++void vhost_net_get_features_ex(struct vhost_net *net, uint64_t *features) + { +- return vhost_get_features(&net->dev, net->feature_bits, +- features); ++ vhost_get_features_ex(&net->dev, net->feature_bits, features); + } + int vhost_net_get_config(struct vhost_net *net, uint8_t *config, + uint32_t config_len) +@@ -51,10 +50,11 @@ int vhost_net_set_config(struct vhost_net *net, const uint8_t *data, + return vhost_dev_set_config(&net->dev, data, offset, size, flags); + } + +-void vhost_net_ack_features(struct vhost_net *net, uint64_t features) ++void vhost_net_ack_features_ex(struct vhost_net *net, const uint64_t *features) + { +- net->dev.acked_features = net->dev.backend_features; +- vhost_ack_features(&net->dev, net->feature_bits, features); ++ virtio_features_copy(net->dev.acked_features_ex, ++ net->dev.backend_features_ex); ++ vhost_ack_features_ex(&net->dev, net->feature_bits, features); + } + + uint64_t vhost_net_get_max_queues(VHostNetState *net) +@@ -62,9 +62,9 @@ uint64_t vhost_net_get_max_queues(VHostNetState *net) + return net->dev.max_queues; + } + +-uint64_t vhost_net_get_acked_features(VHostNetState *net) ++void vhost_net_get_acked_features_ex(VHostNetState *net, uint64_t *features) + { +- return net->dev.acked_features; ++ virtio_features_copy(features, net->dev.acked_features_ex); + } + + void vhost_net_save_acked_features(NetClientState *nc) +@@ -234,7 +234,8 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + int r; + bool backend_kernel = options->backend_type == VHOST_BACKEND_TYPE_KERNEL; + struct vhost_net *net = g_new0(struct vhost_net, 1); +- uint64_t features = 0; ++ uint64_t 
missing_features[VIRTIO_FEATURES_NU64S]; ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + Error *local_err = NULL; + + if (!options->net_backend) { +@@ -247,6 +248,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + net->save_acked_features = options->save_acked_features; + net->max_tx_queue_size = options->max_tx_queue_size; + net->is_vhost_user = options->is_vhost_user; ++ virtio_features_clear(features); + + net->dev.max_queues = 1; + net->dev.vqs = net->vqs; +@@ -261,7 +263,7 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + net->backend = r; + net->dev.protocol_features = 0; + } else { +- net->dev.backend_features = 0; ++ virtio_features_clear(net->dev.backend_features_ex); + net->dev.protocol_features = 0; + net->backend = -1; + +@@ -281,26 +283,29 @@ struct vhost_net *vhost_net_init(VhostNetOptions *options) + sizeof(struct virtio_net_hdr_mrg_rxbuf))) { + net->dev.features &= ~(1ULL << VIRTIO_NET_F_MRG_RXBUF); + } +- if (~net->dev.features & net->dev.backend_features) { +- fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 +- " for backend\n", +- (uint64_t)(~net->dev.features & net->dev.backend_features)); ++ ++ if (virtio_features_andnot(missing_features, ++ net->dev.backend_features_ex, ++ net->dev.features_ex)) { ++ fprintf(stderr, "vhost lacks feature mask 0x" VIRTIO_FEATURES_FMT ++ " for backend\n", VIRTIO_FEATURES_PR(missing_features)); + goto fail; + } + } + + /* Set sane init value. Override when guest acks. 
*/ + if (options->get_acked_features) { +- features = options->get_acked_features(net->nc); +- if (~net->dev.features & features) { +- fprintf(stderr, "vhost lacks feature mask 0x%" PRIx64 +- " for backend\n", +- (uint64_t)(~net->dev.features & features)); ++ virtio_features_from_u64(features, ++ options->get_acked_features(net->nc)); ++ if (virtio_features_andnot(missing_features, features, ++ net->dev.features_ex)) { ++ fprintf(stderr, "vhost lacks feature mask 0x" VIRTIO_FEATURES_FMT ++ " for backend\n", VIRTIO_FEATURES_PR(missing_features)); + goto fail; + } + } + +- vhost_net_ack_features(net, features); ++ vhost_net_ack_features_ex(net, features); + + return net; + +diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h +index 879781dad7..0225207491 100644 +--- a/include/net/vhost_net.h ++++ b/include/net/vhost_net.h +@@ -2,6 +2,7 @@ + #define VHOST_NET_H + + #include "net/net.h" ++#include "hw/virtio/virtio-features.h" + #include "hw/virtio/vhost-backend.h" + + struct vhost_net; +@@ -33,8 +34,26 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs, + + void vhost_net_cleanup(VHostNetState *net); + +-uint64_t vhost_net_get_features(VHostNetState *net, uint64_t features); +-void vhost_net_ack_features(VHostNetState *net, uint64_t features); ++void vhost_net_get_features_ex(VHostNetState *net, uint64_t *features); ++static inline uint64_t vhost_net_get_features(VHostNetState *net, ++ uint64_t features) ++{ ++ uint64_t features_array[VIRTIO_FEATURES_NU64S]; ++ ++ virtio_features_from_u64(features_array, features); ++ vhost_net_get_features_ex(net, features_array); ++ return features_array[0]; ++} ++ ++void vhost_net_ack_features_ex(VHostNetState *net, const uint64_t *features); ++static inline void vhost_net_ack_features(VHostNetState *net, ++ uint64_t features) ++{ ++ uint64_t features_array[VIRTIO_FEATURES_NU64S]; ++ ++ virtio_features_from_u64(features_array, features); ++ vhost_net_ack_features_ex(net, features_array); ++} + + int 
vhost_net_get_config(struct vhost_net *net, uint8_t *config, + uint32_t config_len); +@@ -51,7 +70,15 @@ VHostNetState *get_vhost_net(NetClientState *nc); + + int vhost_net_set_vring_enable(NetClientState *nc, int enable); + +-uint64_t vhost_net_get_acked_features(VHostNetState *net); ++void vhost_net_get_acked_features_ex(VHostNetState *net, uint64_t *features); ++static inline uint64_t vhost_net_get_acked_features(VHostNetState *net) ++{ ++ uint64_t features[VIRTIO_FEATURES_NU64S]; ++ ++ vhost_net_get_acked_features_ex(net, features); ++ assert(!virtio_features_use_ex(features)); ++ return features[0]; ++} + + int vhost_net_set_mtu(struct vhost_net *net, uint16_t mtu); + +-- +2.47.3 + diff --git a/kvm-virtio-add-support-for-negotiating-extended-features.patch b/kvm-virtio-add-support-for-negotiating-extended-features.patch new file mode 100644 index 0000000..53a5d72 --- /dev/null +++ b/kvm-virtio-add-support-for-negotiating-extended-features.patch @@ -0,0 +1,133 @@ +From d7e0acde126d1f5385096d6b7247de82ee6b0e8d Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:20 +0200 +Subject: [PATCH 11/19] virtio: add support for negotiating extended features + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [6/14] 56553123a4941b457a34af8be01b5757a97706e6 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +The virtio specifications allows for a device features space up +to 128 bits and more. Soon we are going to use some of the 'extended' +bits features for the virtio net driver. + +Add support to allow extended features negotiation on a per +devices basis. Devices willing to negotiated extended features +need to implemented a new pair of features getter/setter, the +core will conditionally use them instead of the basic one. 
+ +Note that 'bad_features' don't need to be extended, as they are +bound to the 64 bits limit. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: <9bb29d70adc3f2b8c7756d4e3cd076cffee87826.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 64a6a336f42bc6305ab7589fd874cb4a3d403bd0) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-bus.c | 11 ++++++++--- + hw/virtio/virtio.c | 14 +++++++++++--- + include/hw/virtio/virtio.h | 4 ++++ + 3 files changed, 23 insertions(+), 6 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 11adfbf3ab..cef944e015 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -62,9 +62,14 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + } + + /* Get the features of the plugged device. */ +- assert(vdc->get_features != NULL); +- vdev->host_features = vdc->get_features(vdev, vdev->host_features, +- &local_err); ++ if (vdc->get_features_ex) { ++ vdc->get_features_ex(vdev, vdev->host_features_ex, &local_err); ++ } else { ++ assert(vdc->get_features != NULL); ++ virtio_features_from_u64(vdev->host_features_ex, ++ vdc->get_features(vdev, vdev->host_features, ++ &local_err)); ++ } + if (local_err) { + error_propagate(errp, local_err); + return; +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index bf53c211e5..34f977a3c9 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -3103,7 +3103,9 @@ static int virtio_set_features_nocheck(VirtIODevice *vdev, const uint64_t *val) + bad = virtio_features_andnot(tmp, val, vdev->host_features_ex); + virtio_features_and(tmp, val, vdev->host_features_ex); + +- if (k->set_features) { ++ if (k->set_features_ex) { ++ k->set_features_ex(vdev, val); ++ } else if (k->set_features) { + bad = bad || virtio_features_use_ex(tmp); + k->set_features(vdev, tmp[0]); + } +@@ 
-3149,6 +3151,13 @@ virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, + int virtio_set_features(VirtIODevice *vdev, uint64_t val) + { + uint64_t features[VIRTIO_FEATURES_NU64S]; ++ ++ virtio_features_from_u64(features, val); ++ return virtio_set_features_ex(vdev, features); ++} ++ ++int virtio_set_features_ex(VirtIODevice *vdev, const uint64_t *features) ++{ + int ret; + /* + * The driver must not attempt to set features after feature negotiation +@@ -3158,13 +3167,12 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) + return -EINVAL; + } + +- if (val & (1ull << VIRTIO_F_BAD_FEATURE)) { ++ if (features[0] & (1ull << VIRTIO_F_BAD_FEATURE)) { + qemu_log_mask(LOG_GUEST_ERROR, + "%s: guest driver for %s has enabled UNUSED(30) feature bit!\n", + __func__, vdev->name); + } + +- virtio_features_from_u64(features, val); + ret = virtio_set_features_nocheck(vdev, features); + if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { + /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. */ +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index 39e4059a66..2aeb021fb3 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -178,6 +178,9 @@ struct VirtioDeviceClass { + /* This is what a VirtioDevice must implement */ + DeviceRealize realize; + DeviceUnrealize unrealize; ++ void (*get_features_ex)(VirtIODevice *vdev, uint64_t *requested_features, ++ Error **errp); ++ void (*set_features_ex)(VirtIODevice *vdev, const uint64_t *val); + uint64_t (*get_features)(VirtIODevice *vdev, + uint64_t requested_features, + Error **errp); +@@ -373,6 +376,7 @@ void virtio_queue_reset(VirtIODevice *vdev, uint32_t queue_index); + void virtio_queue_enable(VirtIODevice *vdev, uint32_t queue_index); + void virtio_update_irq(VirtIODevice *vdev); + int virtio_set_features(VirtIODevice *vdev, uint64_t val); ++int virtio_set_features_ex(VirtIODevice *vdev, const uint64_t *val); + + /* Base devices. 
*/ + typedef struct VirtIOBlkConf VirtIOBlkConf; +-- +2.47.3 + diff --git a/kvm-virtio-introduce-extended-features-type.patch b/kvm-virtio-introduce-extended-features-type.patch new file mode 100644 index 0000000..caf0e14 --- /dev/null +++ b/kvm-virtio-introduce-extended-features-type.patch @@ -0,0 +1,201 @@ +From 4c9ae4b3a9e2c556dcf284924a154ed85534d77f Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:18 +0200 +Subject: [PATCH 09/19] virtio: introduce extended features type + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [4/14] fa4ef2d101589c700358f90be08943f1198503e4 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +The virtio specifications allows for up to 128 bits for the +device features. Soon we are going to use some of the 'extended' +bits features (bit 64 and above) for the virtio net driver. + +Represent the virtio features bitmask with a fixed size array, and +introduce a few helpers to help manipulate them. + +Most drivers will keep using only 64 bits features space: use union +to allow them access the lower part of the extended space without any +per driver change. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: <6a9bbb5eb33830f20afbcb7e64d300af4126dd98.1758549625.git.pabeni@redhat.com> +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit b15a61fdae976eb1ca8f2deee6a63dc3407d7ec6) +Signed-off-by: Laurent Vivier +--- + include/hw/virtio/virtio-features.h | 126 ++++++++++++++++++++++++++++ + include/hw/virtio/virtio.h | 7 +- + 2 files changed, 130 insertions(+), 3 deletions(-) + create mode 100644 include/hw/virtio/virtio-features.h + +diff --git a/include/hw/virtio/virtio-features.h b/include/hw/virtio/virtio-features.h +new file mode 100644 +index 0000000000..e29b7fe48f +--- /dev/null ++++ b/include/hw/virtio/virtio-features.h +@@ -0,0 +1,126 @@ ++/* ++ * Virtio features helpers ++ * ++ * Copyright 2025 Red Hat, Inc. ++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#ifndef QEMU_VIRTIO_FEATURES_H ++#define QEMU_VIRTIO_FEATURES_H ++ ++#include "qemu/bitops.h" ++ ++#define VIRTIO_FEATURES_FMT "%016"PRIx64"%016"PRIx64 ++#define VIRTIO_FEATURES_PR(f) (f)[1], (f)[0] ++ ++#define VIRTIO_FEATURES_MAX 128 ++#define VIRTIO_FEATURES_BIT(b) BIT_ULL((b) % 64) ++#define VIRTIO_FEATURES_U64(b) ((b) / 64) ++#define VIRTIO_FEATURES_NU32S (VIRTIO_FEATURES_MAX / 32) ++#define VIRTIO_FEATURES_NU64S (VIRTIO_FEATURES_MAX / 64) ++ ++#define VIRTIO_DECLARE_FEATURES(name) \ ++ union { \ ++ uint64_t name; \ ++ uint64_t name##_ex[VIRTIO_FEATURES_NU64S]; \ ++ } ++ ++#define VIRTIO_DEFINE_PROP_FEATURE(_name, _state, _field, _bit, _defval) \ ++ DEFINE_PROP_BIT64(_name, _state, _field[VIRTIO_FEATURES_U64(_bit)], \ ++ (_bit) % 64, _defval) ++ ++static inline void virtio_features_clear(uint64_t *features) ++{ ++ memset(features, 0, sizeof(features[0]) * VIRTIO_FEATURES_NU64S); ++} ++ ++static inline void virtio_features_from_u64(uint64_t *features, uint64_t from) ++{ ++ virtio_features_clear(features); ++ features[0] = from; ++} ++ ++static inline bool virtio_has_feature_ex(const uint64_t *features, ++ unsigned int fbit) ++{ ++ assert(fbit < VIRTIO_FEATURES_MAX); ++ return features[VIRTIO_FEATURES_U64(fbit)] & VIRTIO_FEATURES_BIT(fbit); ++} ++ ++static inline void 
virtio_add_feature_ex(uint64_t *features, ++ unsigned int fbit) ++{ ++ assert(fbit < VIRTIO_FEATURES_MAX); ++ features[VIRTIO_FEATURES_U64(fbit)] |= VIRTIO_FEATURES_BIT(fbit); ++} ++ ++static inline void virtio_clear_feature_ex(uint64_t *features, ++ unsigned int fbit) ++{ ++ assert(fbit < VIRTIO_FEATURES_MAX); ++ features[VIRTIO_FEATURES_U64(fbit)] &= ~VIRTIO_FEATURES_BIT(fbit); ++} ++ ++static inline bool virtio_features_equal(const uint64_t *f1, ++ const uint64_t *f2) ++{ ++ return !memcmp(f1, f2, sizeof(uint64_t) * VIRTIO_FEATURES_NU64S); ++} ++ ++static inline bool virtio_features_use_ex(const uint64_t *features) ++{ ++ int i; ++ ++ for (i = 1; i < VIRTIO_FEATURES_NU64S; ++i) { ++ if (features[i]) { ++ return true; ++ } ++ } ++ return false; ++} ++ ++static inline bool virtio_features_empty(const uint64_t *features) ++{ ++ return !virtio_features_use_ex(features) && !features[0]; ++} ++ ++static inline void virtio_features_copy(uint64_t *to, const uint64_t *from) ++{ ++ memcpy(to, from, sizeof(to[0]) * VIRTIO_FEATURES_NU64S); ++} ++ ++static inline bool virtio_features_andnot(uint64_t *to, const uint64_t *f1, ++ const uint64_t *f2) ++{ ++ uint64_t diff = 0; ++ int i; ++ ++ for (i = 0; i < VIRTIO_FEATURES_NU64S; i++) { ++ to[i] = f1[i] & ~f2[i]; ++ diff |= to[i]; ++ } ++ return diff; ++} ++ ++static inline void virtio_features_and(uint64_t *to, const uint64_t *f1, ++ const uint64_t *f2) ++{ ++ int i; ++ ++ for (i = 0; i < VIRTIO_FEATURES_NU64S; i++) { ++ to[i] = f1[i] & f2[i]; ++ } ++} ++ ++static inline void virtio_features_or(uint64_t *to, const uint64_t *f1, ++ const uint64_t *f2) ++{ ++ int i; ++ ++ for (i = 0; i < VIRTIO_FEATURES_NU64S; i++) { ++ to[i] = f1[i] | f2[i]; ++ } ++} ++ ++#endif +diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h +index c594764f23..39e4059a66 100644 +--- a/include/hw/virtio/virtio.h ++++ b/include/hw/virtio/virtio.h +@@ -16,6 +16,7 @@ + + #include "system/memory.h" + #include "hw/qdev-core.h" ++#include 
"hw/virtio/virtio-features.h" + #include "net/net.h" + #include "migration/vmstate.h" + #include "qemu/event_notifier.h" +@@ -121,9 +122,9 @@ struct VirtIODevice + * backend (e.g. vhost) and could potentially be a subset of the + * total feature set offered by QEMU. + */ +- uint64_t host_features; +- uint64_t guest_features; +- uint64_t backend_features; ++ VIRTIO_DECLARE_FEATURES(host_features); ++ VIRTIO_DECLARE_FEATURES(guest_features); ++ VIRTIO_DECLARE_FEATURES(backend_features); + + size_t config_len; + void *config; +-- +2.47.3 + diff --git a/kvm-virtio-net-implement-extended-features-support.patch b/kvm-virtio-net-implement-extended-features-support.patch new file mode 100644 index 0000000..1af6377 --- /dev/null +++ b/kvm-virtio-net-implement-extended-features-support.patch @@ -0,0 +1,331 @@ +From cd61fca5f53b5047a432db36f33ee85edf4f6ac3 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:26 +0200 +Subject: [PATCH 17/19] virtio-net: implement extended features support + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [12/14] 22829a4e87459998cbb2a658d24434f1e94c8597 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Use the extended types and helpers to manipulate the virtio_net +features. + +Note that offloads are still 64bits wide, as per specification, +and extended offloads will be mapped into such range. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Acked-by: Stefano Garzarella +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 3a7741c3bdc3537de4159418d712debbd22e4df6) +Signed-off-by: Laurent Vivier +--- + hw/net/virtio-net.c | 140 +++++++++++++++++++-------------- + include/hw/virtio/virtio-net.h | 2 +- + 2 files changed, 83 insertions(+), 59 deletions(-) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index b86ba1fd27..89cf008401 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -90,6 +90,19 @@ + VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | \ + VIRTIO_NET_RSS_HASH_TYPE_UDP_EX) + ++/* ++ * Features starting from VIRTIO_NET_FEATURES_MAP_MIN bit correspond ++ * to guest offloads in the VIRTIO_NET_OFFLOAD_MAP range ++ */ ++#define VIRTIO_NET_OFFLOAD_MAP_MIN 46 ++#define VIRTIO_NET_OFFLOAD_MAP_LENGTH 4 ++#define VIRTIO_NET_OFFLOAD_MAP MAKE_64BIT_MASK( \ ++ VIRTIO_NET_OFFLOAD_MAP_MIN, \ ++ VIRTIO_NET_OFFLOAD_MAP_LENGTH) ++#define VIRTIO_NET_FEATURES_MAP_MIN 65 ++#define VIRTIO_NET_F2O_SHIFT (VIRTIO_NET_OFFLOAD_MAP_MIN - \ ++ VIRTIO_NET_FEATURES_MAP_MIN + 64) ++ + static const VirtIOFeature feature_sizes[] = { + {.flags = 1ULL << VIRTIO_NET_F_MAC, + .end = endof(struct virtio_net_config, mac)}, +@@ -786,7 +799,14 @@ static void virtio_net_apply_guest_offloads(VirtIONet *n) + qemu_set_offload(qemu_get_queue(n->nic)->peer, &ol); + } + +-static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) ++static uint64_t virtio_net_features_to_offload(const uint64_t *features) ++{ ++ return (features[0] & ~VIRTIO_NET_OFFLOAD_MAP) | ++ ((features[1] << VIRTIO_NET_F2O_SHIFT) & VIRTIO_NET_OFFLOAD_MAP); ++} ++ ++static uint64_t ++virtio_net_guest_offloads_by_features(const uint64_t *features) + { + static const uint64_t guest_offloads_mask = + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | +@@ -797,13 +817,13 @@ static uint64_t virtio_net_guest_offloads_by_features(uint64_t features) + (1ULL << VIRTIO_NET_F_GUEST_USO4) | + (1ULL << VIRTIO_NET_F_GUEST_USO6); + +- return guest_offloads_mask & features; ++ return guest_offloads_mask & 
virtio_net_features_to_offload(features); + } + + uint64_t virtio_net_supported_guest_offloads(const VirtIONet *n) + { + VirtIODevice *vdev = VIRTIO_DEVICE(n); +- return virtio_net_guest_offloads_by_features(vdev->guest_features); ++ return virtio_net_guest_offloads_by_features(vdev->guest_features_ex); + } + + typedef struct { +@@ -882,34 +902,39 @@ static void failover_add_primary(VirtIONet *n, Error **errp) + error_propagate(errp, err); + } + +-static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) ++static void virtio_net_set_features(VirtIODevice *vdev, ++ const uint64_t *in_features) + { ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + VirtIONet *n = VIRTIO_NET(vdev); + Error *err = NULL; + int i; + ++ virtio_features_copy(features, in_features); + if (n->mtu_bypass_backend && + !virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_MTU)) { +- features &= ~(1ULL << VIRTIO_NET_F_MTU); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_MTU); + } + + virtio_net_set_multiqueue(n, +- virtio_has_feature(features, VIRTIO_NET_F_RSS) || +- virtio_has_feature(features, VIRTIO_NET_F_MQ)); ++ virtio_has_feature_ex(features, ++ VIRTIO_NET_F_RSS) || ++ virtio_has_feature_ex(features, ++ VIRTIO_NET_F_MQ)); + + virtio_net_set_mrg_rx_bufs(n, +- virtio_has_feature(features, ++ virtio_has_feature_ex(features, + VIRTIO_NET_F_MRG_RXBUF), +- virtio_has_feature(features, ++ virtio_has_feature_ex(features, + VIRTIO_F_VERSION_1), +- virtio_has_feature(features, ++ virtio_has_feature_ex(features, + VIRTIO_NET_F_HASH_REPORT)); + +- n->rsc4_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && +- virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO4); +- n->rsc6_enabled = virtio_has_feature(features, VIRTIO_NET_F_RSC_EXT) && +- virtio_has_feature(features, VIRTIO_NET_F_GUEST_TSO6); +- n->rss_data.redirect = virtio_has_feature(features, VIRTIO_NET_F_RSS); ++ n->rsc4_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) && ++ 
virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4); ++ n->rsc6_enabled = virtio_has_feature_ex(features, VIRTIO_NET_F_RSC_EXT) && ++ virtio_has_feature_ex(features, VIRTIO_NET_F_GUEST_TSO6); ++ n->rss_data.redirect = virtio_has_feature_ex(features, VIRTIO_NET_F_RSS); + + if (n->has_vnet_hdr) { + n->curr_guest_offloads = +@@ -923,7 +948,7 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + if (!get_vhost_net(nc->peer)) { + continue; + } +- vhost_net_ack_features(get_vhost_net(nc->peer), features); ++ vhost_net_ack_features_ex(get_vhost_net(nc->peer), features); + + /* + * keep acked_features in NetVhostUserState up-to-date so it +@@ -932,12 +957,14 @@ static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features) + vhost_net_save_acked_features(nc->peer); + } + +- if (virtio_has_feature(vdev->guest_features ^ features, VIRTIO_NET_F_CTRL_VLAN)) { +- bool vlan = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN); ++ if (virtio_has_feature_ex(features, VIRTIO_NET_F_CTRL_VLAN) != ++ virtio_has_feature_ex(vdev->guest_features_ex, ++ VIRTIO_NET_F_CTRL_VLAN)) { ++ bool vlan = virtio_has_feature_ex(features, VIRTIO_NET_F_CTRL_VLAN); + memset(n->vlans, vlan ? 
0 : 0xff, MAX_VLAN >> 3); + } + +- if (virtio_has_feature(features, VIRTIO_NET_F_STANDBY)) { ++ if (virtio_has_feature_ex(features, VIRTIO_NET_F_STANDBY)) { + qapi_event_send_failover_negotiated(n->netclient_name); + qatomic_set(&n->failover_primary_hidden, false); + failover_add_primary(n, &err); +@@ -1902,10 +1929,10 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + virtio_error(vdev, "virtio-net unexpected empty queue: " + "i %zd mergeable %d offset %zd, size %zd, " + "guest hdr len %zd, host hdr len %zd " +- "guest features 0x%" PRIx64, ++ "guest features 0x" VIRTIO_FEATURES_FMT, + i, n->mergeable_rx_bufs, offset, size, + n->guest_hdr_len, n->host_hdr_len, +- vdev->guest_features); ++ VIRTIO_FEATURES_PR(vdev->guest_features_ex)); + } + err = -1; + goto err; +@@ -3012,8 +3039,8 @@ static int virtio_net_pre_load_queues(VirtIODevice *vdev, uint32_t n) + return 0; + } + +-static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, +- Error **errp) ++static void virtio_net_get_features(VirtIODevice *vdev, uint64_t *features, ++ Error **errp) + { + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc = qemu_get_queue(n->nic); +@@ -3027,68 +3054,67 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + (supported_hash_types & peer_hash_types) == supported_hash_types; + + /* Firstly sync all virtio-net possible supported features */ +- features |= n->host_features; ++ virtio_features_or(features, features, n->host_features_ex); + +- virtio_add_feature(&features, VIRTIO_NET_F_MAC); ++ virtio_add_feature_ex(features, VIRTIO_NET_F_MAC); + + if (!peer_has_vnet_hdr(n)) { +- virtio_clear_feature(&features, VIRTIO_NET_F_CSUM); +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4); +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6); +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_CSUM); ++ virtio_clear_feature_ex(features, 
VIRTIO_NET_F_HOST_TSO4); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_TSO6); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_ECN); + +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_CSUM); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_TSO4); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_TSO6); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_ECN); + +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_USO); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO4); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6); + +- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT); + } + + if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) { +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO); +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_UFO); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_UFO); + } +- + if (!peer_has_uso(n)) { +- virtio_clear_feature(&features, VIRTIO_NET_F_HOST_USO); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO4); +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_USO6); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HOST_USO); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO4); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_USO6); + } + + if (!get_vhost_net(nc->peer)) { + if (!use_own_hash) { +- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); +- 
virtio_clear_feature(&features, VIRTIO_NET_F_RSS); +- } else if (virtio_has_feature(features, VIRTIO_NET_F_RSS)) { ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_RSS); ++ } else if (virtio_has_feature_ex(features, VIRTIO_NET_F_RSS)) { + virtio_net_load_ebpf(n, errp); + } + +- return features; ++ return; + } + + if (!use_peer_hash) { +- virtio_clear_feature(&features, VIRTIO_NET_F_HASH_REPORT); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_HASH_REPORT); + + if (!use_own_hash || !virtio_net_attach_ebpf_to_backend(n->nic, -1)) { + if (!virtio_net_load_ebpf(n, errp)) { +- return features; ++ return; + } + +- virtio_clear_feature(&features, VIRTIO_NET_F_RSS); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_RSS); + } + } + +- features = vhost_net_get_features(get_vhost_net(nc->peer), features); +- vdev->backend_features = features; ++ vhost_net_get_features_ex(get_vhost_net(nc->peer), features); ++ virtio_features_copy(vdev->backend_features_ex, features); + + if (n->mtu_bypass_backend && + (n->host_features & 1ULL << VIRTIO_NET_F_MTU)) { +- features |= (1ULL << VIRTIO_NET_F_MTU); ++ virtio_add_feature_ex(features, VIRTIO_NET_F_MTU); + } + + /* +@@ -3103,10 +3129,8 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, + * support it. 
+ */ + if (!virtio_has_feature(vdev->backend_features, VIRTIO_NET_F_CTRL_VQ)) { +- virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ANNOUNCE); ++ virtio_clear_feature_ex(features, VIRTIO_NET_F_GUEST_ANNOUNCE); + } +- +- return features; + } + + static int virtio_net_post_load_device(void *opaque, int version_id) +@@ -4238,8 +4262,8 @@ static void virtio_net_class_init(ObjectClass *klass, const void *data) + vdc->unrealize = virtio_net_device_unrealize; + vdc->get_config = virtio_net_get_config; + vdc->set_config = virtio_net_set_config; +- vdc->get_features = virtio_net_get_features; +- vdc->set_features = virtio_net_set_features; ++ vdc->get_features_ex = virtio_net_get_features; ++ vdc->set_features_ex = virtio_net_set_features; + vdc->bad_features = virtio_net_bad_features; + vdc->reset = virtio_net_reset; + vdc->queue_reset = virtio_net_queue_reset; +diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h +index 73fdefc0dc..5b8ab7bda7 100644 +--- a/include/hw/virtio/virtio-net.h ++++ b/include/hw/virtio/virtio-net.h +@@ -182,7 +182,7 @@ struct VirtIONet { + uint32_t has_vnet_hdr; + size_t host_hdr_len; + size_t guest_hdr_len; +- uint64_t host_features; ++ VIRTIO_DECLARE_FEATURES(host_features); + uint32_t rsc_timeout; + uint8_t rsc4_enabled; + uint8_t rsc6_enabled; +-- +2.47.3 + diff --git a/kvm-virtio-pci-implement-support-for-extended-features.patch b/kvm-virtio-pci-implement-support-for-extended-features.patch new file mode 100644 index 0000000..98821d9 --- /dev/null +++ b/kvm-virtio-pci-implement-support-for-extended-features.patch @@ -0,0 +1,196 @@ +From 71096da8a19183bbe11f28d5dfe6cf6cc38d9585 Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:21 +0200 +Subject: [PATCH 12/19] virtio-pci: implement support for extended features + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [7/14] 
7d5683a98169722acb250f9e01344ec3d56faad4 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +Extend the features configuration space to 128 bits. If the virtio +device supports any extended features, allow the common read/write +operation to access all of it, otherwise keep exposing only the +lower 64 bits. + +On migration, save the 128 bit version of the features only if the +upper bits are non zero. Relay on reset to clear all the feature +space before load. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. Tsirkin +(cherry picked from commit 712c79d6d374e7abe94599de5ba2d155d5a79955) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio-pci.c | 76 ++++++++++++++++++++++++++++++---- + include/hw/virtio/virtio-pci.h | 2 +- + 2 files changed, 68 insertions(+), 10 deletions(-) + +diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c +index 0cdc16217f..d38d0a5e9a 100644 +--- a/hw/virtio/virtio-pci.c ++++ b/hw/virtio/virtio-pci.c +@@ -110,6 +110,29 @@ static const VMStateDescription vmstate_virtio_pci_modern_queue_state = { + } + }; + ++static bool virtio_pci_modern_state_features128_needed(void *opaque) ++{ ++ VirtIOPCIProxy *proxy = opaque; ++ uint32_t features = 0; ++ int i; ++ ++ for (i = 2; i < ARRAY_SIZE(proxy->guest_features); ++i) { ++ features |= proxy->guest_features[i]; ++ } ++ return features; ++} ++ ++static const VMStateDescription vmstate_virtio_pci_modern_state_features128 = { ++ .name = "virtio_pci/modern_state/features128", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = &virtio_pci_modern_state_features128_needed, ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 2, 2), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ + static bool virtio_pci_modern_state_needed(void *opaque) + { + VirtIOPCIProxy *proxy = 
opaque; +@@ -117,6 +140,12 @@ static bool virtio_pci_modern_state_needed(void *opaque) + return virtio_pci_modern(proxy); + } + ++/* ++ * Avoid silently breaking migration should the feature space increase ++ * even more in the (far away) future ++ */ ++QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU32S != 4); ++ + static const VMStateDescription vmstate_virtio_pci_modern_state_sub = { + .name = "virtio_pci/modern_state", + .version_id = 1, +@@ -125,11 +154,15 @@ static const VMStateDescription vmstate_virtio_pci_modern_state_sub = { + .fields = (const VMStateField[]) { + VMSTATE_UINT32(dfselect, VirtIOPCIProxy), + VMSTATE_UINT32(gfselect, VirtIOPCIProxy), +- VMSTATE_UINT32_ARRAY(guest_features, VirtIOPCIProxy, 2), ++ VMSTATE_UINT32_SUB_ARRAY(guest_features, VirtIOPCIProxy, 0, 2), + VMSTATE_STRUCT_ARRAY(vqs, VirtIOPCIProxy, VIRTIO_QUEUE_MAX, 0, + vmstate_virtio_pci_modern_queue_state, + VirtIOPCIQueue), + VMSTATE_END_OF_LIST() ++ }, ++ .subsections = (const VMStateDescription * const []) { ++ &vmstate_virtio_pci_modern_state_features128, ++ NULL + } + }; + +@@ -1478,6 +1511,19 @@ int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, + return virtio_pci_add_mem_cap(proxy, &cap.cap); + } + ++static int virtio_pci_select_max(const VirtIODevice *vdev) ++{ ++ int i; ++ ++ for (i = VIRTIO_FEATURES_NU64S - 1; i > 0; i--) { ++ if (vdev->host_features_ex[i]) { ++ return (i + 1) * 2; ++ } ++ } ++ ++ return 2; ++} ++ + static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, + unsigned size) + { +@@ -1495,18 +1541,21 @@ static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr, + val = proxy->dfselect; + break; + case VIRTIO_PCI_COMMON_DF: +- if (proxy->dfselect <= 1) { ++ if (proxy->dfselect < virtio_pci_select_max(vdev)) { + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + +- val = (vdev->host_features & ~vdc->legacy_features) >> +- (32 * proxy->dfselect); ++ val = vdev->host_features_ex[proxy->dfselect >> 1] >> ++ (32 * (proxy->dfselect & 1)); ++ if 
(proxy->dfselect <= 1) { ++ val &= (~vdc->legacy_features) >> (32 * proxy->dfselect); ++ } + } + break; + case VIRTIO_PCI_COMMON_GFSELECT: + val = proxy->gfselect; + break; + case VIRTIO_PCI_COMMON_GF: +- if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { ++ if (proxy->gfselect < virtio_pci_select_max(vdev)) { + val = proxy->guest_features[proxy->gfselect]; + } + break; +@@ -1589,11 +1638,18 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, + proxy->gfselect = val; + break; + case VIRTIO_PCI_COMMON_GF: +- if (proxy->gfselect < ARRAY_SIZE(proxy->guest_features)) { ++ if (proxy->gfselect < virtio_pci_select_max(vdev)) { ++ uint64_t features[VIRTIO_FEATURES_NU64S]; ++ int i; ++ + proxy->guest_features[proxy->gfselect] = val; +- virtio_set_features(vdev, +- (((uint64_t)proxy->guest_features[1]) << 32) | +- proxy->guest_features[0]); ++ virtio_features_clear(features); ++ for (i = 0; i < ARRAY_SIZE(proxy->guest_features); ++i) { ++ uint64_t cur = proxy->guest_features[i]; ++ ++ features[i >> 1] |= cur << ((i & 1) * 32); ++ } ++ virtio_set_features_ex(vdev, features); + } + break; + case VIRTIO_PCI_COMMON_MSIX: +@@ -2312,6 +2368,8 @@ static void virtio_pci_reset(DeviceState *qdev) + virtio_bus_reset(bus); + msix_unuse_all_vectors(&proxy->pci_dev); + ++ memset(proxy->guest_features, 0, sizeof(proxy->guest_features)); ++ + for (i = 0; i < VIRTIO_QUEUE_MAX; i++) { + proxy->vqs[i].enabled = 0; + proxy->vqs[i].reset = 0; +diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h +index eab5394898..639752977e 100644 +--- a/include/hw/virtio/virtio-pci.h ++++ b/include/hw/virtio/virtio-pci.h +@@ -158,7 +158,7 @@ struct VirtIOPCIProxy { + uint32_t nvectors; + uint32_t dfselect; + uint32_t gfselect; +- uint32_t guest_features[2]; ++ uint32_t guest_features[VIRTIO_FEATURES_NU32S]; + VirtIOPCIQueue vqs[VIRTIO_QUEUE_MAX]; + + VirtIOIRQFD *vector_irqfd; +-- +2.47.3 + diff --git a/kvm-virtio-serialize-extended-features-state.patch 
b/kvm-virtio-serialize-extended-features-state.patch new file mode 100644 index 0000000..97a19d7 --- /dev/null +++ b/kvm-virtio-serialize-extended-features-state.patch @@ -0,0 +1,222 @@ +From 0f4e78c05a49b1ab51bd057df02b5c42f7914ebf Mon Sep 17 00:00:00 2001 +From: Paolo Abeni +Date: Mon, 22 Sep 2025 16:18:19 +0200 +Subject: [PATCH 10/19] virtio: serialize extended features state + +RH-Author: Laurent Vivier +RH-MergeRequest: 456: backport support for GSO over UDP tunnel offload +RH-Jira: RHEL-143785 +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Commit: [5/14] 3448a1d2d4b7b6952819257c1bc2f5e5dcee8935 (lvivier/qemu-kvm-centos) + +JIRA: https://issues.redhat.com/browse/RHEL-143785 + +If the driver uses any of the extended features (i.e. 64 or above), +store the extended features range (64-127 bits). + +At load time, let legacy features initialize the full features range +and pass it to the set helper; sub-states loading will have filled-up +the extended part as needed. + +This is one of the few spots that need explicitly to know and set +in stone the extended features array size; add a build bug to prevent +breaking the migration should such size change again in the future: +more serialization plumbing will be needed. + +Reviewed-by: Akihiko Odaki +Acked-by: Jason Wang +Acked-by: Stefano Garzarella +Signed-off-by: Paolo Abeni +Tested-by: Lei Yang +Reviewed-by: Michael S. Tsirkin +Message-ID: +Signed-off-by: Michael S. 
Tsirkin +(cherry picked from commit 0a49a97433279512a03f3d9f36164a46caf498c6) +Signed-off-by: Laurent Vivier +--- + hw/virtio/virtio.c | 88 ++++++++++++++++++++++++++++++---------------- + 1 file changed, 57 insertions(+), 31 deletions(-) + +diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c +index 9a81ad912e..bf53c211e5 100644 +--- a/hw/virtio/virtio.c ++++ b/hw/virtio/virtio.c +@@ -2964,6 +2964,30 @@ static const VMStateDescription vmstate_virtio_disabled = { + } + }; + ++static bool virtio_128bit_features_needed(void *opaque) ++{ ++ VirtIODevice *vdev = opaque; ++ ++ return virtio_features_use_ex(vdev->host_features_ex); ++} ++ ++static const VMStateDescription vmstate_virtio_128bit_features = { ++ .name = "virtio/128bit_features", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = &virtio_128bit_features_needed, ++ .fields = (const VMStateField[]) { ++ VMSTATE_UINT64(guest_features_ex[1], VirtIODevice), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++/* ++ * Avoid silently breaking migration should the feature space increase ++ * even more in the (far away) future ++ */ ++QEMU_BUILD_BUG_ON(VIRTIO_FEATURES_NU64S != 2); ++ + static const VMStateDescription vmstate_virtio = { + .name = "virtio", + .version_id = 1, +@@ -2973,6 +2997,7 @@ static const VMStateDescription vmstate_virtio = { + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_virtio_device_endian, ++ &vmstate_virtio_128bit_features, + &vmstate_virtio_64bit_features, + &vmstate_virtio_virtqueues, + &vmstate_virtio_ringsize, +@@ -3069,23 +3094,28 @@ const VMStateInfo virtio_vmstate_info = { + .put = virtio_device_put, + }; + +-static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val) ++static int virtio_set_features_nocheck(VirtIODevice *vdev, const uint64_t *val) + { + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); +- bool bad = (val & ~(vdev->host_features)) != 0; ++ uint64_t tmp[VIRTIO_FEATURES_NU64S]; ++ bool bad; ++ ++ bad = virtio_features_andnot(tmp, 
val, vdev->host_features_ex); ++ virtio_features_and(tmp, val, vdev->host_features_ex); + +- val &= vdev->host_features; + if (k->set_features) { +- k->set_features(vdev, val); ++ bad = bad || virtio_features_use_ex(tmp); ++ k->set_features(vdev, tmp[0]); + } +- vdev->guest_features = val; ++ ++ virtio_features_copy(vdev->guest_features_ex, tmp); + return bad ? -1 : 0; + } + + typedef struct VirtioSetFeaturesNocheckData { + Coroutine *co; + VirtIODevice *vdev; +- uint64_t val; ++ uint64_t val[VIRTIO_FEATURES_NU64S]; + int ret; + } VirtioSetFeaturesNocheckData; + +@@ -3098,14 +3128,15 @@ static void virtio_set_features_nocheck_bh(void *opaque) + } + + static int coroutine_mixed_fn +-virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) ++virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, ++ const uint64_t *val) + { + if (qemu_in_coroutine()) { + VirtioSetFeaturesNocheckData data = { + .co = qemu_coroutine_self(), + .vdev = vdev, +- .val = val, + }; ++ virtio_features_copy(data.val, val); + aio_bh_schedule_oneshot(qemu_get_current_aio_context(), + virtio_set_features_nocheck_bh, &data); + qemu_coroutine_yield(); +@@ -3117,6 +3148,7 @@ virtio_set_features_nocheck_maybe_co(VirtIODevice *vdev, uint64_t val) + + int virtio_set_features(VirtIODevice *vdev, uint64_t val) + { ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + int ret; + /* + * The driver must not attempt to set features after feature negotiation +@@ -3132,7 +3164,8 @@ int virtio_set_features(VirtIODevice *vdev, uint64_t val) + __func__, vdev->name); + } + +- ret = virtio_set_features_nocheck(vdev, val); ++ virtio_features_from_u64(features, val); ++ ret = virtio_set_features_nocheck(vdev, features); + if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) { + /* VIRTIO_RING_F_EVENT_IDX changes the size of the caches. 
*/ + int i; +@@ -3155,6 +3188,7 @@ void virtio_reset(void *opaque) + { + VirtIODevice *vdev = opaque; + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); ++ uint64_t features[VIRTIO_FEATURES_NU64S]; + int i; + + virtio_set_status(vdev, 0); +@@ -3181,7 +3215,8 @@ void virtio_reset(void *opaque) + vdev->start_on_kick = false; + vdev->started = false; + vdev->broken = false; +- virtio_set_features_nocheck(vdev, 0); ++ virtio_features_clear(features); ++ virtio_set_features_nocheck(vdev, features); + vdev->queue_sel = 0; + vdev->status = 0; + vdev->disabled = false; +@@ -3264,7 +3299,7 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + * Note: devices should always test host features in future - don't create + * new dependencies like this. + */ +- vdev->guest_features = features; ++ virtio_features_from_u64(vdev->guest_features_ex, features); + + config_len = qemu_get_be32(f); + +@@ -3343,26 +3378,17 @@ virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id) + vdev->device_endian = virtio_default_endian(); + } + +- if (virtio_64bit_features_needed(vdev)) { +- /* +- * Subsection load filled vdev->guest_features. Run them +- * through virtio_set_features to sanity-check them against +- * host_features. +- */ +- uint64_t features64 = vdev->guest_features; +- if (virtio_set_features_nocheck_maybe_co(vdev, features64) < 0) { +- error_report("Features 0x%" PRIx64 " unsupported. " +- "Allowed features: 0x%" PRIx64, +- features64, vdev->host_features); +- return -1; +- } +- } else { +- if (virtio_set_features_nocheck_maybe_co(vdev, features) < 0) { +- error_report("Features 0x%x unsupported. " +- "Allowed features: 0x%" PRIx64, +- features, vdev->host_features); +- return -1; +- } ++ /* ++ * guest_features_ex is fully initialized with u32 features and upper ++ * bits have been filled as needed by the later load. 
++ */ ++ if (virtio_set_features_nocheck_maybe_co(vdev, ++ vdev->guest_features_ex) < 0) { ++ error_report("Features 0x" VIRTIO_FEATURES_FMT " unsupported. " ++ "Allowed features: 0x" VIRTIO_FEATURES_FMT, ++ VIRTIO_FEATURES_PR(vdev->guest_features_ex), ++ VIRTIO_FEATURES_PR(vdev->host_features_ex)); ++ return -1; + } + + if (!virtio_device_started(vdev, vdev->status) && +-- +2.47.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 8f34097..154ae6d 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -143,7 +143,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 10.1.0 -Release: 11%{?rcrel}%{?dist}%{?cc_suffix} +Release: 12%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -355,6 +355,44 @@ Patch99: kvm-q35-increase-default-tseg-size.patch Patch100: kvm-hw-intc-ioapic-Fix-ACCEL_KERNEL_GSI_IRQFD_POSSIBLE-t.patch # For RHEL-111853 - [Intel 10.0 FEAT] [SPR] TDX: Virt-QEMU: QEMU Support [rhel-10] Patch101: kvm-redhat-allow-5-level-paging-for-TDX-VMs.patch +# For RHEL-79118 - [network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10] +Patch102: kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch +# For RHEL-79118 - [network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10] +Patch103: kvm-curl-Fix-coroutine-waking.patch +# For RHEL-79118 - [network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10] +Patch104: kvm-block-io-Take-reqs_lock-for-tracked_requests.patch +# For RHEL-79118 - [network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10] +Patch105: kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch +# For RHEL-79118 - 
[network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10] +Patch106: kvm-qcow2-Fix-cache_clean_timer.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch107: kvm-net-bundle-all-offloads-in-a-single-struct.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch108: kvm-linux-headers-deal-with-counted_by-annotation.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch109: kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch110: kvm-virtio-introduce-extended-features-type.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch111: kvm-virtio-serialize-extended-features-state.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch112: kvm-virtio-add-support-for-negotiating-extended-features.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch113: kvm-virtio-pci-implement-support-for-extended-features.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch114: kvm-vhost-add-support-for-negotiating-extended-features.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch115: kvm-qmp-update-virtio-features-map-to-support-extended-f.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch116: kvm-vhost-backend-implement-extended-features-support.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch117: kvm-vhost-net-implement-extended-features-support.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch118: kvm-virtio-net-implement-extended-features-support.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload +Patch119: kvm-net-implement-tunnel-probing.patch +# For RHEL-143785 - backport support for GSO over UDP tunnel offload 
+Patch120: kvm-net-implement-UDP-tunnel-features-offloading.patch %if %{have_clang} BuildRequires: clang @@ -1434,6 +1472,31 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Feb 02 2026 Miroslav Rezanina - 10.1.0-12 +- kvm-rbd-Run-co-BH-CB-in-the-coroutine-s-AioContext.patch [RHEL-79118] +- kvm-curl-Fix-coroutine-waking.patch [RHEL-79118] +- kvm-block-io-Take-reqs_lock-for-tracked_requests.patch [RHEL-79118] +- kvm-qcow2-Re-initialize-lock-in-invalidate_cache.patch [RHEL-79118] +- kvm-qcow2-Fix-cache_clean_timer.patch [RHEL-79118] +- kvm-net-bundle-all-offloads-in-a-single-struct.patch [RHEL-143785] +- kvm-linux-headers-deal-with-counted_by-annotation.patch [RHEL-143785] +- kvm-linux-headers-Update-to-Linux-v6.17-rc1.patch [RHEL-143785] +- kvm-virtio-introduce-extended-features-type.patch [RHEL-143785] +- kvm-virtio-serialize-extended-features-state.patch [RHEL-143785] +- kvm-virtio-add-support-for-negotiating-extended-features.patch [RHEL-143785] +- kvm-virtio-pci-implement-support-for-extended-features.patch [RHEL-143785] +- kvm-vhost-add-support-for-negotiating-extended-features.patch [RHEL-143785] +- kvm-qmp-update-virtio-features-map-to-support-extended-f.patch [RHEL-143785] +- kvm-vhost-backend-implement-extended-features-support.patch [RHEL-143785] +- kvm-vhost-net-implement-extended-features-support.patch [RHEL-143785] +- kvm-virtio-net-implement-extended-features-support.patch [RHEL-143785] +- kvm-net-implement-tunnel-probing.patch [RHEL-143785] +- kvm-net-implement-UDP-tunnel-features-offloading.patch [RHEL-143785] +- Resolves: RHEL-79118 + ([network-storage][rbd][core-dump]installation of guest failed sometimes with multiqueue enabled [rhel10]) +- Resolves: RHEL-143785 + (backport support for GSO over UDP tunnel offload) + * Tue Jan 13 2026 Miroslav Rezanina - 10.1.0-11 - kvm-fix-pc_rhel_10_2_compat_len.patch [RHEL-126707] - kvm-q35-increase-default-tseg-size.patch [RHEL-126707]