diff --git a/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch b/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch new file mode 100644 index 0000000..ad2b261 --- /dev/null +++ b/kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch @@ -0,0 +1,87 @@ +From cd49a32e9c9e33efc51652b68180a07683814b4d Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:12 -0300 +Subject: [PATCH 4/9] Add dirty-sync-missed-zero-copy migration stat +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [4/8] 56cce61cf95aafc8dafae7531b43c166084abfec +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Signed-off-by: Leonardo Bras +Acked-by: Markus Armbruster +Acked-by: Peter Xu +Reviewed-by: Daniel P. Berrangé +Message-Id: <20220711211112.18951-3-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit cf20c897338067ab4b70a4596fdccaf90c7e29a1) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 2 ++ + monitor/hmp-cmds.c | 5 +++++ + qapi/migration.json | 7 ++++++- + 3 files changed, 13 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index e100b30f00..952a26c5c2 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1012,6 +1012,8 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; ++ info->ram->dirty_sync_missed_zero_copy = ++ ram_counters.dirty_sync_missed_zero_copy; + info->ram->postcopy_requests = ram_counters.postcopy_requests; + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 8c384dc1b2..f7216ab5d0 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -305,6 +305,11 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", + info->ram->postcopy_bytes >> 10); + } ++ if (info->ram->dirty_sync_missed_zero_copy) { ++ monitor_printf(mon, ++ "Zero-copy-send fallbacks happened: %" PRIu64 " times\n", ++ info->ram->dirty_sync_missed_zero_copy); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index c8ec260ab0..94bc5c69db 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -55,6 +55,10 @@ + # @postcopy-bytes: The number of bytes sent during the post-copy phase + # (since 7.0). + # ++# @dirty-sync-missed-zero-copy: Number of times dirty RAM synchronization could ++# not avoid copying dirty pages. This is between ++# 0 and @dirty-sync-count * @multifd-channels. 
++# (since 7.1) + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -65,7 +69,8 @@ + 'postcopy-requests' : 'int', 'page-size' : 'int', + 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', + 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', +- 'postcopy-bytes' : 'uint64' } } ++ 'postcopy-bytes' : 'uint64', ++ 'dirty-sync-missed-zero-copy' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.31.1 + diff --git a/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch b/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch new file mode 100644 index 0000000..1a0beb2 --- /dev/null +++ b/kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch @@ -0,0 +1,109 @@ +From ea5299b5dde7d0b6b2f93cb646e6a24c9f105466 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Wed, 23 Mar 2022 12:33:25 +0100 +Subject: [PATCH 13/24] KVM: x86: workaround invalid CPUID[0xD,9] info on some + AMD processors +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [13/13] 38f147c911258e84e01336271ebd23a1c24371fc +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Some AMD processors expose the PKRU extended save state even if they do not have +the related PKU feature in CPUID. Worse, when they do they report a size of +64, whereas the expected size of the PKRU extended save state is 8, therefore +the esa->size == eax assertion does not hold. + +The state is already ignored by KVM_GET_SUPPORTED_CPUID because it +was not enabled in the host XCR0. However, QEMU kvm_cpu_xsave_init() +runs before QEMU invokes arch_prctl() to enable dynamically-enabled +save states such as XTILEDATA, and KVM_GET_SUPPORTED_CPUID hides save +states that have yet to be enabled. 
Therefore, kvm_cpu_xsave_init() +needs to consult the host CPUID instead of KVM_GET_SUPPORTED_CPUID, +and dies with an assertion failure. + +When setting up the ExtSaveArea array to match the host, ignore features that +KVM does not report as supported. This will cause QEMU to skip the incorrect +CPUID leaf instead of tripping the assertion. + +Closes: https://gitlab.com/qemu-project/qemu/-/issues/916 +Reported-by: Daniel P. Berrangé +Analyzed-by: Yang Zhong +Reported-by: Peter Krempa +Tested-by: Daniel P. Berrangé +Signed-off-by: Paolo Bonzini +(cherry picked from commit 58f7db26f21c690cf9a669c314cfd7371506084a) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 4 ++-- + target/i386/cpu.h | 2 ++ + target/i386/kvm/kvm-cpu.c | 19 ++++++++++++------- + 3 files changed, 16 insertions(+), 9 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 09e08f7f38..0543b846ff 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -4980,8 +4980,8 @@ CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) + return cpu_list; + } + +-static uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, +- bool migratable_only) ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only) + { + FeatureWordInfo *wi = &feature_word_info[w]; + uint64_t r = 0; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 8ab2a4042a..006b735fe4 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -604,6 +604,8 @@ typedef enum FeatureWord { + } FeatureWord; + + typedef uint64_t FeatureWordArray[FEATURE_WORDS]; ++uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, ++ bool migratable_only); + + /* cpuid_features bits */ + #define CPUID_FP87 (1U << 0) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index bdc967c484..74c1396a93 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -99,13 +99,18 @@ static void kvm_cpu_xsave_init(void) + for (i = XSTATE_SSE_BIT + 1; i < 
XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + +- if (esa->size) { +- host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); +- if (eax != 0) { +- assert(esa->size == eax); +- esa->offset = ebx; +- esa->ecx = ecx; +- } ++ if (!esa->size) { ++ continue; ++ } ++ if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits) ++ != esa->bits) { ++ continue; ++ } ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +-- +2.35.3 + diff --git a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch index c7b8898..81ae532 100644 --- a/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +++ b/kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch @@ -1,19 +1,19 @@ -From cda3fcf14f2883fea633e25256f6c14a71271adf Mon Sep 17 00:00:00 2001 +From 7eeec7c008e947bc3e1fed682791092b408852c6 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:31 -0300 -Subject: [PATCH 08/18] QIOChannel: Add flags on io_writev and introduce +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 17/37] QIOChannel: Add flags on io_writev and introduce io_flush callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [2/11] 06acfb6b0cb2c25733c2eb198011f7623b5a7024 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [17/26] 7bde4e79fd3f76a6cc84d9cacf50420584ddd35c +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. 
David Alan Gilbert Add flags to io_writev and introduce io_flush as optional callback to QIOChannelClass, allowing the implementation of zero copy writes by @@ -202,7 +202,7 @@ index baa4e2b089..bf52011be2 100644 { QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); diff --git a/io/channel-command.c b/io/channel-command.c -index 338da73ade..54560464ae 100644 +index b2a9e27138..5ff1691bad 100644 --- a/io/channel-command.c +++ b/io/channel-command.c @@ -258,6 +258,7 @@ static ssize_t qio_channel_command_writev(QIOChannel *ioc, @@ -214,7 +214,7 @@ index 338da73ade..54560464ae 100644 { QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); diff --git a/io/channel-file.c b/io/channel-file.c -index d7cf6d278f..ef6807a6be 100644 +index c4bf799a80..348a48545e 100644 --- a/io/channel-file.c +++ b/io/channel-file.c @@ -114,6 +114,7 @@ static ssize_t qio_channel_file_writev(QIOChannel *ioc, @@ -226,7 +226,7 @@ index d7cf6d278f..ef6807a6be 100644 { QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); diff --git a/io/channel-socket.c b/io/channel-socket.c -index 7a8d9f69c9..a1be2197ca 100644 +index 606ec97cf7..bfbd64787e 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -525,6 +525,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, @@ -258,7 +258,7 @@ index 2ae1b92fc0..4ce890a538 100644 { QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); diff --git a/io/channel-websock.c b/io/channel-websock.c -index 55145a6a8c..9619906ac3 100644 +index 70889bb54d..035dd6075b 100644 --- a/io/channel-websock.c +++ b/io/channel-websock.c @@ -1127,6 +1127,7 @@ static ssize_t qio_channel_websock_writev(QIOChannel *ioc, @@ -379,10 +379,10 @@ index e8b019dc36..0640941ac5 100644 static void qio_channel_restart_read(void *opaque) { diff --git a/migration/rdma.c b/migration/rdma.c -index ef1e65ec36..672d1958a9 100644 +index f5d3bbe7e9..54acd2000e 100644 --- a/migration/rdma.c +++ b/migration/rdma.c -@@ -2840,6 +2840,7 @@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, +@@ -2833,6 +2833,7 
@@ static ssize_t qio_channel_rdma_writev(QIOChannel *ioc, size_t niov, int *fds, size_t nfds, diff --git a/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch b/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch new file mode 100644 index 0000000..98f1ac4 --- /dev/null +++ b/kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch @@ -0,0 +1,56 @@ +From a6c4aed18a027ce8e107fdf9184e9ea43a86f843 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Thu, 4 Aug 2022 04:10:43 -0300 +Subject: [PATCH 8/9] QIOChannelSocket: Add support for MSG_ZEROCOPY + IPV6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [8/8] 6e26ee7c9ebaedb07623313cb0678816867751dd +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +For using MSG_ZEROCOPY, there are two steps: +1 - io_writev() the packet, which enqueues the packet for sending, and +2 - io_flush(), which gets confirmation that all packets got correctly sent + +Currently, if MSG_ZEROCOPY is used to send packets over IPV6, no error will +be reported in (1), but it will fail in the first time (2) happens. + +This happens because (2) currently checks for cmsg_level & cmsg_type +associated with IPV4 only, before reporting any error. + +Add checks for cmsg_level & cmsg_type associated with IPV6, and thus enable +support for MSG_ZEROCOPY + IPV6 + +Fixes: 2bc58ffc29 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Signed-off-by: Daniel P. 
Berrangé +(cherry picked from commit 5258a7e2c0677d16e9e1d06845f60171adf0b290) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index cf0d67c51b..6010ad7017 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -747,8 +747,8 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + } + + cm = CMSG_FIRSTHDR(&msg); +- if (cm->cmsg_level != SOL_IP && +- cm->cmsg_type != IP_RECVERR) { ++ if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR && ++ cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) { + error_setg_errno(errp, EPROTOTYPE, + "Wrong cmsg in errqueue"); + return -1; +-- +2.31.1 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch new file mode 100644 index 0000000..5806062 --- /dev/null +++ b/kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch @@ -0,0 +1,65 @@ +From 905cc8032fc63619efb3f0a8c9754b7190bcc43a Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:11 -0300 +Subject: [PATCH 3/9] QIOChannelSocket: Fix zero-copy flush returning code 1 + when nothing sent +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [3/8] 1ad707702fa26cd4d0fa1870c21f5f26ae93ff97 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +If flush is called when no buffer was sent with MSG_ZEROCOPY, it currently +returns 1. This return code should be used only when Linux fails to use +MSG_ZEROCOPY on a lot of sendmsg(). + +Fix this by returning early from flush if no sendmsg(...,MSG_ZEROCOPY) +was attempted. 
+ +Fixes: 2bc58ffc2926 ("QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX") +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Daniel P. Berrangé +Reviewed-by: Juan Quintela +Reviewed-by: Peter Xu +Message-Id: <20220711211112.18951-2-leobras@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit 927f93e099c4f9184e60a1bc61624ac2d04d0223) +Signed-off-by: Leonardo Bras +--- + io/channel-socket.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/io/channel-socket.c b/io/channel-socket.c +index df858da924..cf0d67c51b 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -717,12 +717,18 @@ static int qio_channel_socket_flush(QIOChannel *ioc, + struct cmsghdr *cm; + char control[CMSG_SPACE(sizeof(*serr))]; + int received; +- int ret = 1; ++ int ret; ++ ++ if (sioc->zero_copy_queued == sioc->zero_copy_sent) { ++ return 0; ++ } + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + memset(control, 0, sizeof(control)); + ++ ret = 1; ++ + while (sioc->zero_copy_sent < sioc->zero_copy_queued) { + received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE); + if (received < 0) { +-- +2.31.1 + diff --git a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch index 9d134e6..685478f 100644 --- a/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +++ b/kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch @@ -1,19 +1,19 @@ -From e70f01749addd7d0b7aa7fa4fdedb664f98e6b9b Mon Sep 17 00:00:00 2001 +From c1fd32d93ae42fcf3c1a25f4d56e669f251087d8 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 20 Jun 2022 02:39:43 -0300 -Subject: [PATCH 16/18] QIOChannelSocket: Fix zero-copy send so socket flush +Subject: [PATCH 25/37] QIOChannelSocket: Fix zero-copy send so socket flush works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [10/11] a2dfac987e24026b1a78e90b86234ca206b6401f (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [25/26] 3ede94f3269e21c3ace073ed1a6f24696315bcbb +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert Somewhere between v6 and v7 the of the zero-copy-send patchset a crucial part of the flushing mechanism got missing: incrementing zero_copy_queued. @@ -38,7 +38,7 @@ Signed-off-by: Leonardo Bras 1 file changed, 5 insertions(+) diff --git a/io/channel-socket.c b/io/channel-socket.c -index 7490e5943d..8ae8b212cf 100644 +index 7d37b39de7..df858da924 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -612,6 +612,11 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, diff --git a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch index 89aa806..4b272ee 100644 --- a/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +++ b/kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch @@ -1,19 +1,19 @@ -From 4aeba0365d30dabe2e70dc172683f0878a4a9621 Mon Sep 17 00:00:00 2001 +From 5fd7af93a06adaddbae719aabbaf912159f4fb28 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:32 -0300 -Subject: [PATCH 09/18] QIOChannelSocket: Implement io_writev zero copy flag & +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 18/37] QIOChannelSocket: Implement io_writev zero copy flag & io_flush for CONFIG_LINUX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [3/11] 9afeac1f5ac7675624660a0281726c09c8321180 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 
-RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [18/26] 6f65c8c879a5df57213b541d58285b65178f8547 +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert For CONFIG_LINUX, implement the new zero copy flag and the optional callback io_flush on QIOChannelSocket, but enables it only when MSG_ZEROCOPY @@ -71,7 +71,7 @@ index e747e63514..513c428fe4 100644 diff --git a/io/channel-socket.c b/io/channel-socket.c -index a1be2197ca..fbd2214d20 100644 +index bfbd64787e..38a46ba213 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -26,6 +26,14 @@ @@ -234,7 +234,7 @@ index a1be2197ca..fbd2214d20 100644 static int qio_channel_socket_set_blocking(QIOChannel *ioc, bool enabled, -@@ -790,6 +895,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, +@@ -789,6 +894,9 @@ static void qio_channel_socket_class_init(ObjectClass *klass, ioc_klass->io_set_delay = qio_channel_socket_set_delay; ioc_klass->io_create_watch = qio_channel_socket_create_watch; ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler; diff --git a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch index 6fc0c76..2575f64 100644 --- a/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +++ b/kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch @@ -1,19 +1,19 @@ -From 60bf942a58db12c821f2a6a49e2e0b04b99bec30 Mon Sep 17 00:00:00 2001 +From cbfaf86331c2b2e01a2083303b7554672bf991b7 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 20 Jun 2022 02:39:42 -0300 -Subject: [PATCH 15/18] QIOChannelSocket: Introduce assert and reduce ifdefs to +Subject: [PATCH 24/37] QIOChannelSocket: Introduce assert and reduce ifdefs to improve readability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: 
Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [9/11] eaa02d68301852ccc98bdacc7387d8d03be1cb05 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [24/26] b50e2e65307149f247155a7f7a032dc99e57718d +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert During implementation of MSG_ZEROCOPY feature, a lot of #ifdefs were introduced, particularly at qio_channel_socket_writev(). @@ -37,7 +37,7 @@ Signed-off-by: Leonardo Bras 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/io/channel-socket.c b/io/channel-socket.c -index fbd2214d20..7490e5943d 100644 +index 38a46ba213..7d37b39de7 100644 --- a/io/channel-socket.c +++ b/io/channel-socket.c @@ -579,11 +579,17 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc, diff --git a/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch b/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch new file mode 100644 index 0000000..2aaef33 --- /dev/null +++ b/kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch @@ -0,0 +1,128 @@ +From 96edd15df257f1d1496397a6fac24b4316570d7e Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Thu, 14 Apr 2022 16:45:30 -0400 +Subject: [PATCH 1/3] Revert redhat: Add some devices for exporting upstream + machine types + +RH-Author: Jon Maloy +RH-MergeRequest: 156: Revert redhat: Add some devices for exporting upstream machine types +RH-Commit: [1/1] f25d0da3a181136917ead82f5a5c59efe3fa445a (jmaloy/qemu-kvm) +RH-Bugzilla: 2065043 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Thomas Huth +RH-Acked-by: Peter Xu + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2065043 +Upstream: no + +Manual revert of commit 70d3924521c9bfd912bcf1a1fc76f49eb377de46, since +the directory structure looks different from rhel-av-8.4.0.z where +this commit is taken 
from. Besides, x86_64-softmmu.mak looks totally +different and should not be affected by this reversal. + +Signed-off-by: Jon Maloy +--- + configs/devices/x86_64-softmmu/x86_64-rh-devices.mak | 1 - + .../devices/x86_64-softmmu/x86_64-upstream-devices.mak | 4 ---- + hw/char/parallel.c | 9 --------- + hw/i386/pc_piix.c | 2 +- + hw/i386/pc_q35.c | 2 +- + hw/timer/hpet.c | 8 -------- + 6 files changed, 2 insertions(+), 24 deletions(-) + delete mode 100644 configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak + +diff --git a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +index fdbbdf9742..31ce08edab 100644 +--- a/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak ++++ b/configs/devices/x86_64-softmmu/x86_64-rh-devices.mak +@@ -1,5 +1,4 @@ + include ../rh-virtio.mak +-include x86_64-upstream-devices.mak + + CONFIG_AC97=y + CONFIG_ACPI=y +diff --git a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak b/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak +deleted file mode 100644 +index 2cd20f54d2..0000000000 +--- a/configs/devices/x86_64-softmmu/x86_64-upstream-devices.mak ++++ /dev/null +@@ -1,4 +0,0 @@ +-# We need "isa-parallel" +-CONFIG_PARALLEL=y +-# We need "hpet" +-CONFIG_HPET=y +diff --git a/hw/char/parallel.c b/hw/char/parallel.c +index e5f108211b..b45e67bfbb 100644 +--- a/hw/char/parallel.c ++++ b/hw/char/parallel.c +@@ -29,7 +29,6 @@ + #include "chardev/char-parallel.h" + #include "chardev/char-fe.h" + #include "hw/acpi/aml-build.h" +-#include "hw/boards.h" + #include "hw/irq.h" + #include "hw/isa/isa.h" + #include "hw/qdev-properties.h" +@@ -535,14 +534,6 @@ static void parallel_isa_realizefn(DeviceState *dev, Error **errp) + int base; + uint8_t dummy; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- 
object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!qemu_chr_fe_backend_connected(&s->chr)) { + error_setg(errp, "Can't create parallel device, empty char device"); + return; +diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c +index ab6d03e07a..5f101c8748 100644 +--- a/hw/i386/pc_piix.c ++++ b/hw/i386/pc_piix.c +@@ -966,7 +966,7 @@ static void pc_machine_rhel7_options(MachineClass *m) + { + PCMachineClass *pcmc = PC_MACHINE_CLASS(m); + m->family = "pc_piix_Y"; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + pcmc->default_nic_model = "e1000"; + pcmc->pci_root_uid = 0; + m->default_display = "std"; +diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c +index 882fe7a68d..73b0d0d317 100644 +--- a/hw/i386/pc_q35.c ++++ b/hw/i386/pc_q35.c +@@ -633,7 +633,7 @@ static void pc_q35_machine_rhel_options(MachineClass *m) + pcmc->pci_root_uid = 0; + m->family = "pc_q35_Z"; + m->units_per_default_bus = 1; +- m->default_machine_opts = "firmware=bios-256k.bin,hpet=off"; ++ m->default_machine_opts = "firmware=bios-256k.bin"; + m->default_display = "std"; + m->no_floppy = 1; + m->no_parallel = 1; +diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c +index 202e032524..9520471be2 100644 +--- a/hw/timer/hpet.c ++++ b/hw/timer/hpet.c +@@ -733,14 +733,6 @@ static void hpet_realize(DeviceState *dev, Error **errp) + int i; + HPETTimer *timer; + +- /* Restricted for Red Hat Enterprise Linux */ +- MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); +- if (strstr(mc->name, "rhel")) { +- error_setg(errp, "Device %s is not supported with machine type %s", +- object_get_typename(OBJECT(dev)), mc->name); +- return; +- } +- + if (!s->intcap) { + warn_report("Hpet's intcap not initialized"); + } +-- +2.35.1 + diff --git a/kvm-Update-linux-headers-to-v6.0-rc4.patch b/kvm-Update-linux-headers-to-v6.0-rc4.patch new file mode 100644 index 0000000..39e152b --- /dev/null +++ 
b/kvm-Update-linux-headers-to-v6.0-rc4.patch @@ -0,0 +1,171 @@ +From 10fc28b61a6fba1e6dc44fd544cf31c7f313c622 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Fri, 28 Oct 2022 17:48:00 +0100 +Subject: [PATCH 05/42] Update linux headers to v6.0-rc4 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [5/41] ca55f497d1bf1e72179330f8f613781bf999d898 + +Based on upstream commit d525f73f9186a5bc641b8caf0b2c9bb94e5aa963 +("Update linux headers to v6.0-rc4"), but this is focusing only on the +ZPCI and protected dump changes. + +Signed-off-by: Cédric Le Goater +--- + linux-headers/linux/kvm.h | 87 +++++++++++++++++++++++++++++++++ + linux-headers/linux/vfio_zdev.h | 7 +++ + 2 files changed, 94 insertions(+) + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 0d05d02ee4..c65930288c 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1150,6 +1150,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_DISABLE_QUIRKS2 213 + /* #define KVM_CAP_VM_TSC_CONTROL 214 */ + #define KVM_CAP_SYSTEM_EVENT_DATA 215 ++#define KVM_CAP_S390_PROTECTED_DUMP 217 ++#define KVM_CAP_S390_ZPCI_OP 221 ++#define KVM_CAP_S390_CPU_TOPOLOGY 222 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1651,6 +1654,55 @@ struct kvm_s390_pv_unp { + __u64 tweak; + }; + ++enum pv_cmd_dmp_id { ++ KVM_PV_DUMP_INIT, ++ KVM_PV_DUMP_CONFIG_STOR_STATE, ++ KVM_PV_DUMP_COMPLETE, ++ KVM_PV_DUMP_CPU, ++}; ++ ++struct kvm_s390_pv_dmp { ++ __u64 subcmd; ++ __u64 buff_addr; ++ __u64 buff_len; ++ __u64 gaddr; /* For dump storage state */ ++ __u64 reserved[4]; ++}; ++ ++enum pv_cmd_info_id { ++ KVM_PV_INFO_VM, ++ KVM_PV_INFO_DUMP, ++}; ++ ++struct kvm_s390_pv_info_dump { ++ __u64 
dump_cpu_buffer_len; ++ __u64 dump_config_mem_buffer_per_1m; ++ __u64 dump_config_finalize_len; ++}; ++ ++struct kvm_s390_pv_info_vm { ++ __u64 inst_calls_list[4]; ++ __u64 max_cpus; ++ __u64 max_guests; ++ __u64 max_guest_addr; ++ __u64 feature_indication; ++}; ++ ++struct kvm_s390_pv_info_header { ++ __u32 id; ++ __u32 len_max; ++ __u32 len_written; ++ __u32 reserved; ++}; ++ ++struct kvm_s390_pv_info { ++ struct kvm_s390_pv_info_header header; ++ union { ++ struct kvm_s390_pv_info_dump dump; ++ struct kvm_s390_pv_info_vm vm; ++ }; ++}; ++ + enum pv_cmd_id { + KVM_PV_ENABLE, + KVM_PV_DISABLE, +@@ -1659,6 +1711,8 @@ enum pv_cmd_id { + KVM_PV_VERIFY, + KVM_PV_PREP_RESET, + KVM_PV_UNSHARE_ALL, ++ KVM_PV_INFO, ++ KVM_PV_DUMP, + }; + + struct kvm_pv_cmd { +@@ -2066,4 +2120,37 @@ struct kvm_stats_desc { + /* Available with KVM_CAP_XSAVE2 */ + #define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) + ++/* Available with KVM_CAP_S390_PROTECTED_DUMP */ ++#define KVM_S390_PV_CPU_COMMAND _IOWR(KVMIO, 0xd0, struct kvm_pv_cmd) ++ ++/* Available with KVM_CAP_S390_ZPCI_OP */ ++#define KVM_S390_ZPCI_OP _IOW(KVMIO, 0xd1, struct kvm_s390_zpci_op) ++ ++struct kvm_s390_zpci_op { ++ /* in */ ++ __u32 fh; /* target device */ ++ __u8 op; /* operation to perform */ ++ __u8 pad[3]; ++ union { ++ /* for KVM_S390_ZPCIOP_REG_AEN */ ++ struct { ++ __u64 ibv; /* Guest addr of interrupt bit vector */ ++ __u64 sb; /* Guest addr of summary bit */ ++ __u32 flags; ++ __u32 noi; /* Number of interrupts */ ++ __u8 isc; /* Guest interrupt subclass */ ++ __u8 sbo; /* Offset of guest summary bit vector */ ++ __u16 pad; ++ } reg_aen; ++ __u64 reserved[8]; ++ } u; ++}; ++ ++/* types for kvm_s390_zpci_op->op */ ++#define KVM_S390_ZPCIOP_REG_AEN 0 ++#define KVM_S390_ZPCIOP_DEREG_AEN 1 ++ ++/* flags for kvm_s390_zpci_op->u.reg_aen.flags */ ++#define KVM_S390_ZPCIOP_REGAEN_HOST (1 << 0) ++ + #endif /* __LINUX_KVM_H */ +diff --git a/linux-headers/linux/vfio_zdev.h b/linux-headers/linux/vfio_zdev.h +index 
b4309397b6..77f2aff1f2 100644 +--- a/linux-headers/linux/vfio_zdev.h ++++ b/linux-headers/linux/vfio_zdev.h +@@ -29,6 +29,9 @@ struct vfio_device_info_cap_zpci_base { + __u16 fmb_length; /* Measurement Block Length (in bytes) */ + __u8 pft; /* PCI Function Type */ + __u8 gid; /* PCI function group ID */ ++ /* End of version 1 */ ++ __u32 fh; /* PCI function handle */ ++ /* End of version 2 */ + }; + + /** +@@ -47,6 +50,10 @@ struct vfio_device_info_cap_zpci_group { + __u16 noi; /* Maximum number of MSIs */ + __u16 maxstbl; /* Maximum Store Block Length */ + __u8 version; /* Supported PCI Version */ ++ /* End of version 1 */ ++ __u8 reserved; ++ __u16 imaxstbl; /* Maximum Interpreted Store Block Length */ ++ /* End of version 2 */ + }; + + /** +-- +2.37.3 + diff --git a/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch b/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch new file mode 100644 index 0000000..b29289b --- /dev/null +++ b/kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch @@ -0,0 +1,71 @@ +From 60da56e3685969493ae483c3cc2c66af13d00baf Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Wed, 10 Aug 2022 14:57:18 +0200 +Subject: [PATCH 1/3] backends/hostmem: Fix support of memory-backend-memfd in + qemu_maxrampagesize() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 221: backends/hostmem: Fix support of memory-backend-memfd in qemu_maxrampagesize() +RH-Bugzilla: 2117149 +RH-Acked-by: Thomas Huth +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Commit: [1/1] b5a1047750af32c0a261b8385ea0e819eb16681a + +It is currently not possible yet to use "memory-backend-memfd" on s390x +with hugepages enabled. This problem is caused by qemu_maxrampagesize() +not taking memory-backend-memfd objects into account yet, so the code +in s390_memory_init() fails to enable the huge page support there via +s390_set_max_pagesize(). 
Fix it by generalizing the code, so that it +looks at qemu_ram_pagesize(memdev->mr.ram_block) instead of re-trying +to get the information from the filesystem. + +Suggested-by: David Hildenbrand +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2116496 +Message-Id: <20220810125720.3849835-2-thuth@redhat.com> +Reviewed-by: David Hildenbrand +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit 8be934b70e923104da883b990dee18f02552d40e) +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2117149 +[clg: Resolved conflict on qemu_real_host_page_size() ] +Signed-off-by: Cédric Le Goater +--- + backends/hostmem.c | 14 ++------------ + 1 file changed, 2 insertions(+), 12 deletions(-) + +diff --git a/backends/hostmem.c b/backends/hostmem.c +index 4c05862ed5..0c4654ea85 100644 +--- a/backends/hostmem.c ++++ b/backends/hostmem.c +@@ -305,22 +305,12 @@ bool host_memory_backend_is_mapped(HostMemoryBackend *backend) + return backend->is_mapped; + } + +-#ifdef __linux__ + size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) + { +- Object *obj = OBJECT(memdev); +- char *path = object_property_get_str(obj, "mem-path", NULL); +- size_t pagesize = qemu_mempath_getpagesize(path); +- +- g_free(path); ++ size_t pagesize = qemu_ram_pagesize(memdev->mr.ram_block); ++ g_assert(pagesize >= qemu_real_host_page_size); + return pagesize; + } +-#else +-size_t host_memory_backend_pagesize(HostMemoryBackend *memdev) +-{ +- return qemu_real_host_page_size; +-} +-#endif + + static void + host_memory_backend_memory_complete(UserCreatable *uc, Error **errp) +-- +2.35.3 + diff --git a/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch b/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch new file mode 100644 index 0000000..7ff8e7e --- /dev/null +++ b/kvm-block-Make-bdrv_refresh_limits-non-recursive.patch @@ -0,0 +1,78 @@ +From 6348063b91b2370cc27153fd58fd11a6681631f6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:53 
+0100 +Subject: [PATCH 22/24] block: Make bdrv_refresh_limits() non-recursive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [1/3] 1a1fe37f8d8f0344dd8639d6cc9d884d1aff9096 +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +bdrv_refresh_limits() recurses down to the node's children. That does +not seem necessary: When we refresh limits on some node, and then +recurse down and were to change one of its children's BlockLimits, then +that would mean we noticed the changed limits by pure chance. The fact +that we refresh the parent's limits has nothing to do with it, so the +reason for the change probably happened before this point in time, and +we should have refreshed the limits then. + +Consequently, we should actually propagate block limits changes upwards, +not downwards.  That is a separate and pre-existing issue, though, and +so will not be addressed in this patch. + +The problem with recursing is that bdrv_refresh_limits() is not atomic. +It begins with zeroing BDS.bl, and only then sets proper, valid limits. +If we do not drain all nodes whose limits are refreshed, then concurrent +I/O requests can encounter invalid request_alignment values and crash +qemu. Therefore, a recursing bdrv_refresh_limits() requires the whole +subtree to be drained, which is currently not ensured by most callers. 
+ +A non-recursive bdrv_refresh_limits() only requires the node in question +to not receive I/O requests, and this is done by most callers in some +way or another: +- bdrv_open_driver() deals with a new node with no parents yet +- bdrv_set_file_or_backing_noperm() acts on a drained node +- bdrv_reopen_commit() acts only on drained nodes +- bdrv_append() should in theory require the node to be drained; in + practice most callers just lock the AioContext, which should at least + be enough to prevent concurrent I/O requests from accessing invalid + limits + +So we can resolve the bug by making bdrv_refresh_limits() non-recursive. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1879437 +Signed-off-by: Hanna Reitz +Reviewed-by: Eric Blake +Message-Id: <20220216105355.30729-2-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 4d378bbd831bdd2f6e6adcd4ea5b77b6effaa627) +Signed-off-by: Hanna Reitz +--- + block/io.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/block/io.c b/block/io.c +index 4e4cb556c5..c3e7301613 100644 +--- a/block/io.c ++++ b/block/io.c +@@ -189,10 +189,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp) + QLIST_FOREACH(c, &bs->children, next) { + if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW)) + { +- bdrv_refresh_limits(c->bs, tran, errp); +- if (*errp) { +- return; +- } + bdrv_merge_limits(&bs->bl, &c->bs->bl); + have_limits = true; + } +-- +2.35.3 + diff --git a/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch b/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch new file mode 100644 index 0000000..4b6c3fe --- /dev/null +++ b/kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch @@ -0,0 +1,70 @@ +From 407e23d7f0c9020404247afe7d4df98505222bbb Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Mon, 14 Nov 2022 14:25:02 +0100 +Subject: [PATCH 1/3] docs/system/s390x: Document the "loadparm" 
machine + property +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] e9589ea32d2a8f82971476b644e1063fa14cf822 + +The "loadparm" machine property is useful for selecting alternative +kernels on the disk of the guest, but so far we do not tell the users +yet how to use it. Add some documentation to fill this gap. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2128235 +Message-Id: <20221114132502.110213-1-thuth@redhat.com> +Reviewed-by: Claudio Imbrenda +Signed-off-by: Thomas Huth +(cherry picked from commit be5df2edb5d69ff3107c5616aa035a9ba8d0422e) +--- + docs/system/s390x/bootdevices.rst | 26 ++++++++++++++++++++++++++ + 1 file changed, 26 insertions(+) + +diff --git a/docs/system/s390x/bootdevices.rst b/docs/system/s390x/bootdevices.rst +index 9e591cb9dc..d4bf3b9f0b 100644 +--- a/docs/system/s390x/bootdevices.rst ++++ b/docs/system/s390x/bootdevices.rst +@@ -53,6 +53,32 @@ recommended to specify a CD-ROM device via ``-device scsi-cd`` (as mentioned + above) instead. + + ++Selecting kernels with the ``loadparm`` property ++------------------------------------------------ ++ ++The ``s390-ccw-virtio`` machine supports the so-called ``loadparm`` parameter ++which can be used to select the kernel on the disk of the guest that the ++s390-ccw bios should boot. When starting QEMU, it can be specified like this:: ++ ++ qemu-system-s390x -machine s390-ccw-virtio,loadparm=<string> ++ ++The first way to use this parameter is to use the word ``PROMPT`` as the ++``<string>`` here.
In that case the s390-ccw bios will show a list of ++installed kernels on the disk of the guest and ask the user to enter a number ++to choose which kernel should be booted -- similar to what can be achieved by ++specifying the ``-boot menu=on`` option when starting QEMU. Note that the menu ++list will only show the names of the installed kernels when using a DASD-like ++disk image with 4k byte sectors. On normal SCSI-style disks with 512-byte ++sectors, there is not enough space for the zipl loader on the disk to store ++the kernel names, so you only get a list without names here. ++ ++The second way to use this parameter is to use a number in the range from 0 ++to 31. The numbers that can be used here correspond to the numbers that are ++shown when using the ``PROMPT`` option, and the s390-ccw bios will then try ++to automatically boot the kernel that is associated with the given number. ++Note that ``0`` can be used to boot the default entry. ++ ++ + Booting from a network device + ----------------------------- + +-- +2.37.3 + diff --git a/kvm-dump-Add-architecture-section-and-section-string-tab.patch b/kvm-dump-Add-architecture-section-and-section-string-tab.patch new file mode 100644 index 0000000..bc06fa8 --- /dev/null +++ b/kvm-dump-Add-architecture-section-and-section-string-tab.patch @@ -0,0 +1,356 @@ +From f2f3efff83dddd38a97699cd2701f46f61a732e3 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 11:32:10 +0000 +Subject: [PATCH 36/42] dump: Add architecture section and section string table + support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [36/41] 83b98ff185e93e62703f686b65546d60c783d783 + +Add hooks which architectures can use to add
arbitrary data to custom +sections. + +Also add a section name string table in order to identify section +contents + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017113210.41674-1-frankja@linux.ibm.com> +(cherry picked from commit 9b72224f44612ddd5b434a1bccf79346946d11da) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 186 +++++++++++++++++++++++++++++++------ + include/sysemu/dump-arch.h | 3 + + include/sysemu/dump.h | 3 + + 3 files changed, 166 insertions(+), 26 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7a42401790..4aa8fb64d2 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -104,6 +104,7 @@ static int dump_cleanup(DumpState *s) + memory_mapping_list_free(&s->list); + close(s->fd); + g_free(s->guest_note); ++ g_array_unref(s->string_table_buf); + s->guest_note = NULL; + if (s->resume) { + if (s->detached) { +@@ -153,11 +154,10 @@ static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) +@@ -181,11 +181,10 @@ static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header->e_phnum = cpu_to_dump16(s, phnum); +- if (s->shdr_num) { +- elf_header->e_shoff = 
cpu_to_dump32(s, s->shdr_offset); +- elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); +- } ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shstrndx = cpu_to_dump16(s, s->shdr_num - 1); + } + + static void write_elf_header(DumpState *s, Error **errp) +@@ -196,6 +195,8 @@ static void write_elf_header(DumpState *s, Error **errp) + void *header_ptr; + int ret; + ++ /* The NULL header and the shstrtab are always defined */ ++ assert(s->shdr_num >= 2); + if (dump_is_64bit(s)) { + prepare_elf64_header(s, &elf64_header); + header_size = sizeof(elf64_header); +@@ -394,17 +395,49 @@ static void prepare_elf_section_hdr_zero(DumpState *s) + } + } + +-static void prepare_elf_section_hdrs(DumpState *s) ++static void prepare_elf_section_hdr_string(DumpState *s, void *buff) ++{ ++ uint64_t index = s->string_table_buf->len; ++ const char strtab[] = ".shstrtab"; ++ Elf32_Shdr shdr32 = {}; ++ Elf64_Shdr shdr64 = {}; ++ int shdr_size; ++ void *shdr; ++ ++ g_array_append_vals(s->string_table_buf, strtab, sizeof(strtab)); ++ if (dump_is_64bit(s)) { ++ shdr_size = sizeof(Elf64_Shdr); ++ shdr64.sh_type = SHT_STRTAB; ++ shdr64.sh_offset = s->section_offset + s->elf_section_data_size; ++ shdr64.sh_name = index; ++ shdr64.sh_size = s->string_table_buf->len; ++ shdr = &shdr64; ++ } else { ++ shdr_size = sizeof(Elf32_Shdr); ++ shdr32.sh_type = SHT_STRTAB; ++ shdr32.sh_offset = s->section_offset + s->elf_section_data_size; ++ shdr32.sh_name = index; ++ shdr32.sh_size = s->string_table_buf->len; ++ shdr = &shdr32; ++ } ++ memcpy(buff, shdr, shdr_size); ++} ++ ++static bool prepare_elf_section_hdrs(DumpState *s, Error **errp) + { + size_t len, sizeof_shdr; ++ void *buff_hdr; + + /* + * Section ordering: + * - HDR zero ++ * - Arch section hdrs ++ * - String table hdr + */ + 
sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + len = sizeof_shdr * s->shdr_num; + s->elf_section_hdrs = g_malloc0(len); ++ buff_hdr = s->elf_section_hdrs; + + /* + * The first section header is ALWAYS a special initial section +@@ -420,6 +453,26 @@ static void prepare_elf_section_hdrs(DumpState *s) + if (s->phdr_num >= PN_XNUM) { + prepare_elf_section_hdr_zero(s); + } ++ buff_hdr += sizeof_shdr; ++ ++ /* Add architecture defined section headers */ ++ if (s->dump_info.arch_sections_write_hdr_fn ++ && s->shdr_num > 2) { ++ buff_hdr += s->dump_info.arch_sections_write_hdr_fn(s, buff_hdr); ++ ++ if (s->shdr_num >= SHN_LORESERVE) { ++ error_setg_errno(errp, EINVAL, ++ "dump: too many architecture defined sections"); ++ return false; ++ } ++ } ++ ++ /* ++ * String table is the last section since strings are added via ++ * arch_sections_write_hdr(). ++ */ ++ prepare_elf_section_hdr_string(s, buff_hdr); ++ return true; + } + + static void write_elf_section_headers(DumpState *s, Error **errp) +@@ -427,7 +480,9 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + size_t sizeof_shdr = dump_is_64bit(s) ? 
sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); + int ret; + +- prepare_elf_section_hdrs(s); ++ if (!prepare_elf_section_hdrs(s, errp)) { ++ return; ++ } + + ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +@@ -437,6 +492,29 @@ static void write_elf_section_headers(DumpState *s, Error **errp) + g_free(s->elf_section_hdrs); + } + ++static void write_elf_sections(DumpState *s, Error **errp) ++{ ++ int ret; ++ ++ if (s->elf_section_data_size) { ++ /* Write architecture section data */ ++ ret = fd_write_vmcore(s->elf_section_data, ++ s->elf_section_data_size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write architecture section data"); ++ return; ++ } ++ } ++ ++ /* Write string table */ ++ ret = fd_write_vmcore(s->string_table_buf->data, ++ s->string_table_buf->len, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, "dump: failed to write string table data"); ++ } ++} ++ + static void write_data(DumpState *s, void *buf, int length, Error **errp) + { + int ret; +@@ -693,6 +771,31 @@ static void dump_iterate(DumpState *s, Error **errp) + } + } + ++static void dump_end(DumpState *s, Error **errp) ++{ ++ int rc; ++ ERRP_GUARD(); ++ ++ if (s->elf_section_data_size) { ++ s->elf_section_data = g_malloc0(s->elf_section_data_size); ++ } ++ ++ /* Adds the architecture defined section data to s->elf_section_data */ ++ if (s->dump_info.arch_sections_write_fn && ++ s->elf_section_data_size) { ++ rc = s->dump_info.arch_sections_write_fn(s, s->elf_section_data); ++ if (rc) { ++ error_setg_errno(errp, rc, ++ "dump: failed to get arch section data"); ++ g_free(s->elf_section_data); ++ return; ++ } ++ } ++ ++ /* write sections to vmcore */ ++ write_elf_sections(s, errp); ++} ++ + static void create_vmcore(DumpState *s, Error **errp) + { + ERRP_GUARD(); +@@ -702,7 +805,14 @@ static void create_vmcore(DumpState *s, Error **errp) + return; + } + ++ /* Iterate over memory and dump it to file */ + dump_iterate(s, errp); ++ 
if (*errp) { ++ return; ++ } ++ ++ /* Write the section data */ ++ dump_end(s, errp); + } + + static int write_start_flat_header(int fd) +@@ -1720,6 +1830,14 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->filter_area_begin = begin; + s->filter_area_length = length; + ++ /* First index is 0, it's the special null name */ ++ s->string_table_buf = g_array_new(FALSE, TRUE, 1); ++ /* ++ * Allocate the null name, due to the clearing option set to true ++ * it will be 0. ++ */ ++ g_array_set_size(s->string_table_buf, 1); ++ + memory_mapping_list_init(&s->list); + + guest_phys_blocks_init(&s->guest_phys_blocks); +@@ -1856,26 +1974,42 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + /* +- * calculate phdr_num ++ * The first section header is always a special one in which most ++ * fields are 0. The section header string table is also always ++ * set. ++ */ ++ s->shdr_num = 2; ++ ++ /* ++ * Adds the number of architecture sections to shdr_num and sets ++ * elf_section_data_size so we know the offsets and sizes of all ++ * parts. ++ */ ++ if (s->dump_info.arch_sections_add_fn) { ++ s->dump_info.arch_sections_add_fn(s); ++ } ++ ++ /* ++ * calculate shdr_num so we know the offsets and sizes of all ++ * parts. ++ * Calculate phdr_num + * +- * the type of ehdr->e_phnum is uint16_t, so we should avoid overflow ++ * The absolute maximum amount of phdrs is UINT32_MAX - 1 as ++ * sh_info is 32 bit. There's special handling once we go over ++ * UINT16_MAX - 1 but that is handled in the ehdr and section ++ * code. 
+ */ +- s->phdr_num = 1; /* PT_NOTE */ +- if (s->list.num < UINT16_MAX - 2) { +- s->shdr_num = 0; ++ s->phdr_num = 1; /* Reserve PT_NOTE */ ++ if (s->list.num <= UINT32_MAX - 1) { + s->phdr_num += s->list.num; + } else { +- /* sh_info of section 0 holds the real number of phdrs */ +- s->shdr_num = 1; +- +- /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ +- if (s->list.num <= UINT32_MAX - 1) { +- s->phdr_num += s->list.num; +- } else { +- s->phdr_num = UINT32_MAX; +- } ++ s->phdr_num = UINT32_MAX; + } + ++ /* ++ * Now that the number of section and program headers is known we ++ * can calculate the offsets of the headers and data. ++ */ + if (dump_is_64bit(s)) { + s->shdr_offset = sizeof(Elf64_Ehdr); + s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +diff --git a/include/sysemu/dump-arch.h b/include/sysemu/dump-arch.h +index e25b02e990..59bbc9be38 100644 +--- a/include/sysemu/dump-arch.h ++++ b/include/sysemu/dump-arch.h +@@ -21,6 +21,9 @@ typedef struct ArchDumpInfo { + uint32_t page_size; /* The target's page size. If it's variable and + * unknown, then this should be the maximum. */ + uint64_t phys_base; /* The target's physmem base. 
*/ ++ void (*arch_sections_add_fn)(DumpState *s); ++ uint64_t (*arch_sections_write_hdr_fn)(DumpState *s, uint8_t *buff); ++ int (*arch_sections_write_fn)(DumpState *s, uint8_t *buff); + } ArchDumpInfo; + + struct GuestPhysBlockList; /* memory_mapping.h */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9ed811b313..38ccac7190 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -180,6 +180,9 @@ typedef struct DumpState { + hwaddr note_offset; + + void *elf_section_hdrs; /* Pointer to section header buffer */ ++ void *elf_section_data; /* Pointer to section data buffer */ ++ uint64_t elf_section_data_size; /* Size of section data */ ++ GArray *string_table_buf; /* String table data buffer */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/kvm-dump-Add-more-offset-variables.patch b/kvm-dump-Add-more-offset-variables.patch new file mode 100644 index 0000000..373f814 --- /dev/null +++ b/kvm-dump-Add-more-offset-variables.patch @@ -0,0 +1,138 @@ +From bee31226b87d0b05faae84e88cce3af1b8dabbfd Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:59 +0000 +Subject: [PATCH 17/42] dump: Add more offset variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [17/41] fbe629e1476e8a0e039f989af6e1f4707075ba01 + +Offset calculations are easy enough to get wrong. Let's add a few +variables to make moving around elf headers and data sections easier. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Richard Henderson +Message-Id: <20220330123603.107120-6-frankja@linux.ibm.com> +(cherry picked from commit e71d353360bb09a8e784e35d78370c691f6ea185) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 35 +++++++++++++++-------------------- + include/sysemu/dump.h | 4 ++++ + 2 files changed, 19 insertions(+), 20 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 5cc2322325..85a402b38c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -142,13 +142,11 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump64(s, shoff); ++ elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -179,13 +177,11 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); + elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); ++ elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; +- +- elf_header.e_shoff = cpu_to_dump32(s, shoff); 
++ elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); + elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } +@@ -248,12 +244,11 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + static void write_elf64_note(DumpState *s, Error **errp) + { + Elf64_Phdr phdr; +- hwaddr begin = s->memory_offset - s->note_size; + int ret; + + memset(&phdr, 0, sizeof(Elf64_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, begin); ++ phdr.p_offset = cpu_to_dump64(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump64(s, s->note_size); + phdr.p_memsz = cpu_to_dump64(s, s->note_size); +@@ -313,13 +308,12 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + + static void write_elf32_note(DumpState *s, Error **errp) + { +- hwaddr begin = s->memory_offset - s->note_size; + Elf32_Phdr phdr; + int ret; + + memset(&phdr, 0, sizeof(Elf32_Phdr)); + phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, begin); ++ phdr.p_offset = cpu_to_dump32(s, s->note_offset); + phdr.p_paddr = 0; + phdr.p_filesz = cpu_to_dump32(s, s->note_size); + phdr.p_memsz = cpu_to_dump32(s, s->note_size); +@@ -1826,15 +1820,16 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + +- s->note_size; ++ s->phdr_offset = sizeof(Elf64_Ehdr); ++ s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + +- s->note_size; ++ ++ s->phdr_offset = sizeof(Elf32_Ehdr); ++ s->shdr_offset = s->phdr_offset + 
sizeof(Elf32_Phdr) * s->phdr_num; ++ s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->memory_offset = s->note_offset + s->note_size; + } + + return; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 19458bffbd..ffc2ea1072 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -159,6 +159,10 @@ typedef struct DumpState { + bool resume; + bool detached; + ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; + hwaddr memory_offset; + int fd; + +-- +2.37.3 + diff --git a/kvm-dump-Cleanup-dump_begin-write-functions.patch b/kvm-dump-Cleanup-dump_begin-write-functions.patch new file mode 100644 index 0000000..449aab4 --- /dev/null +++ b/kvm-dump-Cleanup-dump_begin-write-functions.patch @@ -0,0 +1,94 @@ +From cbb653d73e32513ccd46b293a52384eed6a5f84f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:02 +0000 +Subject: [PATCH 20/42] dump: Cleanup dump_begin write functions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [20/41] 18ea1457a3e54fd368e556d96c3be50c6ad0a6bd + +There's no need to have a gigantic if in there let's move the elf +32/64 bit logic into the section, segment or note code. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-9-frankja@linux.ibm.com> +(cherry picked from commit 5ff2e5a3e1e67930e523486e39549a33fcf97227) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 42 +++++++++++------------------------------- + 1 file changed, 11 insertions(+), 31 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 823ca32883..88abde355a 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -565,46 +565,26 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOAD to vmcore */ ++ write_elf_loads(s, errp); ++ if (*errp) { ++ return; ++ } ++ ++ /* write section to vmcore */ ++ if (s->shdr_num) { ++ write_elf_section(s, 1, errp); + if (*errp) { + return; + } ++ } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } +- } +- ++ if (dump_is_64bit(s)) { + /* write notes to vmcore */ + write_elf64_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } else { +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); +- if (*errp) { +- return; +- } +- +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 0, errp); +- if (*errp) { +- return; +- } +- } +- + /* write notes to vmcore */ + write_elf32_notes(fd_write_vmcore, s, errp); +- if (*errp) { +- return; +- } + } + } + +-- +2.37.3 + diff --git a/kvm-dump-Consolidate-elf-note-function.patch b/kvm-dump-Consolidate-elf-note-function.patch new file mode 100644 index 0000000..3353e4a --- /dev/null +++ b/kvm-dump-Consolidate-elf-note-function.patch @@ -0,0 +1,67 @@ +From 0547599cf507930f91943f22d5f917ebacf69484 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:03 +0000 +Subject: [PATCH 21/42] dump: Consolidate elf note function +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [21/41] 52298c098c116aea75ad15894731ff412c2c4e73 + +Just like with the other write functions let's move the 32/64 bit elf +handling to a function to improve readability. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-10-frankja@linux.ibm.com> +(cherry picked from commit c68124738bc29017e4254c898bc40be7be477af7) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88abde355a..a451abc590 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -520,6 +520,15 @@ static void write_elf_loads(DumpState *s, Error **errp) + } + } + ++static void write_elf_notes(DumpState *s, Error **errp) ++{ ++ if (dump_is_64bit(s)) { ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ } else { ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ } ++} ++ + /* write elf header, PT_NOTE and elf note to vmcore. 
*/ + static void dump_begin(DumpState *s, Error **errp) + { +@@ -579,13 +588,8 @@ static void dump_begin(DumpState *s, Error **errp) + } + } + +- if (dump_is_64bit(s)) { +- /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, errp); +- } else { +- /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, errp); +- } ++ /* write notes to vmcore */ ++ write_elf_notes(s, errp); + } + + static int get_next_block(DumpState *s, GuestPhysBlock *block) +-- +2.37.3 + diff --git a/kvm-dump-Consolidate-phdr-note-writes.patch b/kvm-dump-Consolidate-phdr-note-writes.patch new file mode 100644 index 0000000..700927a --- /dev/null +++ b/kvm-dump-Consolidate-phdr-note-writes.patch @@ -0,0 +1,169 @@ +From f87abe1ef14e80731249ebe9fe1bea569a68e9b4 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:01 +0000 +Subject: [PATCH 19/42] dump: Consolidate phdr note writes +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [19/41] 180c4c0ab4941a0bf366dc7f32ee035e03daa6c0 + +There's no need to have two write functions. Let's rather have two +functions that set the data for elf 32/64 and then write it in a +common function. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-8-frankja@linux.ibm.com> +(cherry picked from commit bc7d558017e6700f9a05c61b0b638a8994945f0d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 94 +++++++++++++++++++++++++++-------------------------- + 1 file changed, 48 insertions(+), 46 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 6394e94023..823ca32883 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -246,24 +246,15 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_note(DumpState *s, Error **errp) ++static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { +- Elf64_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf64_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump64(s, s->note_offset); +- phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump64(s, s->note_size); +- phdr.p_memsz = cpu_to_dump64(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf64_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump64(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump64(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump64(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static inline int cpu_index(CPUState *cpu) +@@ -311,24 +302,15 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_note(DumpState *s, Error **errp) ++static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { +- Elf32_Phdr phdr; +- int ret; +- +- memset(&phdr, 0, sizeof(Elf32_Phdr)); +- phdr.p_type = cpu_to_dump32(s, PT_NOTE); +- phdr.p_offset = cpu_to_dump32(s, s->note_offset); +- 
phdr.p_paddr = 0; +- phdr.p_filesz = cpu_to_dump32(s, s->note_size); +- phdr.p_memsz = cpu_to_dump32(s, s->note_size); +- phdr.p_vaddr = 0; +- +- ret = fd_write_vmcore(&phdr, sizeof(Elf32_Phdr), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write program header table"); +- } ++ memset(phdr, 0, sizeof(*phdr)); ++ phdr->p_type = cpu_to_dump32(s, PT_NOTE); ++ phdr->p_offset = cpu_to_dump32(s, s->note_offset); ++ phdr->p_paddr = 0; ++ phdr->p_filesz = cpu_to_dump32(s, s->note_size); ++ phdr->p_memsz = cpu_to_dump32(s, s->note_size); ++ phdr->p_vaddr = 0; + } + + static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, +@@ -358,6 +340,32 @@ static void write_elf32_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + ++static void write_elf_phdr_note(DumpState *s, Error **errp) ++{ ++ ERRP_GUARD(); ++ Elf32_Phdr phdr32; ++ Elf64_Phdr phdr64; ++ void *phdr; ++ size_t size; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ write_elf64_phdr_note(s, &phdr64); ++ size = sizeof(phdr64); ++ phdr = &phdr64; ++ } else { ++ write_elf32_phdr_note(s, &phdr32); ++ size = sizeof(phdr32); ++ phdr = &phdr32; ++ } ++ ++ ret = fd_write_vmcore(phdr, size, s); ++ if (ret < 0) { ++ error_setg_errno(errp, -ret, ++ "dump: failed to write program header table"); ++ } ++} ++ + static void write_elf_section(DumpState *s, int type, Error **errp) + { + Elf32_Shdr shdr32; +@@ -551,13 +559,13 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (dump_is_64bit(s)) { +- /* write PT_NOTE to vmcore */ +- write_elf64_note(s, errp); +- if (*errp) { +- return; +- } ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); ++ if (*errp) { ++ return; ++ } + ++ if (dump_is_64bit(s)) { + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +@@ -578,12 +586,6 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + } else { +- /* write PT_NOTE to vmcore */ +- write_elf32_note(s, 
errp); +- if (*errp) { +- return; +- } +- + /* write all PT_LOAD to vmcore */ + write_elf_loads(s, errp); + if (*errp) { +-- +2.37.3 + diff --git a/kvm-dump-Introduce-dump_is_64bit-helper-function.patch b/kvm-dump-Introduce-dump_is_64bit-helper-function.patch new file mode 100644 index 0000000..ac5d269 --- /dev/null +++ b/kvm-dump-Introduce-dump_is_64bit-helper-function.patch @@ -0,0 +1,118 @@ +From c851676d202b5b76962529f3b6d433936becbd8a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:36:00 +0000 +Subject: [PATCH 18/42] dump: Introduce dump_is_64bit() helper function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [18/41] a0fd2d1985c61b8e50d4a7ca26bc0ee6fcaa6196 + +Checking d_class in dump_info leads to lengthy conditionals so let's +shorten things a bit by introducing a helper function. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-7-frankja@linux.ibm.com> +(cherry picked from commit 05bbaa5040ccb3419e8b93af8040485430e2db42) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 85a402b38c..6394e94023 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -55,6 +55,11 @@ static Error *dump_migration_blocker; + DIV_ROUND_UP((name_size), 4) + \ + DIV_ROUND_UP((desc_size), 4)) * 4) + ++static inline bool dump_is_64bit(DumpState *s) ++{ ++ return s->dump_info.d_class == ELFCLASS64; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -489,7 +494,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + get_offset_range(memory_mapping->phys_addr, + memory_mapping->length, + s, &offset, &filesz); +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, + filesz, errp); + } else { +@@ -537,7 +542,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + write_elf64_header(s, errp); + } else { + write_elf32_header(s, errp); +@@ -546,7 +551,7 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + /* write PT_NOTE to vmcore */ + write_elf64_note(s, errp); + if (*errp) { +@@ -757,7 +762,7 @@ static void get_note_sizes(DumpState *s, const void *note, + uint64_t name_sz; + uint64_t desc_sz; + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + const Elf64_Nhdr *hdr = note; + note_head_sz = sizeof(Elf64_Nhdr); + name_sz = tswap64(hdr->n_namesz); +@@ -1017,10 +1022,10 @@ out: + + static void 
write_dump_header(DumpState *s, Error **errp) + { +- if (s->dump_info.d_class == ELFCLASS32) { +- create_header32(s, errp); +- } else { ++ if (dump_is_64bit(s)) { + create_header64(s, errp); ++ } else { ++ create_header32(s, errp); + } + } + +@@ -1715,8 +1720,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + uint32_t size; + uint16_t format; + +- note_head_size = s->dump_info.d_class == ELFCLASS32 ? +- sizeof(Elf32_Nhdr) : sizeof(Elf64_Nhdr); ++ note_head_size = dump_is_64bit(s) ? ++ sizeof(Elf64_Nhdr) : sizeof(Elf32_Nhdr); + + format = le16_to_cpu(vmci->vmcoreinfo.guest_format); + size = le32_to_cpu(vmci->vmcoreinfo.size); +@@ -1819,7 +1824,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + } + +- if (s->dump_info.d_class == ELFCLASS64) { ++ if (dump_is_64bit(s)) { + s->phdr_offset = sizeof(Elf64_Ehdr); + s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +-- +2.37.3 + diff --git a/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch b/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch new file mode 100644 index 0000000..71414ed --- /dev/null +++ b/kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch @@ -0,0 +1,136 @@ +From 255722667a4fa4d522bb0b7e0825cbbe635abb8d Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:57 +0000 +Subject: [PATCH 15/42] dump: Introduce shdr_num to decrease complexity +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [15/41] b0215ea5d381ef7f6abfe3f3bafea51ce933da56 + +Let's move from a boolean to a int variable which will later enable us +to store the number of sections that are in the 
dump file. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-4-frankja@linux.ibm.com> +(cherry picked from commit 862a395858e5a302ed5921487777acdc95a3a31b) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++++++------------ + include/sysemu/dump.h | 2 +- + 2 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 7236b167cc..972e28b089 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -145,12 +145,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -182,12 +182,12 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); + elf_header.e_phnum = cpu_to_dump16(s, phnum); +- if (s->have_section) { ++ if (s->shdr_num) { + uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, 1); ++ elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); + } + + ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +@@ -566,7 +566,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { 
+ write_elf_section(s, 1, errp); + if (*errp) { + return; +@@ -592,7 +592,7 @@ static void dump_begin(DumpState *s, Error **errp) + } + + /* write section to vmcore */ +- if (s->have_section) { ++ if (s->shdr_num) { + write_elf_section(s, 0, errp); + if (*errp) { + return; +@@ -1811,11 +1811,11 @@ static void dump_init(DumpState *s, int fd, bool has_format, + */ + s->phdr_num = 1; /* PT_NOTE */ + if (s->list.num < UINT16_MAX - 2) { ++ s->shdr_num = 0; + s->phdr_num += s->list.num; +- s->have_section = false; + } else { + /* sh_info of section 0 holds the real number of phdrs */ +- s->have_section = true; ++ s->shdr_num = 1; + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +@@ -1826,19 +1826,19 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) + s->note_size; ++ sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + + sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; + } + } else { +- if (s->have_section) { ++ if (s->shdr_num) { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) + s->note_size; ++ sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + + sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b463fc9c02..19458bffbd 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -155,7 +155,7 @@ typedef struct DumpState { + ArchDumpInfo dump_info; + MemoryMappingList list; + uint32_t phdr_num; +- bool have_section; ++ uint32_t shdr_num; + bool resume; + bool detached; + ssize_t note_size; +-- +2.37.3 + diff --git 
a/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch b/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch new file mode 100644 index 0000000..5d66c34 --- /dev/null +++ b/kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch @@ -0,0 +1,142 @@ +From a18ba2fbaf132724e81be92da42b36d8f365e66c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:56 +0000 +Subject: [PATCH 24/42] dump: Refactor dump_iterate and introduce + dump_filter_memblock_*() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [24/41] 74ef470f24d9d98093c4d63730a99474587033fd + +The iteration over the memblocks in dump_iterate() is hard to +understand so it's about time to clean it up. Instead of manually +grabbing the next memblock we can use QTAILQ_FOREACH to iterate over +all memblocks. + +Additionally we move the calculation of the offset and length out by +introducing and using the dump_filter_memblock_*() functions. These +functions will later be used to clean up other parts of dump.c. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-4-frankja@linux.ibm.com> +(cherry picked from commit 1e8113032f5b1efc5da66382470ce4809c76f8f2) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 74 ++++++++++++++++++++++++++++++----------------------- + 1 file changed, 42 insertions(+), 32 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index fa787f379f..d981e843dd 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -592,31 +592,43 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int get_next_block(DumpState *s, GuestPhysBlock *block) ++static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { +- while (1) { +- block = QTAILQ_NEXT(block, next); +- if (!block) { +- /* no more block */ +- return 1; +- } ++ int64_t size, left, right; + +- s->start = 0; +- s->next_block = block; +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { +- /* This block is out of the range */ +- continue; +- } ++ /* No filter, return full size */ ++ if (!filter_area_length) { ++ return block->target_end - block->target_start; ++ } + +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } ++ /* calculate the overlapped region. */ ++ left = MAX(filter_area_start, block->target_start); ++ right = MIN(filter_area_start + filter_area_length, block->target_end); ++ size = right - left; ++ size = size > 0 ? 
size : 0; ++ ++ return size; ++} ++ ++static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) ++{ ++ if (filter_area_length) { ++ /* return -1 if the block is not within filter area */ ++ if (block->target_start >= filter_area_start + filter_area_length || ++ block->target_end <= filter_area_start) { ++ return -1; + } + +- return 0; ++ if (filter_area_start > block->target_start) { ++ return filter_area_start - block->target_start; ++ } + } ++ ++ return 0; + } + + /* write all memory to vmcore */ +@@ -624,24 +636,22 @@ static void dump_iterate(DumpState *s, Error **errp) + { + ERRP_GUARD(); + GuestPhysBlock *block; +- int64_t size; +- +- do { +- block = s->next_block; ++ int64_t memblock_size, memblock_start; + +- size = block->target_end - block->target_start; +- if (s->has_filter) { +- size -= s->start; +- if (s->begin + s->length < block->target_end) { +- size -= block->target_end - (s->begin + s->length); +- } ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ if (memblock_start == -1) { ++ continue; + } +- write_memory(s, block, s->start, size, errp); ++ ++ memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ ++ /* Write the memory to file */ ++ write_memory(s, block, memblock_start, memblock_size, errp); + if (*errp) { + return; + } +- +- } while (!get_next_block(s, block)); ++ } + } + + static void create_vmcore(DumpState *s, Error **errp) +-- +2.37.3 + diff --git a/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch b/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch new file mode 100644 index 0000000..0bb95f9 --- /dev/null +++ b/kvm-dump-Reintroduce-memory_offset-and-section_offset.patch @@ -0,0 +1,45 @@ +From 6932fe3afbec443bbf6acff5b707536254e1bc37 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:16 +0000 +Subject: [PATCH 
35/42] dump: Reintroduce memory_offset and section_offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [35/41] e60c0d066aeeedb42e724712bc3aa7b7591c6c79 + +section_offset will later be used to store the offset to the section +data which will be stored last. For now memory_offset is only needed +to make section_offset look nicer. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-5-frankja@linux.ibm.com> +(cherry picked from commit 13fd417ddc81a1685c6a8f4e1c80bbfe7150f164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/dump/dump.c b/dump/dump.c +index d17537d4e9..7a42401790 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1885,6 +1885,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; + s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } ++ s->memory_offset = s->note_offset + s->note_size; ++ s->section_offset = s->memory_offset + s->total_size; + + return; + +-- +2.37.3 + diff --git a/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch b/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch new file mode 100644 index 0000000..b4a1f10 --- /dev/null +++ b/kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch @@ -0,0 +1,70 @@ +From a8eeab6936a2bd27b33b63aed7e2ef96034f7772 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:58 +0000 +Subject: [PATCH 16/42] dump: Remove the section if when calculating the memory + offset +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 
8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [16/41] ff214d2c23b9cb16fd49d22d976829267df43133 + +When s->shdr_num is 0 we'll add 0 bytes of section headers which is +equivalent to not adding section headers but with the multiplication +we can remove a if/else. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-5-frankja@linux.ibm.com> +(cherry picked from commit 344107e07bd81546474a54ab83800158ca953059) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 24 ++++++++---------------- + 1 file changed, 8 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 972e28b089..5cc2322325 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1826,23 +1826,15 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (s->dump_info.d_class == ELFCLASS64) { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + +- sizeof(Elf64_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf64_Ehdr) + ++ sizeof(Elf64_Phdr) * s->phdr_num + ++ sizeof(Elf64_Shdr) * s->shdr_num + ++ s->note_size; + } else { +- if (s->shdr_num) { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + +- sizeof(Elf32_Shdr) * s->shdr_num + s->note_size; +- } else { +- s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->phdr_num + s->note_size; +- } ++ s->memory_offset = sizeof(Elf32_Ehdr) + ++ sizeof(Elf32_Phdr) * s->phdr_num + ++ sizeof(Elf32_Shdr) * s->shdr_num + ++ s->note_size; + } + + return; +-- +2.37.3 + diff --git a/kvm-dump-Remove-the-sh_info-variable.patch 
b/kvm-dump-Remove-the-sh_info-variable.patch new file mode 100644 index 0000000..3c9fe51 --- /dev/null +++ b/kvm-dump-Remove-the-sh_info-variable.patch @@ -0,0 +1,176 @@ +From eb763bec53d6b9aea7a6b60b0cf8c5d8b5f1b35c Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 7 Apr 2022 09:48:24 +0000 +Subject: [PATCH 14/42] dump: Remove the sh_info variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [14/41] 24af12b78c8f5a02cf85df2f6b1d64249f9499c9 + +There's no need to have phdr_num and sh_info at the same time. We can +make phdr_num 32 bit and set PN_XNUM when we write the header if +phdr_num >= PN_XNUM. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220407094824.5074-1-frankja@linux.ibm.com> +(cherry picked from commit 046bc4160bc780eaacc2d702a2589f1a7a01188d) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 44 +++++++++++++++++++++++-------------------- + include/sysemu/dump.h | 3 +-- + 2 files changed, 25 insertions(+), 22 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 9876123f2e..7236b167cc 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -124,6 +124,12 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + + static void write_elf64_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. 
++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf64_Ehdr elf_header; + int ret; + +@@ -138,9 +144,9 @@ static void write_elf64_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump64(s, sizeof(Elf64_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->sh_info; ++ uint64_t shoff = sizeof(Elf64_Ehdr) + sizeof(Elf64_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump64(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +@@ -155,6 +161,12 @@ static void write_elf64_header(DumpState *s, Error **errp) + + static void write_elf32_header(DumpState *s, Error **errp) + { ++ /* ++ * phnum in the elf header is 16 bit, if we have more segments we ++ * set phnum to PN_XNUM and write the real number of segments to a ++ * special section. 
++ */ ++ uint16_t phnum = MIN(s->phdr_num, PN_XNUM); + Elf32_Ehdr elf_header; + int ret; + +@@ -169,9 +181,9 @@ static void write_elf32_header(DumpState *s, Error **errp) + elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); + elf_header.e_phoff = cpu_to_dump32(s, sizeof(Elf32_Ehdr)); + elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, s->phdr_num); ++ elf_header.e_phnum = cpu_to_dump16(s, phnum); + if (s->have_section) { +- uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->sh_info; ++ uint32_t shoff = sizeof(Elf32_Ehdr) + sizeof(Elf32_Phdr) * s->phdr_num; + + elf_header.e_shoff = cpu_to_dump32(s, shoff); + elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +@@ -358,12 +370,12 @@ static void write_elf_section(DumpState *s, int type, Error **errp) + if (type == 0) { + shdr_size = sizeof(Elf32_Shdr); + memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr32; + } else { + shdr_size = sizeof(Elf64_Shdr); + memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->sh_info); ++ shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); + shdr = &shdr64; + } + +@@ -478,13 +490,6 @@ static void write_elf_loads(DumpState *s, Error **errp) + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; +- uint32_t max_index; +- +- if (s->have_section) { +- max_index = s->sh_info; +- } else { +- max_index = s->phdr_num; +- } + + QTAILQ_FOREACH(memory_mapping, &s->list.head, next) { + get_offset_range(memory_mapping->phys_addr, +@@ -502,7 +507,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + return; + } + +- if (phdr_index >= max_index) { ++ if (phdr_index >= s->phdr_num) { + break; + } + } +@@ -1809,22 +1814,21 @@ static void dump_init(DumpState *s, int fd, bool has_format, + s->phdr_num += s->list.num; + s->have_section = false; + } else { ++ /* sh_info of 
section 0 holds the real number of phdrs */ + s->have_section = true; +- s->phdr_num = PN_XNUM; +- s->sh_info = 1; /* PT_NOTE */ + + /* the type of shdr->sh_info is uint32_t, so we should avoid overflow */ + if (s->list.num <= UINT32_MAX - 1) { +- s->sh_info += s->list.num; ++ s->phdr_num += s->list.num; + } else { +- s->sh_info = UINT32_MAX; ++ s->phdr_num = UINT32_MAX; + } + } + + if (s->dump_info.d_class == ELFCLASS64) { + if (s->have_section) { + s->memory_offset = sizeof(Elf64_Ehdr) + +- sizeof(Elf64_Phdr) * s->sh_info + ++ sizeof(Elf64_Phdr) * s->phdr_num + + sizeof(Elf64_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf64_Ehdr) + +@@ -1833,7 +1837,7 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } else { + if (s->have_section) { + s->memory_offset = sizeof(Elf32_Ehdr) + +- sizeof(Elf32_Phdr) * s->sh_info + ++ sizeof(Elf32_Phdr) * s->phdr_num + + sizeof(Elf32_Shdr) + s->note_size; + } else { + s->memory_offset = sizeof(Elf32_Ehdr) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 250143cb5a..b463fc9c02 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,8 +154,7 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint16_t phdr_num; +- uint32_t sh_info; ++ uint32_t phdr_num; + bool have_section; + bool resume; + bool detached; +-- +2.37.3 + diff --git a/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch b/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch new file mode 100644 index 0000000..bdcaccd --- /dev/null +++ b/kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch @@ -0,0 +1,69 @@ +From 18fef7f02801d51207d67b8f8ec5f0d828889c78 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:01 +0000 +Subject: [PATCH 29/42] dump: Rename write_elf*_phdr_note to + prepare_elf*_phdr_note +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [29/41] 876cea6f6e51be8df2763f56d0daef99d11fdd49 + +The functions in question do not actually write to the file descriptor +they set up a buffer which is later written to the fd. + +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-9-frankja@linux.ibm.com> +(cherry picked from commit 2341a94d3a0a8a93a5a977e642da1807b8edaab8) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 8d5226f861..c2c1341ad7 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -261,7 +261,7 @@ static void write_elf32_load(DumpState *s, MemoryMapping *memory_mapping, + } + } + +-static void write_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) ++static void prepare_elf64_phdr_note(DumpState *s, Elf64_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -317,7 +317,7 @@ static void write_elf64_notes(WriteCoreDumpFunction f, DumpState *s, + write_guest_note(f, s, errp); + } + +-static void write_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) ++static void prepare_elf32_phdr_note(DumpState *s, Elf32_Phdr *phdr) + { + memset(phdr, 0, sizeof(*phdr)); + phdr->p_type = cpu_to_dump32(s, PT_NOTE); +@@ -365,11 +365,11 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + int ret; + + if (dump_is_64bit(s)) { +- write_elf64_phdr_note(s, &phdr64); ++ prepare_elf64_phdr_note(s, &phdr64); + size = sizeof(phdr64); + phdr = &phdr64; + } else { +- write_elf32_phdr_note(s, &phdr32); ++ prepare_elf32_phdr_note(s, &phdr32); + size = sizeof(phdr32); + phdr = &phdr32; + } +-- +2.37.3 + 
diff --git a/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch b/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch new file mode 100644 index 0000000..b5758cf --- /dev/null +++ b/kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch @@ -0,0 +1,57 @@ +From 04d4947a22fe3192384ff486d0a979d799ded98e Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:55 +0000 +Subject: [PATCH 23/42] dump: Rename write_elf_loads to write_elf_phdr_loads +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [23/41] 18e3ef70b97c525b7c43cf12143204bdb1060e4f + +Let's make it a bit clearer that we write the program headers of the +PT_LOAD type. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +Message-Id: <20220811121111.9878-3-frankja@linux.ibm.com> +(cherry picked from commit afae6056ea79e2d89fd90867de3a01732eae724f) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index a451abc590..fa787f379f 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -491,7 +491,7 @@ static void get_offset_range(hwaddr phys_addr, + } + } + +-static void write_elf_loads(DumpState *s, Error **errp) ++static void write_elf_phdr_loads(DumpState *s, Error **errp) + { + ERRP_GUARD(); + hwaddr offset, filesz; +@@ -574,8 +574,8 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git 
a/kvm-dump-Reorder-struct-DumpState.patch b/kvm-dump-Reorder-struct-DumpState.patch new file mode 100644 index 0000000..2ca3f2a --- /dev/null +++ b/kvm-dump-Reorder-struct-DumpState.patch @@ -0,0 +1,68 @@ +From 7e8d6290099b33f88621b45e62652a97704c9573 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:15 +0000 +Subject: [PATCH 34/42] dump: Reorder struct DumpState +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [34/41] 8d44e5e8c86ea5b33644eba141046cd657d0071e + +Let's move ELF related members into one block and guest memory related +ones into another to improve readability. + +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-4-frankja@linux.ibm.com> +(cherry picked from commit 8384b73c46fd474847d7e74d121318e344edc3c4) +Signed-off-by: Cédric Le Goater +--- + include/sysemu/dump.h | 16 +++++++++------- + 1 file changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 9995f65dc8..9ed811b313 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -154,15 +154,8 @@ typedef struct DumpState { + GuestPhysBlockList guest_phys_blocks; + ArchDumpInfo dump_info; + MemoryMappingList list; +- uint32_t phdr_num; +- uint32_t shdr_num; + bool resume; + bool detached; +- ssize_t note_size; +- hwaddr shdr_offset; +- hwaddr phdr_offset; +- hwaddr section_offset; +- hwaddr note_offset; + hwaddr memory_offset; + int fd; + +@@ -177,6 +170,15 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ /* 
Elf dump related data */ ++ uint32_t phdr_num; ++ uint32_t shdr_num; ++ ssize_t note_size; ++ hwaddr shdr_offset; ++ hwaddr phdr_offset; ++ hwaddr section_offset; ++ hwaddr note_offset; ++ + void *elf_section_hdrs; /* Pointer to section header buffer */ + + uint8_t *note_buf; /* buffer for notes */ +-- +2.37.3 + diff --git a/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch b/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch new file mode 100644 index 0000000..421a98e --- /dev/null +++ b/kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch @@ -0,0 +1,467 @@ +From 8f674e0e12e4b88fc035948612a0b0949e0ad892 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:54 +0000 +Subject: [PATCH 22/42] dump: Replace opaque DumpState pointer with a typed one +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [22/41] 5f071d7ef441ae6f5da70eb56018c4657deee3d7 + +It's always better to convey the type of a pointer if at all +possible. So let's add the DumpState typedef to typedefs.h and move +the dump note functions from the opaque pointers to DumpState +pointers. 
+ +Signed-off-by: Janosch Frank +CC: Peter Maydell +CC: Cédric Le Goater +CC: Daniel Henrique Barboza +CC: David Gibson +CC: Greg Kurz +CC: Palmer Dabbelt +CC: Alistair Francis +CC: Bin Meng +CC: Cornelia Huck +CC: Thomas Huth +CC: Richard Henderson +CC: David Hildenbrand +Acked-by: Daniel Henrique Barboza +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-2-frankja@linux.ibm.com> +(cherry picked from commit 1af0006ab959864dfa2f59e9136c5fb93000b61f) +Signed-off-by: Cédric Le Goater +--- + include/hw/core/sysemu-cpu-ops.h | 8 ++++---- + include/qemu/typedefs.h | 1 + + target/arm/arch_dump.c | 6 ++---- + target/arm/cpu.h | 4 ++-- + target/i386/arch_dump.c | 30 +++++++++++++++--------------- + target/i386/cpu.h | 8 ++++---- + target/ppc/arch_dump.c | 18 +++++++++--------- + target/ppc/cpu.h | 4 ++-- + target/riscv/arch_dump.c | 6 ++---- + target/riscv/cpu.h | 4 ++-- + target/s390x/arch_dump.c | 10 +++++----- + target/s390x/s390x-internal.h | 2 +- + 12 files changed, 49 insertions(+), 52 deletions(-) + +diff --git a/include/hw/core/sysemu-cpu-ops.h b/include/hw/core/sysemu-cpu-ops.h +index a9ba39e5f2..ee169b872c 100644 +--- a/include/hw/core/sysemu-cpu-ops.h ++++ b/include/hw/core/sysemu-cpu-ops.h +@@ -53,25 +53,25 @@ typedef struct SysemuCPUOps { + * 32-bit VM coredump. + */ + int (*write_elf32_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf64_note: Callback for writing a CPU-specific ELF note to a + * 64-bit VM coredump. + */ + int (*write_elf64_note)(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + /** + * @write_elf32_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 32-bit VM coredump. 
+ */ + int (*write_elf32_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @write_elf64_qemunote: Callback for writing a CPU- and QEMU-specific ELF + * note to a 64-bit VM coredump. + */ + int (*write_elf64_qemunote)(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + /** + * @virtio_is_big_endian: Callback to return %true if a CPU which supports + * runtime configurable endianness is currently big-endian. +diff --git a/include/qemu/typedefs.h b/include/qemu/typedefs.h +index ee60eb3de4..ac9d031be6 100644 +--- a/include/qemu/typedefs.h ++++ b/include/qemu/typedefs.h +@@ -125,6 +125,7 @@ typedef struct VirtIODevice VirtIODevice; + typedef struct Visitor Visitor; + typedef struct VMChangeStateEntry VMChangeStateEntry; + typedef struct VMStateDescription VMStateDescription; ++typedef struct DumpState DumpState; + + /* + * Pointer types +diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c +index 0184845310..3a824e0aa6 100644 +--- a/target/arm/arch_dump.c ++++ b/target/arm/arch_dump.c +@@ -232,12 +232,11 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f, + #endif + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct aarch64_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + uint64_t pstate, sp; + int ret, i; + +@@ -360,12 +359,11 @@ static int arm_write_elf32_vfp(WriteCoreDumpFunction f, CPUARMState *env, + } + + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct arm_note note; + ARMCPU *cpu = ARM_CPU(cs); + CPUARMState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + bool fpvalid = cpu_isar_feature(aa32_vfp_simd, cpu); + +diff --git a/target/arm/cpu.h b/target/arm/cpu.h +index e33f37b70a..8d2f496ef9 100644 +--- a/target/arm/cpu.h ++++ 
b/target/arm/cpu.h +@@ -1065,9 +1065,9 @@ int arm_gen_dynamic_svereg_xml(CPUState *cpu, int base_reg); + const char *arm_gdb_get_dynamic_xml(CPUState *cpu, const char *xmlname); + + int arm_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int arm_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + #ifdef TARGET_AARCH64 + int aarch64_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); +diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c +index 004141fc04..c290910a04 100644 +--- a/target/i386/arch_dump.c ++++ b/target/i386/arch_dump.c +@@ -42,7 +42,7 @@ typedef struct { + + static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + CPUX86State *env, int id, +- void *opaque) ++ DumpState *s) + { + x86_64_user_regs_struct regs; + Elf64_Nhdr *note; +@@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, + buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong); + memcpy(buf, &regs, sizeof(x86_64_user_regs_struct)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env, + } + + static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, +- int id, void *opaque) ++ int id, DumpState *s) + { + x86_elf_prstatus prstatus; + Elf64_Nhdr *note; +@@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, + } + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int
cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + int ret; +@@ -189,10 +189,10 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); + + if (lma) { +- ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s); + } else { + #endif +- ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque); ++ ret = x86_write_elf64_note(f, &cpu->env, cpuid, s); + #ifdef TARGET_X86_64 + } + #endif +@@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + } + + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + x86_elf_prstatus prstatus; +@@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &prstatus, sizeof(prstatus)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env) + + static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + CPUX86State *env, +- void *opaque, ++ DumpState *s, + int type) + { + QEMUCPUState state; +@@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + buf += ROUND_UP(name_size, 4); + memcpy(buf, &state, sizeof(state)); + +- ret = f(note, note_size, opaque); ++ ret = f(note, note_size, s); + g_free(note); + if (ret < 0) { + return -1; +@@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, + } + + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 1); ++ return cpu_write_qemu_note(f, &cpu->env, s, 1); + } + + int 
x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs, +- void *opaque) ++ DumpState *s) + { + X86CPU *cpu = X86_CPU(cs); + +- return cpu_write_qemu_note(f, &cpu->env, opaque, 0); ++ return cpu_write_qemu_note(f, &cpu->env, s, 0); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 006b735fe4..5d2ddd81b9 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1887,13 +1887,13 @@ extern const VMStateDescription vmstate_x86_cpu; + int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); + + int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, +- void *opaque); ++ DumpState *s); + + void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, + Error **errp); +diff --git a/target/ppc/arch_dump.c b/target/ppc/arch_dump.c +index bb392f6d88..e9f512bcd4 100644 +--- a/target/ppc/arch_dump.c ++++ b/target/ppc/arch_dump.c +@@ -270,23 +270,23 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + static int ppc_write_all_elf_notes(const char *note_name, + WriteCoreDumpFunction f, + PowerPCCPU *cpu, int id, +- void *opaque) ++ DumpState *s) + { +- NoteFuncArg arg = { .state = opaque }; ++ NoteFuncArg arg = { .state = s }; + int ret = -1; + int note_size; + const NoteFuncDesc *nf; + + for (nf = note_func; nf->note_contents_func; nf++) { +- arg.note.hdr.n_namesz = cpu_to_dump32(opaque, sizeof(arg.note.name)); +- arg.note.hdr.n_descsz = cpu_to_dump32(opaque, nf->contents_size); ++ arg.note.hdr.n_namesz = cpu_to_dump32(s, sizeof(arg.note.name)); ++ arg.note.hdr.n_descsz = cpu_to_dump32(s, 
nf->contents_size); + strncpy(arg.note.name, note_name, sizeof(arg.note.name)); + + (*nf->note_contents_func)(&arg, cpu); + + note_size = + sizeof(arg.note) - sizeof(arg.note.contents) + nf->contents_size; +- ret = f(&arg.note, note_size, opaque); ++ ret = f(&arg.note, note_size, s); + if (ret < 0) { + return -1; + } +@@ -295,15 +295,15 @@ static int ppc_write_all_elf_notes(const char *note_name, + } + + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } + + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + PowerPCCPU *cpu = POWERPC_CPU(cs); +- return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, opaque); ++ return ppc_write_all_elf_notes("CORE", f, cpu, cpuid, s); + } +diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h +index 23e8b76c85..f5fb284706 100644 +--- a/target/ppc/cpu.h ++++ b/target/ppc/cpu.h +@@ -1289,9 +1289,9 @@ void ppc_gdb_gen_spr_xml(PowerPCCPU *cpu); + const char *ppc_gdb_get_dynamic_xml(CPUState *cs, const char *xml_name); + #endif + int ppc64_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int ppc32_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + #ifndef CONFIG_USER_ONLY + void ppc_cpu_do_interrupt(CPUState *cpu); + bool ppc_cpu_exec_interrupt(CPUState *cpu, int int_req); +diff --git a/target/riscv/arch_dump.c b/target/riscv/arch_dump.c +index 709f621d82..736a232956 100644 +--- a/target/riscv/arch_dump.c ++++ b/target/riscv/arch_dump.c +@@ -64,12 +64,11 @@ static void riscv64_note_init(struct riscv64_note *note, DumpState *s, + } + + int 
riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv64_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i = 0; + const char name[] = "CORE"; + +@@ -134,12 +133,11 @@ static void riscv32_note_init(struct riscv32_note *note, DumpState *s, + } + + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + struct riscv32_note note; + RISCVCPU *cpu = RISCV_CPU(cs); + CPURISCVState *env = &cpu->env; +- DumpState *s = opaque; + int ret, i; + const char name[] = "CORE"; + +diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h +index 0760c0af93..4cce524b2c 100644 +--- a/target/riscv/cpu.h ++++ b/target/riscv/cpu.h +@@ -344,9 +344,9 @@ extern const char * const riscv_fpr_regnames[]; + const char *riscv_cpu_get_trap_name(target_ulong cause, bool async); + void riscv_cpu_do_interrupt(CPUState *cpu); + int riscv_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + int riscv_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); + int riscv_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); + bool riscv_cpu_fp_enabled(CPURISCVState *env); +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index 08daf93ae1..f60a14920d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -204,7 +204,7 @@ static const NoteFuncDesc note_linux[] = { + static int s390x_write_elf64_notes(const char *note_name, + WriteCoreDumpFunction f, + S390CPU *cpu, int id, +- void *opaque, ++ DumpState *s, + const NoteFuncDesc *funcs) + { + Note note; +@@ -222,7 +222,7 @@ static int s390x_write_elf64_notes(const char *note_name, + 
(*nf->note_contents_func)(&note, cpu, id); + + note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(&note, note_size, opaque); ++ ret = f(&note, note_size, s); + + if (ret < 0) { + return -1; +@@ -235,16 +235,16 @@ static int s390x_write_elf64_notes(const char *note_name, + + + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque) ++ int cpuid, DumpState *s) + { + S390CPU *cpu = S390_CPU(cs); + int r; + +- r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, opaque, note_core); ++ r = s390x_write_elf64_notes("CORE", f, cpu, cpuid, s, note_core); + if (r) { + return r; + } +- return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, opaque, note_linux); ++ return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + + int cpu_get_dump_info(ArchDumpInfo *info, +diff --git a/target/s390x/s390x-internal.h b/target/s390x/s390x-internal.h +index 1a178aed41..02cf6c3f43 100644 +--- a/target/s390x/s390x-internal.h ++++ b/target/s390x/s390x-internal.h +@@ -228,7 +228,7 @@ static inline hwaddr decode_basedisp_s(CPUS390XState *env, uint32_t ipb, + + /* arch_dump.c */ + int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, +- int cpuid, void *opaque); ++ int cpuid, DumpState *s); + + + /* cc_helper.c */ +-- +2.37.3 + diff --git a/kvm-dump-Rework-dump_calculate_size-function.patch b/kvm-dump-Rework-dump_calculate_size-function.patch new file mode 100644 index 0000000..e077bea --- /dev/null +++ b/kvm-dump-Rework-dump_calculate_size-function.patch @@ -0,0 +1,73 @@ +From 1f7cb73592a1922b3a981eb3232098281e07679f Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:59 +0000 +Subject: [PATCH 27/42] dump: Rework dump_calculate_size function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla:
1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [27/41] eaa05c39109b57a119752ad3df66f4c2ace2cbe4 + +dump_calculate_size() sums up all the sizes of the guest memory +blocks. Since we already have a function that calculates the size of a +single memory block (dump_filtered_memblock_size()) we can simply iterate +over the blocks and use the function instead of calculating the size +ourselves. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-7-frankja@linux.ibm.com> +(cherry picked from commit c370d5300f9ac1f90f8158082d22262b904fe30e) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 22 ++++++++-------------- + 1 file changed, 8 insertions(+), 14 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index f6fe13e258..902a85ef8e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1557,25 +1557,19 @@ bool dump_in_progress(void) + return (qatomic_read(&state->status) == DUMP_STATUS_ACTIVE); + } + +-/* calculate total size of memory to be dumped (taking filter into +- * acoount.) */ ++/* ++ * calculate total size of memory to be dumped (taking filter into ++ * account.) ++ */ + static int64_t dump_calculate_size(DumpState *s) + { + GuestPhysBlock *block; +- int64_t size = 0, total = 0, left = 0, right = 0; ++ int64_t total = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (dump_has_filter(s)) { +- /* calculate the overlapped region. */ +- left = MAX(s->filter_area_begin, block->target_start); +- right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); +- size = right - left; +- size = size > 0 ?
size : 0; +- } else { +- /* count the whole region in */ +- size = (block->target_end - block->target_start); +- } +- total += size; ++ total += dump_filtered_memblock_size(block, ++ s->filter_area_begin, ++ s->filter_area_length); + } + + return total; +-- +2.37.3 + diff --git a/kvm-dump-Rework-filter-area-variables.patch b/kvm-dump-Rework-filter-area-variables.patch new file mode 100644 index 0000000..4e22f41 --- /dev/null +++ b/kvm-dump-Rework-filter-area-variables.patch @@ -0,0 +1,187 @@ +From 411f5354b809f6b783946e58d7655135814fb809 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:58 +0000 +Subject: [PATCH 26/42] dump: Rework filter area variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [26/41] f10a5523dfd2724f7a8637fca3ed68ba6df659a5 + +While the DumpState begin and length variables directly mirror the API +variable names they are not very descriptive. So let's add a +"filter_area_" prefix and make has_filter a function checking length > 0. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-6-frankja@linux.ibm.com> +(cherry picked from commit dddf725f70bfe7f5adb41fa31dbd06e767271bda) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 53 +++++++++++++++++++++++++------------------ + include/sysemu/dump.h | 13 ++++++++--- + 2 files changed, 41 insertions(+), 25 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index e6aa037f59..f6fe13e258 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -60,6 +60,11 @@ static inline bool dump_is_64bit(DumpState *s) + return s->dump_info.d_class == ELFCLASS64; + } + ++static inline bool dump_has_filter(DumpState *s) ++{ ++ return s->filter_area_length > 0; ++} ++ + uint16_t cpu_to_dump16(DumpState *s, uint16_t val) + { + if (s->dump_info.d_endian == ELFDATA2LSB) { +@@ -444,29 +449,30 @@ static void get_offset_range(hwaddr phys_addr, + *p_offset = -1; + *p_filesz = 0; + +- if (s->has_filter) { +- if (phys_addr < s->begin || phys_addr >= s->begin + s->length) { ++ if (dump_has_filter(s)) { ++ if (phys_addr < s->filter_area_begin || ++ phys_addr >= s->filter_area_begin + s->filter_area_length) { + return; + } + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (dump_has_filter(s)) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + /* This block is out of the range */ + continue; + } + +- if (s->begin <= block->target_start) { ++ if (s->filter_area_begin <= block->target_start) { + start = block->target_start; + } else { +- start = s->begin; ++ start = s->filter_area_begin; + } + + size_in_block = block->target_end - start; +- if (s->begin + s->length < block->target_end) { +- size_in_block -= block->target_end - (s->begin + s->length); ++ if (s->filter_area_begin + s->filter_area_length < block->target_end) { 
++ size_in_block -= block->target_end - (s->filter_area_begin + s->filter_area_length); + } + } else { + start = block->target_start; +@@ -639,12 +645,12 @@ static void dump_iterate(DumpState *s, Error **errp) + int64_t memblock_size, memblock_start; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- memblock_start = dump_filtered_memblock_start(block, s->begin, s->length); ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, s->filter_area_length); + if (memblock_start == -1) { + continue; + } + +- memblock_size = dump_filtered_memblock_size(block, s->begin, s->length); ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, s->filter_area_length); + + /* Write the memory to file */ + write_memory(s, block, memblock_start, memblock_size, errp); +@@ -1513,14 +1519,14 @@ static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + +- if (!s->has_filter) { ++ if (!dump_has_filter(s)) { + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { + /* This block is out of the range */ +- if (block->target_start >= s->begin + s->length || +- block->target_end <= s->begin) { ++ if (block->target_start >= s->filter_area_begin + s->filter_area_length || ++ block->target_end <= s->filter_area_begin) { + continue; + } + return 0; +@@ -1559,10 +1565,10 @@ static int64_t dump_calculate_size(DumpState *s) + int64_t size = 0, total = 0, left = 0, right = 0; + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { +- if (s->has_filter) { ++ if (dump_has_filter(s)) { + /* calculate the overlapped region. */ +- left = MAX(s->begin, block->target_start); +- right = MIN(s->begin + s->length, block->target_end); ++ left = MAX(s->filter_area_begin, block->target_start); ++ right = MIN(s->filter_area_begin + s->filter_area_length, block->target_end); + size = right - left; + size = size > 0 ? 
size : 0; + } else { +@@ -1652,9 +1658,12 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + s->fd = fd; +- s->has_filter = has_filter; +- s->begin = begin; +- s->length = length; ++ if (has_filter && !length) { ++ error_setg(errp, QERR_INVALID_PARAMETER, "length"); ++ goto cleanup; ++ } ++ s->filter_area_begin = begin; ++ s->filter_area_length = length; + + memory_mapping_list_init(&s->list); + +@@ -1787,8 +1796,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + return; + } + +- if (s->has_filter) { +- memory_mapping_filter(&s->list, s->begin, s->length); ++ if (dump_has_filter(s)) { ++ memory_mapping_filter(&s->list, s->filter_area_begin, s->filter_area_length); + } + + /* +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 7fce1d4af6..b62513d87d 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,9 +166,16 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- bool has_filter; +- int64_t begin; +- int64_t length; ++ /* ++ * Dump filter area variables ++ * ++ * A filtered dump only contains the guest memory designated by ++ * the start address and length variables defined below. ++ * ++ * If length is 0, no filtering is applied. 
++ */ ++ int64_t filter_area_begin; /* Start address of partial guest memory area */ ++ int64_t filter_area_length; /* Length of partial guest memory area */ + + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ +-- +2.37.3 + diff --git a/kvm-dump-Rework-get_start_block.patch b/kvm-dump-Rework-get_start_block.patch new file mode 100644 index 0000000..f6bdde2 --- /dev/null +++ b/kvm-dump-Rework-get_start_block.patch @@ -0,0 +1,102 @@ +From b56c362132baef40cc25d910c1e0d217d83cfe44 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:10:57 +0000 +Subject: [PATCH 25/42] dump: Rework get_start_block +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [25/41] c93842a1aaeadcc11e91c194452fcd05d163b3ca + +get_start_block() returns the start address of the first memory block +or -1. + +With the GuestPhysBlock iterator conversion we don't need to set the +start address and can therefore remove that code and the "start" +DumpState struct member. 
The only functionality left is the validation +of the start block so it only makes sense to re-name the function to +validate_start_block() + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20220811121111.9878-5-frankja@linux.ibm.com> +(cherry picked from commit 0c2994ac9009577b967529ce18e269da5b280351) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 20 ++++++-------------- + include/sysemu/dump.h | 2 -- + 2 files changed, 6 insertions(+), 16 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index d981e843dd..e6aa037f59 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1509,30 +1509,22 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + } + } + +-static ram_addr_t get_start_block(DumpState *s) ++static int validate_start_block(DumpState *s) + { + GuestPhysBlock *block; + + if (!s->has_filter) { +- s->next_block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + return 0; + } + + QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ /* This block is out of the range */ + if (block->target_start >= s->begin + s->length || + block->target_end <= s->begin) { +- /* This block is out of the range */ + continue; + } +- +- s->next_block = block; +- if (s->begin > block->target_start) { +- s->start = s->begin - block->target_start; +- } else { +- s->start = 0; +- } +- return s->start; +- } ++ return 0; ++ } + + return -1; + } +@@ -1679,8 +1671,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + goto cleanup; + } + +- s->start = get_start_block(s); +- if (s->start == -1) { ++ /* Is the filter filtering everything? 
*/ ++ if (validate_start_block(s) == -1) { + error_setg(errp, QERR_INVALID_PARAMETER, "begin"); + goto cleanup; + } +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index ffc2ea1072..7fce1d4af6 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -166,8 +166,6 @@ typedef struct DumpState { + hwaddr memory_offset; + int fd; + +- GuestPhysBlock *next_block; +- ram_addr_t start; + bool has_filter; + int64_t begin; + int64_t length; +-- +2.37.3 + diff --git a/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch b/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch new file mode 100644 index 0000000..1f53426 --- /dev/null +++ b/kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch @@ -0,0 +1,173 @@ +From d1e147a3133d4d31d4b0c02c05916366fadd9c30 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Thu, 11 Aug 2022 12:11:00 +0000 +Subject: [PATCH 28/42] dump: Split elf header functions into prepare and write +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [28/41] f70a13ad443835e7f46b7c5e176e372d370ac797 + +Let's split the write from the modification of the elf header so we +can consolidate the write of the data in one function. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Marc-André Lureau +Message-Id: <20220811121111.9878-8-frankja@linux.ibm.com> +(cherry picked from commit 670e76998a61ca171200fcded3865b294a2d1243) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 100 ++++++++++++++++++++++++++++------------------------ + 1 file changed, 53 insertions(+), 47 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 902a85ef8e..8d5226f861 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -132,7 +132,7 @@ static int fd_write_vmcore(const void *buf, size_t size, void *opaque) + return 0; + } + +-static void write_elf64_header(DumpState *s, Error **errp) ++static void prepare_elf64_header(DumpState *s, Elf64_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -140,34 +140,27 @@ static void write_elf64_header(DumpState *s, Error **errp) + * special section. + */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf64_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf64_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS64; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump64(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf64_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS64; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = 
cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump64(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf64_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump64(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); +- } +- +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); +- if (ret < 0) { +- error_setg_errno(errp, -ret, "dump: failed to write elf header"); ++ elf_header->e_shoff = cpu_to_dump64(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf64_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } + } + +-static void write_elf32_header(DumpState *s, Error **errp) ++static void prepare_elf32_header(DumpState *s, Elf32_Ehdr *elf_header) + { + /* + * phnum in the elf header is 16 bit, if we have more segments we +@@ -175,28 +168,45 @@ static void write_elf32_header(DumpState *s, Error **errp) + * special section. 
+ */ + uint16_t phnum = MIN(s->phdr_num, PN_XNUM); +- Elf32_Ehdr elf_header; +- int ret; + +- memset(&elf_header, 0, sizeof(Elf32_Ehdr)); +- memcpy(&elf_header, ELFMAG, SELFMAG); +- elf_header.e_ident[EI_CLASS] = ELFCLASS32; +- elf_header.e_ident[EI_DATA] = s->dump_info.d_endian; +- elf_header.e_ident[EI_VERSION] = EV_CURRENT; +- elf_header.e_type = cpu_to_dump16(s, ET_CORE); +- elf_header.e_machine = cpu_to_dump16(s, s->dump_info.d_machine); +- elf_header.e_version = cpu_to_dump32(s, EV_CURRENT); +- elf_header.e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); +- elf_header.e_phoff = cpu_to_dump32(s, s->phdr_offset); +- elf_header.e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); +- elf_header.e_phnum = cpu_to_dump16(s, phnum); ++ memset(elf_header, 0, sizeof(Elf32_Ehdr)); ++ memcpy(elf_header, ELFMAG, SELFMAG); ++ elf_header->e_ident[EI_CLASS] = ELFCLASS32; ++ elf_header->e_ident[EI_DATA] = s->dump_info.d_endian; ++ elf_header->e_ident[EI_VERSION] = EV_CURRENT; ++ elf_header->e_type = cpu_to_dump16(s, ET_CORE); ++ elf_header->e_machine = cpu_to_dump16(s, s->dump_info.d_machine); ++ elf_header->e_version = cpu_to_dump32(s, EV_CURRENT); ++ elf_header->e_ehsize = cpu_to_dump16(s, sizeof(elf_header)); ++ elf_header->e_phoff = cpu_to_dump32(s, s->phdr_offset); ++ elf_header->e_phentsize = cpu_to_dump16(s, sizeof(Elf32_Phdr)); ++ elf_header->e_phnum = cpu_to_dump16(s, phnum); + if (s->shdr_num) { +- elf_header.e_shoff = cpu_to_dump32(s, s->shdr_offset); +- elf_header.e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); +- elf_header.e_shnum = cpu_to_dump16(s, s->shdr_num); ++ elf_header->e_shoff = cpu_to_dump32(s, s->shdr_offset); ++ elf_header->e_shentsize = cpu_to_dump16(s, sizeof(Elf32_Shdr)); ++ elf_header->e_shnum = cpu_to_dump16(s, s->shdr_num); + } ++} + +- ret = fd_write_vmcore(&elf_header, sizeof(elf_header), s); ++static void write_elf_header(DumpState *s, Error **errp) ++{ ++ Elf32_Ehdr elf32_header; ++ Elf64_Ehdr elf64_header; ++ size_t header_size; ++ void 
*header_ptr; ++ int ret; ++ ++ if (dump_is_64bit(s)) { ++ prepare_elf64_header(s, &elf64_header); ++ header_size = sizeof(elf64_header); ++ header_ptr = &elf64_header; ++ } else { ++ prepare_elf32_header(s, &elf32_header); ++ header_size = sizeof(elf32_header); ++ header_ptr = &elf32_header; ++ } ++ ++ ret = fd_write_vmcore(header_ptr, header_size, s); + if (ret < 0) { + error_setg_errno(errp, -ret, "dump: failed to write elf header"); + } +@@ -565,11 +575,7 @@ static void dump_begin(DumpState *s, Error **errp) + */ + + /* write elf header to vmcore */ +- if (dump_is_64bit(s)) { +- write_elf64_header(s, errp); +- } else { +- write_elf32_header(s, errp); +- } ++ write_elf_header(s, errp); + if (*errp) { + return; + } +-- +2.37.3 + diff --git a/kvm-dump-Use-ERRP_GUARD.patch b/kvm-dump-Use-ERRP_GUARD.patch new file mode 100644 index 0000000..1ef42ee --- /dev/null +++ b/kvm-dump-Use-ERRP_GUARD.patch @@ -0,0 +1,420 @@ +From 4ca61efe246d62d420eb332655c0c8ead4cc762b Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Wed, 30 Mar 2022 12:35:55 +0000 +Subject: [PATCH 13/42] dump: Use ERRP_GUARD() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [13/41] f735cd1dab0230000cfadd878765fdf4647b239c + +Let's move to the new way of handling errors before changing the dump +code. This patch has mostly been generated by the coccinelle script +scripts/coccinelle/errp-guard.cocci. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Richard Henderson +Reviewed-by: Marc-André Lureau +Message-Id: <20220330123603.107120-2-frankja@linux.ibm.com> +(cherry picked from commit 86a518bba4f4d7c9016fc5b104fe1e58b00ad756) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 144 ++++++++++++++++++++++------------------------------ + 1 file changed, 61 insertions(+), 83 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 662d0a62cd..9876123f2e 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -390,23 +390,21 @@ static void write_data(DumpState *s, void *buf, int length, Error **errp) + static void write_memory(DumpState *s, GuestPhysBlock *block, ram_addr_t start, + int64_t size, Error **errp) + { ++ ERRP_GUARD(); + int64_t i; +- Error *local_err = NULL; + + for (i = 0; i < size / s->dump_info.page_size; i++) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } + + if ((size % s->dump_info.page_size) != 0) { + write_data(s, block->host_addr + start + i * s->dump_info.page_size, +- size % s->dump_info.page_size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ size % s->dump_info.page_size, errp); ++ if (*errp) { + return; + } + } +@@ -476,11 +474,11 @@ static void get_offset_range(hwaddr phys_addr, + + static void write_elf_loads(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + hwaddr offset, filesz; + MemoryMapping *memory_mapping; + uint32_t phdr_index = 1; + uint32_t max_index; +- Error *local_err = NULL; + + if (s->have_section) { + max_index = s->sh_info; +@@ -494,14 +492,13 @@ static void write_elf_loads(DumpState *s, Error **errp) + s, &offset, &filesz); + if (s->dump_info.d_class == ELFCLASS64) { + write_elf64_load(s, memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } else { + write_elf32_load(s, 
memory_mapping, phdr_index++, offset, +- filesz, &local_err); ++ filesz, errp); + } + +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + +@@ -514,7 +511,7 @@ static void write_elf_loads(DumpState *s, Error **errp) + /* write elf header, PT_NOTE and elf note to vmcore. */ + static void dump_begin(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + + /* + * the vmcore's format is: +@@ -542,73 +539,64 @@ static void dump_begin(DumpState *s, Error **errp) + + /* write elf header to vmcore */ + if (s->dump_info.d_class == ELFCLASS64) { +- write_elf64_header(s, &local_err); ++ write_elf64_header(s, errp); + } else { +- write_elf32_header(s, &local_err); ++ write_elf32_header(s, errp); + } +- if (local_err) { +- error_propagate(errp, local_err); ++ if (*errp) { + return; + } + + if (s->dump_info.d_class == ELFCLASS64) { + /* write PT_NOTE to vmcore */ +- write_elf64_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 1, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 1, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf64_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } else { + /* write PT_NOTE to vmcore */ +- write_elf32_note(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_note(s, errp); ++ if (*errp) { + return; + } + + /* write all PT_LOAD to vmcore */ +- write_elf_loads(s, &local_err); +- if (local_err) { 
+- error_propagate(errp, local_err); ++ write_elf_loads(s, errp); ++ if (*errp) { + return; + } + + /* write section to vmcore */ + if (s->have_section) { +- write_elf_section(s, 0, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf_section(s, 0, errp); ++ if (*errp) { + return; + } + } + + /* write notes to vmcore */ +- write_elf32_notes(fd_write_vmcore, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(fd_write_vmcore, s, errp); ++ if (*errp) { + return; + } + } +@@ -644,9 +632,9 @@ static int get_next_block(DumpState *s, GuestPhysBlock *block) + /* write all memory to vmcore */ + static void dump_iterate(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + GuestPhysBlock *block; + int64_t size; +- Error *local_err = NULL; + + do { + block = s->next_block; +@@ -658,9 +646,8 @@ static void dump_iterate(DumpState *s, Error **errp) + size -= block->target_end - (s->begin + s->length); + } + } +- write_memory(s, block, s->start, size, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_memory(s, block, s->start, size, errp); ++ if (*errp) { + return; + } + +@@ -669,11 +656,10 @@ static void dump_iterate(DumpState *s, Error **errp) + + static void create_vmcore(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + +- dump_begin(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ dump_begin(s, errp); ++ if (*errp) { + return; + } + +@@ -810,6 +796,7 @@ static bool note_name_equal(DumpState *s, + /* write common header, sub header and elf note to vmcore */ + static void create_header32(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader32 *dh = NULL; + KdumpSubHeader32 *kh = NULL; + size_t size; +@@ -818,7 +805,6 @@ static void create_header32(DumpState *s, Error **errp) + uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version 
of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader32); +@@ -894,9 +880,8 @@ static void create_header32(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf32_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf32_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + if (write_buffer(s->fd, offset_note, s->note_buf, +@@ -922,6 +907,7 @@ out: + /* write common header, sub header and elf note to vmcore */ + static void create_header64(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + DiskDumpHeader64 *dh = NULL; + KdumpSubHeader64 *kh = NULL; + size_t size; +@@ -930,7 +916,6 @@ static void create_header64(DumpState *s, Error **errp) + uint32_t bitmap_blocks; + uint32_t status = 0; + uint64_t offset_note; +- Error *local_err = NULL; + + /* write common header, the version of kdump-compressed format is 6th */ + size = sizeof(DiskDumpHeader64); +@@ -1006,9 +991,8 @@ static void create_header64(DumpState *s, Error **errp) + s->note_buf_offset = 0; + + /* use s->note_buf to store notes temporarily */ +- write_elf64_notes(buf_write_note, s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_elf64_notes(buf_write_note, s, errp); ++ if (*errp) { + goto out; + } + +@@ -1472,8 +1456,8 @@ out: + + static void create_kdump_vmcore(DumpState *s, Error **errp) + { ++ ERRP_GUARD(); + int ret; +- Error *local_err = NULL; + + /* + * the kdump-compressed format is: +@@ -1503,21 +1487,18 @@ static void create_kdump_vmcore(DumpState *s, Error **errp) + return; + } + +- write_dump_header(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_header(s, errp); ++ if (*errp) { + return; + } + +- write_dump_bitmap(s, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_bitmap(s, errp); ++ if (*errp) { + return; + } + +- write_dump_pages(s, &local_err); 
+- if (local_err) { +- error_propagate(errp, local_err); ++ write_dump_pages(s, errp); ++ if (*errp) { + return; + } + +@@ -1647,10 +1628,10 @@ static void dump_init(DumpState *s, int fd, bool has_format, + DumpGuestMemoryFormat format, bool paging, bool has_filter, + int64_t begin, int64_t length, Error **errp) + { ++ ERRP_GUARD(); + VMCoreInfoState *vmci = vmcoreinfo_find(); + CPUState *cpu; + int nr_cpus; +- Error *err = NULL; + int ret; + + s->has_format = has_format; +@@ -1769,9 +1750,8 @@ static void dump_init(DumpState *s, int fd, bool has_format, + + /* get memory mapping */ + if (paging) { +- qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, &err); +- if (err != NULL) { +- error_propagate(errp, err); ++ qemu_get_guest_memory_mapping(&s->list, &s->guest_phys_blocks, errp); ++ if (*errp) { + goto cleanup; + } + } else { +@@ -1870,33 +1850,32 @@ cleanup: + /* this operation might be time consuming. */ + static void dump_process(DumpState *s, Error **errp) + { +- Error *local_err = NULL; ++ ERRP_GUARD(); + DumpQueryResult *result = NULL; + + if (s->has_format && s->format == DUMP_GUEST_MEMORY_FORMAT_WIN_DMP) { + #ifdef TARGET_X86_64 +- create_win_dump(s, &local_err); ++ create_win_dump(s, errp); + #endif + } else if (s->has_format && s->format != DUMP_GUEST_MEMORY_FORMAT_ELF) { +- create_kdump_vmcore(s, &local_err); ++ create_kdump_vmcore(s, errp); + } else { +- create_vmcore(s, &local_err); ++ create_vmcore(s, errp); + } + + /* make sure status is written after written_size updates */ + smp_wmb(); + qatomic_set(&s->status, +- (local_err ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); ++ (*errp ? DUMP_STATUS_FAILED : DUMP_STATUS_COMPLETED)); + + /* send DUMP_COMPLETED message (unconditionally) */ + result = qmp_query_dump(NULL); + /* should never fail */ + assert(result); +- qapi_event_send_dump_completed(result, !!local_err, (local_err ? +- error_get_pretty(local_err) : NULL)); ++ qapi_event_send_dump_completed(result, !!*errp, (*errp ? 
++ error_get_pretty(*errp) : NULL)); + qapi_free_DumpQueryResult(result); + +- error_propagate(errp, local_err); + dump_cleanup(s); + } + +@@ -1925,10 +1904,10 @@ void qmp_dump_guest_memory(bool paging, const char *file, + int64_t length, bool has_format, + DumpGuestMemoryFormat format, Error **errp) + { ++ ERRP_GUARD(); + const char *p; + int fd = -1; + DumpState *s; +- Error *local_err = NULL; + bool detach_p = false; + + if (runstate_check(RUN_STATE_INMIGRATE)) { +@@ -2028,9 +2007,8 @@ void qmp_dump_guest_memory(bool paging, const char *file, + dump_state_prepare(s); + + dump_init(s, fd, has_format, format, paging, has_begin, +- begin, length, &local_err); +- if (local_err) { +- error_propagate(errp, local_err); ++ begin, length, errp); ++ if (*errp) { + qatomic_set(&s->status, DUMP_STATUS_FAILED); + return; + } +-- +2.37.3 + diff --git a/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch b/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch new file mode 100644 index 0000000..8ea0a7e --- /dev/null +++ b/kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch @@ -0,0 +1,150 @@ +From a918c7305ec7c68e8bc37b449f71e75d84124cd0 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:13 +0000 +Subject: [PATCH 32/42] dump: Use a buffer for ELF section data and headers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [32/41] e1a03e202e67764581e486f37e13e479200e5846 + +Currently we're writing the NULL section header if we overflow the +physical header number in the ELF header. But in the future we'll add +custom section headers AND section data. + +To facilitate this we need to rearange section handling a bit. 
As with +the other ELF headers we split the code into a prepare and a write +step. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-2-frankja@linux.ibm.com> +(cherry picked from commit e41ed29bcee5cb16715317bcf290f6b5c196eb0a) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 75 +++++++++++++++++++++++++++++-------------- + include/sysemu/dump.h | 2 ++ + 2 files changed, 53 insertions(+), 24 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 88177fa886..4142b4cc0c 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -381,31 +381,60 @@ static void write_elf_phdr_note(DumpState *s, Error **errp) + } + } + +-static void write_elf_section(DumpState *s, int type, Error **errp) ++static void prepare_elf_section_hdr_zero(DumpState *s) + { +- Elf32_Shdr shdr32; +- Elf64_Shdr shdr64; +- int shdr_size; +- void *shdr; +- int ret; ++ if (dump_is_64bit(s)) { ++ Elf64_Shdr *shdr64 = s->elf_section_hdrs; + +- if (type == 0) { +- shdr_size = sizeof(Elf32_Shdr); +- memset(&shdr32, 0, shdr_size); +- shdr32.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr32; ++ shdr64->sh_info = cpu_to_dump32(s, s->phdr_num); + } else { +- shdr_size = sizeof(Elf64_Shdr); +- memset(&shdr64, 0, shdr_size); +- shdr64.sh_info = cpu_to_dump32(s, s->phdr_num); +- shdr = &shdr64; ++ Elf32_Shdr *shdr32 = s->elf_section_hdrs; ++ ++ shdr32->sh_info = cpu_to_dump32(s, s->phdr_num); ++ } ++} ++ ++static void prepare_elf_section_hdrs(DumpState *s) ++{ ++ size_t len, sizeof_shdr; ++ ++ /* ++ * Section ordering: ++ * - HDR zero ++ */ ++ sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ len = sizeof_shdr * s->shdr_num; ++ s->elf_section_hdrs = g_malloc0(len); ++ ++ /* ++ * The first section header is ALWAYS a special initial section ++ * header. ++ * ++ * The header should be 0 with one exception being that if ++ * phdr_num is PN_XNUM then the sh_info field contains the real ++ * number of segment entries. 
++ * ++ * As we zero allocate the buffer we will only need to modify ++ * sh_info for the PN_XNUM case. ++ */ ++ if (s->phdr_num >= PN_XNUM) { ++ prepare_elf_section_hdr_zero(s); + } ++} + +- ret = fd_write_vmcore(shdr, shdr_size, s); ++static void write_elf_section_headers(DumpState *s, Error **errp) ++{ ++ size_t sizeof_shdr = dump_is_64bit(s) ? sizeof(Elf64_Shdr) : sizeof(Elf32_Shdr); ++ int ret; ++ ++ prepare_elf_section_hdrs(s); ++ ++ ret = fd_write_vmcore(s->elf_section_hdrs, s->shdr_num * sizeof_shdr, s); + if (ret < 0) { +- error_setg_errno(errp, -ret, +- "dump: failed to write section header table"); ++ error_setg_errno(errp, -ret, "dump: failed to write section headers"); + } ++ ++ g_free(s->elf_section_hdrs); + } + + static void write_data(DumpState *s, void *buf, int length, Error **errp) +@@ -592,12 +621,10 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write section to vmcore */ +- if (s->shdr_num) { +- write_elf_section(s, 1, errp); +- if (*errp) { +- return; +- } ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); ++ if (*errp) { ++ return; + } + + /* write notes to vmcore */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index b62513d87d..9995f65dc8 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -177,6 +177,8 @@ typedef struct DumpState { + int64_t filter_area_begin; /* Start address of partial guest memory area */ + int64_t filter_area_length; /* Length of partial guest memory area */ + ++ void *elf_section_hdrs; /* Pointer to section header buffer */ ++ + uint8_t *note_buf; /* buffer for notes */ + size_t note_buf_offset; /* the writing place in note_buf */ + uint32_t nr_cpus; /* number of guest's cpu */ +-- +2.37.3 + diff --git a/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch b/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch new file mode 100644 index 0000000..2efd686 --- /dev/null +++ 
b/kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch @@ -0,0 +1,104 @@ +From 987ede93fa4e3d058acddc19874e467faa116ede Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:14 +0000 +Subject: [PATCH 33/42] dump: Write ELF section headers right after ELF header +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [33/41] e956040753533ac376e9763145192de1e216027d + +Let's start bundling the writes of the headers and of the data so we +have a clear ordering between them. Since the ELF header uses offsets +to the headers we can freely order them. + +Signed-off-by: Janosch Frank +Reviewed-by: Marc-André Lureau +Message-Id: <20221017083822.43118-3-frankja@linux.ibm.com> +(cherry picked from commit cb415fd61e48d52f81dcf38956e3f913651cff1c) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 31 ++++++++++++++----------------- + 1 file changed, 14 insertions(+), 17 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4142b4cc0c..d17537d4e9 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -584,6 +584,8 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | elf header | + * -------------- ++ * | sctn_hdr | ++ * -------------- + * | PT_NOTE | + * -------------- + * | PT_LOAD | +@@ -592,8 +594,6 @@ static void dump_begin(DumpState *s, Error **errp) + * -------------- + * | PT_LOAD | + * -------------- +- * | sec_hdr | +- * -------------- + * | elf note | + * -------------- + * | memory | +@@ -609,20 +609,20 @@ static void dump_begin(DumpState *s, Error **errp) + return; + } + +- /* write PT_NOTE to vmcore */ +- write_elf_phdr_note(s, errp); ++ /* write section headers to vmcore */ ++ write_elf_section_headers(s, errp); + if 
(*errp) { + return; + } + +- /* write all PT_LOADs to vmcore */ +- write_elf_phdr_loads(s, errp); ++ /* write PT_NOTE to vmcore */ ++ write_elf_phdr_note(s, errp); + if (*errp) { + return; + } + +- /* write section headers to vmcore */ +- write_elf_section_headers(s, errp); ++ /* write all PT_LOADs to vmcore */ ++ write_elf_phdr_loads(s, errp); + if (*errp) { + return; + } +@@ -1877,16 +1877,13 @@ static void dump_init(DumpState *s, int fd, bool has_format, + } + + if (dump_is_64bit(s)) { +- s->phdr_offset = sizeof(Elf64_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf64_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf64_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf64_Phdr) * s->phdr_num; + } else { +- +- s->phdr_offset = sizeof(Elf32_Ehdr); +- s->shdr_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; +- s->note_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; +- s->memory_offset = s->note_offset + s->note_size; ++ s->shdr_offset = sizeof(Elf32_Ehdr); ++ s->phdr_offset = s->shdr_offset + sizeof(Elf32_Shdr) * s->shdr_num; ++ s->note_offset = s->phdr_offset + sizeof(Elf32_Phdr) * s->phdr_num; + } + + return; +-- +2.37.3 + diff --git a/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch b/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch new file mode 100644 index 0000000..16e6e87 --- /dev/null +++ b/kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch @@ -0,0 +1,173 @@ +From deaf4e0f5e90d227b7b9f3e5d1dff7fd0bc0206a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Mon, 5 Sep 2022 16:06:21 +0400 +Subject: [PATCH 31/42] dump: fix kdump to work over non-aligned blocks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater 
+RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [31/41] b307bdce4a4791fc30160fa2a1678bd238f2432e + +Rewrite get_next_page() to work over non-aligned blocks. When it +encounters non aligned addresses, it will try to fill a page provided by +the caller. + +This solves a kdump crash with "tpm-crb-cmd" RAM memory region, +qemu-kvm: ../dump/dump.c:1162: _Bool get_next_page(GuestPhysBlock **, +uint64_t *, uint8_t **, DumpState *): Assertion `(block->target_start & +~target_page_mask) == 0' failed. + +because: +guest_phys_block_add_section: target_start=00000000fed40080 target_end=00000000fed41000: added (count: 4) + +Fixes: +https://bugzilla.redhat.com/show_bug.cgi?id=2120480 + +Signed-off-by: Marc-André Lureau +Acked-by: David Hildenbrand +(cherry picked from commit 94d788408d2d5a6474c99b2c9cf06913b9db7c58) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 79 +++++++++++++++++++++++++++++++++++++---------------- + 1 file changed, 56 insertions(+), 23 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 1c49232390..88177fa886 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1117,50 +1117,81 @@ static uint64_t dump_pfn_to_paddr(DumpState *s, uint64_t pfn) + } + + /* +- * exam every page and return the page frame number and the address of the page. +- * bufptr can be NULL. note: the blocks here is supposed to reflect guest-phys +- * blocks, so block->target_start and block->target_end should be interal +- * multiples of the target page size. ++ * Return the page frame number and the page content in *bufptr. bufptr can be ++ * NULL. If not NULL, *bufptr must contains a target page size of pre-allocated ++ * memory. This is not necessarily the memory returned. 
+ */ + static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + uint8_t **bufptr, DumpState *s) + { + GuestPhysBlock *block = *blockptr; +- hwaddr addr, target_page_mask = ~((hwaddr)s->dump_info.page_size - 1); +- uint8_t *buf; ++ uint32_t page_size = s->dump_info.page_size; ++ uint8_t *buf = NULL, *hbuf; ++ hwaddr addr; + + /* block == NULL means the start of the iteration */ + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; + addr = block->target_start; ++ *pfnptr = dump_paddr_to_pfn(s, addr); + } else { +- addr = dump_pfn_to_paddr(s, *pfnptr + 1); ++ *pfnptr += 1; ++ addr = dump_pfn_to_paddr(s, *pfnptr); + } + assert(block != NULL); + +- if ((addr >= block->target_start) && +- (addr + s->dump_info.page_size <= block->target_end)) { +- buf = block->host_addr + (addr - block->target_start); +- } else { +- /* the next page is in the next block */ +- block = QTAILQ_NEXT(block, next); +- *blockptr = block; +- if (!block) { +- return false; ++ while (1) { ++ if (addr >= block->target_start && addr < block->target_end) { ++ size_t n = MIN(block->target_end - addr, page_size - addr % page_size); ++ hbuf = block->host_addr + (addr - block->target_start); ++ if (!buf) { ++ if (n == page_size) { ++ /* this is a whole target page, go for it */ ++ assert(addr % page_size == 0); ++ buf = hbuf; ++ break; ++ } else if (bufptr) { ++ assert(*bufptr); ++ buf = *bufptr; ++ memset(buf, 0, page_size); ++ } else { ++ return true; ++ } ++ } ++ ++ memcpy(buf + addr % page_size, hbuf, n); ++ addr += n; ++ if (addr % page_size == 0) { ++ /* we filled up the page */ ++ break; ++ } ++ } else { ++ /* the next page is in the next block */ ++ *blockptr = block = QTAILQ_NEXT(block, next); ++ if (!block) { ++ break; ++ } ++ ++ addr = block->target_start; ++ /* are we still in the same page? 
*/ ++ if (dump_paddr_to_pfn(s, addr) != *pfnptr) { ++ if (buf) { ++ /* no, but we already filled something earlier, return it */ ++ break; ++ } else { ++ /* else continue from there */ ++ *pfnptr = dump_paddr_to_pfn(s, addr); ++ } ++ } + } +- addr = block->target_start; +- buf = block->host_addr; + } + +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } + +- return true; ++ return buf != NULL; + } + + static void write_dump_bitmap(DumpState *s, Error **errp) +@@ -1306,6 +1337,7 @@ static void write_dump_pages(DumpState *s, Error **errp) + uint8_t *buf; + GuestPhysBlock *block_iter = NULL; + uint64_t pfn_iter; ++ g_autofree uint8_t *page = NULL; + + /* get offset of page_desc and page_data in dump file */ + offset_desc = s->offset_page; +@@ -1341,12 +1373,13 @@ static void write_dump_pages(DumpState *s, Error **errp) + } + + offset_data += s->dump_info.page_size; ++ page = g_malloc(s->dump_info.page_size); + + /* + * dump memory to vmcore page by page. 
zero page will all be resided in the + * first page of page section + */ +- while (get_next_page(&block_iter, &pfn_iter, &buf, s)) { ++ for (buf = page; get_next_page(&block_iter, &pfn_iter, &buf, s); buf = page) { + /* check zero page */ + if (is_zero_page(buf, s->dump_info.page_size)) { + ret = write_cache(&page_desc, &pd_zero, sizeof(PageDescriptor), +-- +2.37.3 + diff --git a/kvm-dump-simplify-a-bit-kdump-get_next_page.patch b/kvm-dump-simplify-a-bit-kdump-get_next_page.patch new file mode 100644 index 0000000..9780d90 --- /dev/null +++ b/kvm-dump-simplify-a-bit-kdump-get_next_page.patch @@ -0,0 +1,75 @@ +From bb55fde4d8ca587e2ef52ce58a0c22e4d66a08dc Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Marc-Andr=C3=A9=20Lureau?= +Date: Thu, 25 Aug 2022 12:40:12 +0400 +Subject: [PATCH 30/42] dump: simplify a bit kdump get_next_page() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [30/41] 417ac19fa96036e0242f40121ac6e87a9f3f70ba + +This should be functionally equivalent, but slightly easier to read, +with simplified paths and checks at the end of the function. + +The following patch is a major rewrite to get rid of the assert(). 
+ +Signed-off-by: Marc-André Lureau +Reviewed-by: David Hildenbrand +(cherry picked from commit 08df343874fcddd260021a04ce3c5a34f2c48164) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 21 ++++++++------------- + 1 file changed, 8 insertions(+), 13 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index c2c1341ad7..1c49232390 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -1133,17 +1133,11 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + block = QTAILQ_FIRST(&s->guest_phys_blocks.head); + *blockptr = block; +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); +- if (bufptr) { +- *bufptr = block->host_addr; +- } +- return true; ++ addr = block->target_start; ++ } else { ++ addr = dump_pfn_to_paddr(s, *pfnptr + 1); + } +- +- *pfnptr = *pfnptr + 1; +- addr = dump_pfn_to_paddr(s, *pfnptr); ++ assert(block != NULL); + + if ((addr >= block->target_start) && + (addr + s->dump_info.page_size <= block->target_end)) { +@@ -1155,12 +1149,13 @@ static bool get_next_page(GuestPhysBlock **blockptr, uint64_t *pfnptr, + if (!block) { + return false; + } +- assert((block->target_start & ~target_page_mask) == 0); +- assert((block->target_end & ~target_page_mask) == 0); +- *pfnptr = dump_paddr_to_pfn(s, block->target_start); ++ addr = block->target_start; + buf = block->host_addr; + } + ++ assert((block->target_start & ~target_page_mask) == 0); ++ assert((block->target_end & ~target_page_mask) == 0); ++ *pfnptr = dump_paddr_to_pfn(s, addr); + if (bufptr) { + *bufptr = buf; + } +-- +2.37.3 + diff --git a/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch b/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch new file mode 100644 index 0000000..5070722 --- /dev/null +++ b/kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch @@ -0,0 +1,81 @@ +From 
edead46187b1e55ad5e238332780aef19f1bc214 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 1/2] hw/acpi: Add ospm_status hook implementation for acpi-ged + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [1/2] 99730b1a27666ca745dc28d90751c938d43f1682 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9 +Author: Keqian Zhu +Date: Tue Aug 16 17:49:57 2022 +0800 + + hw/acpi: Add ospm_status hook implementation for acpi-ged + + Setup an ARM virtual machine of machine virt and execute qmp "query-acpi-ospm-status" + causes segmentation fault with following dumpstack: + #1 0x0000aaaaab64235c in qmp_query_acpi_ospm_status (errp=errp@entry=0xfffffffff030) at ../monitor/qmp-cmds.c:312 + #2 0x0000aaaaabfc4e20 in qmp_marshal_query_acpi_ospm_status (args=, ret=0xffffea4ffe90, errp=0xffffea4ffe88) at qapi/qapi-commands-acpi.c:63 + #3 0x0000aaaaabff8ba0 in do_qmp_dispatch_bh (opaque=0xffffea4ffe98) at ../qapi/qmp-dispatch.c:128 + #4 0x0000aaaaac02e594 in aio_bh_call (bh=0xffffe0004d80) at ../util/async.c:150 + #5 aio_bh_poll (ctx=ctx@entry=0xaaaaad0f6040) at ../util/async.c:178 + #6 0x0000aaaaac00bd40 in aio_dispatch (ctx=ctx@entry=0xaaaaad0f6040) at ../util/aio-posix.c:421 + #7 0x0000aaaaac02e010 in aio_ctx_dispatch (source=0xaaaaad0f6040, callback=, user_data=) at ../util/async.c:320 + #8 0x0000fffff76f6884 in g_main_context_dispatch () at /usr/lib64/libglib-2.0.so.0 + #9 0x0000aaaaac0452d4 in glib_pollfds_poll () at ../util/main-loop.c:297 + #10 os_host_main_loop_wait (timeout=0) at ../util/main-loop.c:320 + #11 main_loop_wait (nonblocking=nonblocking@entry=0) at ../util/main-loop.c:596 + #12 0x0000aaaaab5c9e50 in qemu_main_loop () at ../softmmu/runstate.c:734 + #13 
0x0000aaaaab185370 in qemu_main (argc=argc@entry=47, argv=argv@entry=0xfffffffff518, envp=envp@entry=0x0) at ../softmmu/main.c:38 + #14 0x0000aaaaab16f99c in main (argc=47, argv=0xfffffffff518) at ../softmmu/main.c:47 + + Fixes: ebb62075021a ("hw/acpi: Add ACPI Generic Event Device Support") + Signed-off-by: Keqian Zhu + Reviewed-by: Igor Mammedov + Message-id: 20220816094957.31700-1-zhukeqian1@huawei.com + Signed-off-by: Peter Maydell + +(cherry picked from commit d4424bebceaa8ffbc23060ce45e52a9bb817e3c9) +Signed-off-by: Jon Maloy +--- + hw/acpi/generic_event_device.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c +index e28457a7d1..a3d31631fe 100644 +--- a/hw/acpi/generic_event_device.c ++++ b/hw/acpi/generic_event_device.c +@@ -267,6 +267,13 @@ static void acpi_ged_unplug_cb(HotplugHandler *hotplug_dev, + } + } + ++static void acpi_ged_ospm_status(AcpiDeviceIf *adev, ACPIOSTInfoList ***list) ++{ ++ AcpiGedState *s = ACPI_GED(adev); ++ ++ acpi_memory_ospm_status(&s->memhp_state, list); ++} ++ + static void acpi_ged_send_event(AcpiDeviceIf *adev, AcpiEventStatusBits ev) + { + AcpiGedState *s = ACPI_GED(adev); +@@ -409,6 +416,7 @@ static void acpi_ged_class_init(ObjectClass *class, void *data) + hc->unplug_request = acpi_ged_unplug_request_cb; + hc->unplug = acpi_ged_unplug_cb; + ++ adevc->ospm_status = acpi_ged_ospm_status; + adevc->send_event = acpi_ged_send_event; + } + +-- +2.37.3 + diff --git a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch index 1bdad27..01e4097 100644 --- a/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +++ b/kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch @@ -1,18 +1,19 @@ -From 6ee4a8718dcce2d6da43ee200534b75baf1d7bbe Mon Sep 17 00:00:00 2001 +From fe4abbda80eea7f65b6b5cc544a806fb6e064917 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 18 Nov 2021 12:57:32 +0100 -Subject: [PATCH 16/17] hw/block/fdc: Prevent end-of-track overrun +Subject: [PATCH 2/3] hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [1/2] 9ffc5290348884d20b894fa79f4d0c8089247f8b (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 +RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [1/2] 31fa0351382b4ca5bd989b09e4d811ae73040673 (jmaloy/qemu-kvm) +RH-Bugzilla: 1951521 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Thomas Huth RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina Per the 82078 datasheet, if the end-of-track (EOT byte in the FIFO) is more than the number of sectors per side, the @@ -73,10 +74,10 @@ Signed-off-by: Jon Maloy 1 file changed, 8 insertions(+) diff --git a/hw/block/fdc.c b/hw/block/fdc.c -index ca1776121f..6481ec0cfb 100644 +index 97fa6de423..755a26c114 100644 --- a/hw/block/fdc.c +++ b/hw/block/fdc.c -@@ -1532,6 +1532,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) +@@ -1531,6 +1531,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction) int tmp; fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 
7 : fdctrl->fifo[5]); tmp = (fdctrl->fifo[6] - ks + 1); @@ -92,5 +93,5 @@ index ca1776121f..6481ec0cfb 100644 tmp += fdctrl->fifo[6]; fdctrl->data_len *= tmp; -- -2.31.1 +2.35.3 diff --git a/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch b/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch new file mode 100644 index 0000000..d4ca84f --- /dev/null +++ b/kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch @@ -0,0 +1,52 @@ +From 100f33ff8a1d55986e43b99ba8726abc29ee8d26 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 5/5] hw/display/qxl: Assert memory slot fits in preallocated + MemoryRegion +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [5/5] f809ce48e7989dd6547b7c8bf1a5efc3fdcacbac (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 86fdb0582c653a9824183679403a85f588260d62 +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:41 2022 +0100 + + hw/display/qxl: Assert memory slot fits in preallocated MemoryRegion + + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-6-philmd@linaro.org> + +(cherry picked from commit 86fdb0582c653a9824183679403a85f588260d62) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 2a4b2d4158..bcd9e8716a 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -1372,6 +1372,7 @@ static int qxl_add_memslot(PCIQXLDevice *d, uint32_t slot_id, uint64_t delta, + qxl_set_guest_bug(d, "%s: pci_region = %d", __func__, pci_region); + return 1; + } ++ 
assert(guest_end - pci_start <= memory_region_size(mr)); + + virt_start = (intptr_t)memory_region_get_ram_ptr(mr); + memslot.slot_id = slot_id; +-- +2.37.3 + diff --git a/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch b/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch new file mode 100644 index 0000000..9163570 --- /dev/null +++ b/kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch @@ -0,0 +1,130 @@ +From 4e1bfbe3a0a113fe3cf39336a9d7da4e8c2a21ea Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 4/5] hw/display/qxl: Avoid buffer overrun in qxl_phys2virt + (CVE-2022-4144) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [4/5] afe53f8d9b31c6fd8211fe172173151f3255e67c (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622 +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:40 2022 +0100 + + hw/display/qxl: Avoid buffer overrun in qxl_phys2virt (CVE-2022-4144) + + Have qxl_get_check_slot_offset() return false if the requested + buffer size does not fit within the slot memory region. + + Similarly qxl_phys2virt() now returns NULL in such case, and + qxl_dirty_one_surface() aborts. + + This avoids buffer overrun in the host pointer returned by + memory_region_get_ram_ptr(). 
+ + Fixes: CVE-2022-4144 (out-of-bounds read) + Reported-by: Wenxu Yin (@awxylitol) + Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1336 + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-5-philmd@linaro.org> + +(cherry picked from commit 6dbbf055148c6f1b7d8a3251a65bd6f3d1e1f622) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.c | 27 +++++++++++++++++++++++---- + hw/display/qxl.h | 2 +- + 2 files changed, 24 insertions(+), 5 deletions(-) + +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index aa9065183e..2a4b2d4158 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -1412,11 +1412,13 @@ static void qxl_reset_surfaces(PCIQXLDevice *d) + + /* can be also called from spice server thread context */ + static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, +- uint32_t *s, uint64_t *o) ++ uint32_t *s, uint64_t *o, ++ size_t size_requested) + { + uint64_t phys = le64_to_cpu(pqxl); + uint32_t slot = (phys >> (64 - 8)) & 0xff; + uint64_t offset = phys & 0xffffffffffff; ++ uint64_t size_available; + + if (slot >= NUM_MEMSLOTS) { + qxl_set_guest_bug(qxl, "slot too large %d >= %d", slot, +@@ -1440,6 +1442,23 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + slot, offset, qxl->guest_slots[slot].size); + return false; + } ++ size_available = memory_region_size(qxl->guest_slots[slot].mr); ++ if (qxl->guest_slots[slot].offset + offset >= size_available) { ++ qxl_set_guest_bug(qxl, ++ "slot %d offset %"PRIu64" > region size %"PRIu64"\n", ++ slot, qxl->guest_slots[slot].offset + offset, ++ size_available); ++ return false; ++ } ++ size_available -= qxl->guest_slots[slot].offset + offset; ++ if (size_requested > size_available) { ++ qxl_set_guest_bug(qxl, ++ "slot %d offset %"PRIu64" size %zu: " ++ "overrun by %"PRIu64" bytes\n", ++ slot, offset, size_requested, ++ size_requested - size_available); ++ return false; ++ } + + *s = slot; + *o = offset; +@@ 
-1459,7 +1478,7 @@ void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id, + offset = le64_to_cpu(pqxl) & 0xffffffffffff; + return (void *)(intptr_t)offset; + case MEMSLOT_GROUP_GUEST: +- if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset)) { ++ if (!qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size)) { + return NULL; + } + ptr = memory_region_get_ram_ptr(qxl->guest_slots[slot].mr); +@@ -1925,9 +1944,9 @@ static void qxl_dirty_one_surface(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + uint32_t slot; + bool rc; + +- rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset); +- assert(rc == true); + size = (uint64_t)height * abs(stride); ++ rc = qxl_get_check_slot_offset(qxl, pqxl, &slot, &offset, size); ++ assert(rc == true); + trace_qxl_surfaces_dirty(qxl->id, offset, size); + qxl_set_dirty(qxl->guest_slots[slot].mr, + qxl->guest_slots[slot].offset + offset, +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index c784315daa..89ca832cf9 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -157,7 +157,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * + * Returns a host pointer to a buffer placed at offset @phys within the + * active slot @group_id of the PCI VGA RAM memory region associated with +- * the @qxl device. If the slot is inactive, or the offset is out ++ * the @qxl device. If the slot is inactive, or the offset + size are out + * of the memory region, returns NULL. 
+ * + * Use with care; by the time this function returns, the returned pointer is +-- +2.37.3 + diff --git a/kvm-hw-display-qxl-Document-qxl_phys2virt.patch b/kvm-hw-display-qxl-Document-qxl_phys2virt.patch new file mode 100644 index 0000000..9bf2fe2 --- /dev/null +++ b/kvm-hw-display-qxl-Document-qxl_phys2virt.patch @@ -0,0 +1,70 @@ +From 068c531fb968ec04509b85f524d0745e6acf5449 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 2/5] hw/display/qxl: Document qxl_phys2virt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [2/5] f84c0b379022c527fc2508a242443d86454944c0 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:38 2022 +0100 + + hw/display/qxl: Document qxl_phys2virt() + + Reviewed-by: Marc-André Lureau + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-3-philmd@linaro.org> + +(cherry picked from commit b1901de83a9456cde26fc755f71ca2b7b3ef50fc) +Signed-off-by: Jon Maloy +--- + hw/display/qxl.h | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index 30d21f4d0b..c938f88a2f 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -147,6 +147,25 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + #define QXL_DEFAULT_REVISION (QXL_REVISION_STABLE_V12 + 1) + + /* qxl.c */ ++/** ++ * qxl_phys2virt: Get a pointer within a PCI VRAM memory region. 
++ * ++ * @qxl: QXL device ++ * @phys: physical offset of buffer within the VRAM ++ * @group_id: memory slot group ++ * ++ * Returns a host pointer to a buffer placed at offset @phys within the ++ * active slot @group_id of the PCI VGA RAM memory region associated with ++ * the @qxl device. If the slot is inactive, or the offset is out ++ * of the memory region, returns NULL. ++ * ++ * Use with care; by the time this function returns, the returned pointer is ++ * not protected by RCU anymore. If the caller is not within an RCU critical ++ * section and does not hold the iothread lock, it must have other means of ++ * protecting the pointer, such as a reference to the region that includes ++ * the incoming ram_addr_t. ++ * ++ */ + void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id); + void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) + GCC_FMT_ATTR(2, 3); +-- +2.37.3 + diff --git a/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch b/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch new file mode 100644 index 0000000..c644ab2 --- /dev/null +++ b/kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch @@ -0,0 +1,74 @@ +From 5ec8d909d40fa04ef2c3572e01509a1866786070 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 1/5] hw/display/qxl: Have qxl_log_command Return early if no + log_cmd handler +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [1/5] 33d94f40c46cccbc32d108d1035365917bf90356 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 61c34fc194b776ecadc39fb26b061331107e5599 +Author: Philippe 
Mathieu-Daudé +Date: Mon Nov 28 21:27:37 2022 +0100 + + hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler + + Only 3 command types are logged: no need to call qxl_phys2virt() + for the other types. Using different cases will help to pass + different structure sizes to qxl_phys2virt() in a pair of commits. + + Reviewed-by: Marc-André Lureau + Signed-off-by: Philippe Mathieu-Daudé + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-2-philmd@linaro.org> + +(cherry picked from commit 61c34fc194b776ecadc39fb26b061331107e5599) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-logger.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c +index 68bfa47568..1bcf803db6 100644 +--- a/hw/display/qxl-logger.c ++++ b/hw/display/qxl-logger.c +@@ -247,6 +247,16 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + qxl_name(qxl_type, ext->cmd.type), + compat ? "(compat)" : ""); + ++ switch (ext->cmd.type) { ++ case QXL_CMD_DRAW: ++ break; ++ case QXL_CMD_SURFACE: ++ break; ++ case QXL_CMD_CURSOR: ++ break; ++ default: ++ goto out; ++ } + data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); + if (!data) { + return 1; +@@ -269,6 +279,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + qxl_log_cmd_cursor(qxl, data, ext->group_id); + break; + } ++out: + fprintf(stderr, "\n"); + return 0; + } +-- +2.37.3 + diff --git a/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch b/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch new file mode 100644 index 0000000..dd902f7 --- /dev/null +++ b/kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch @@ -0,0 +1,234 @@ +From 0e6bd3911c4971f575aac7e9cd726467b52fe544 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 5 Dec 2022 15:32:55 -0500 +Subject: [PATCH 3/5] hw/display/qxl: Pass requested buffer size to + qxl_phys2virt() +MIME-Version: 1.0 
+Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 240: hw/display/qxl: Have qxl_log_command Return early if no log_cmd handler +RH-Bugzilla: 2148545 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Marc-André Lureau +RH-Commit: [3/5] 8e362d67fe7fef9eb457cfb15d75b298fed725c3 (jmaloy/jons-qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2148545 +CVE: CVE-2022-4144 +Upstream: Merged + +commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f +Author: Philippe Mathieu-Daudé +Date: Mon Nov 28 21:27:39 2022 +0100 + + hw/display/qxl: Pass requested buffer size to qxl_phys2virt() + + Currently qxl_phys2virt() doesn't check for buffer overrun. + In order to do so in the next commit, pass the buffer size + as argument. + + For QXLCursor in qxl_render_cursor() -> qxl_cursor() we + verify the size of the chunked data ahead, checking we can + access 'sizeof(QXLCursor) + chunk->data_size' bytes. + Since in the SPICE_CURSOR_TYPE_MONO case the cursor is + assumed to fit in one chunk, no change are required. + In SPICE_CURSOR_TYPE_ALPHA the ahead read is handled in + qxl_unpack_chunks(). 
+ + Signed-off-by: Philippe Mathieu-Daudé + Acked-by: Gerd Hoffmann + Signed-off-by: Stefan Hajnoczi + Message-Id: <20221128202741.4945-4-philmd@linaro.org> + +(cherry picked from commit 8efec0ef8bbc1e75a7ebf6e325a35806ece9b39f) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-logger.c | 11 ++++++++--- + hw/display/qxl-render.c | 20 ++++++++++++++++---- + hw/display/qxl.c | 14 +++++++++----- + hw/display/qxl.h | 4 +++- + 4 files changed, 36 insertions(+), 13 deletions(-) + +diff --git a/hw/display/qxl-logger.c b/hw/display/qxl-logger.c +index 1bcf803db6..35c38f6252 100644 +--- a/hw/display/qxl-logger.c ++++ b/hw/display/qxl-logger.c +@@ -106,7 +106,7 @@ static int qxl_log_image(PCIQXLDevice *qxl, QXLPHYSICAL addr, int group_id) + QXLImage *image; + QXLImageDescriptor *desc; + +- image = qxl_phys2virt(qxl, addr, group_id); ++ image = qxl_phys2virt(qxl, addr, group_id, sizeof(QXLImage)); + if (!image) { + return 1; + } +@@ -214,7 +214,8 @@ int qxl_log_cmd_cursor(PCIQXLDevice *qxl, QXLCursorCmd *cmd, int group_id) + cmd->u.set.position.y, + cmd->u.set.visible ? "yes" : "no", + cmd->u.set.shape); +- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id); ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, group_id, ++ sizeof(QXLCursor)); + if (!cursor) { + return 1; + } +@@ -236,6 +237,7 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + { + bool compat = ext->flags & QXL_COMMAND_FLAG_COMPAT; + void *data; ++ size_t datasz; + int ret; + + if (!qxl->cmdlog) { +@@ -249,15 +251,18 @@ int qxl_log_command(PCIQXLDevice *qxl, const char *ring, QXLCommandExt *ext) + + switch (ext->cmd.type) { + case QXL_CMD_DRAW: ++ datasz = compat ? 
sizeof(QXLCompatDrawable) : sizeof(QXLDrawable); + break; + case QXL_CMD_SURFACE: ++ datasz = sizeof(QXLSurfaceCmd); + break; + case QXL_CMD_CURSOR: ++ datasz = sizeof(QXLCursorCmd); + break; + default: + goto out; + } +- data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ data = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, datasz); + if (!data) { + return 1; + } +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index ca217004bf..fcfd40c3ac 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -107,7 +107,9 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice *qxl) + qxl->guest_primary.resized = 0; + qxl->guest_primary.data = qxl_phys2virt(qxl, + qxl->guest_primary.surface.mem, +- MEMSLOT_GROUP_GUEST); ++ MEMSLOT_GROUP_GUEST, ++ qxl->guest_primary.abs_stride ++ * height); + if (!qxl->guest_primary.data) { + goto end; + } +@@ -228,7 +230,8 @@ static void qxl_unpack_chunks(void *dest, size_t size, PCIQXLDevice *qxl, + if (offset == size) { + return; + } +- chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id); ++ chunk = qxl_phys2virt(qxl, chunk->next_chunk, group_id, ++ sizeof(QXLDataChunk) + chunk->data_size); + if (!chunk) { + return; + } +@@ -295,7 +298,8 @@ fail: + /* called from spice server thread context only */ + int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) + { +- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLCursorCmd)); + QXLCursor *cursor; + QEMUCursor *c; + +@@ -314,7 +318,15 @@ int qxl_render_cursor(PCIQXLDevice *qxl, QXLCommandExt *ext) + } + switch (cmd->type) { + case QXL_CURSOR_SET: +- cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id); ++ /* First read the QXLCursor to get QXLDataChunk::data_size ... 
*/ ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id, ++ sizeof(QXLCursor)); ++ if (!cursor) { ++ return 1; ++ } ++ /* Then read including the chunked data following QXLCursor. */ ++ cursor = qxl_phys2virt(qxl, cmd->u.set.shape, ext->group_id, ++ sizeof(QXLCursor) + cursor->chunk.data_size); + if (!cursor) { + return 1; + } +diff --git a/hw/display/qxl.c b/hw/display/qxl.c +index 29c80b4289..aa9065183e 100644 +--- a/hw/display/qxl.c ++++ b/hw/display/qxl.c +@@ -274,7 +274,8 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice *qxl, int replay) + QXL_IO_MONITORS_CONFIG_ASYNC)); + } + +- cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST); ++ cfg = qxl_phys2virt(qxl, qxl->guest_monitors_config, MEMSLOT_GROUP_GUEST, ++ sizeof(QXLMonitorsConfig)); + if (cfg != NULL && cfg->count == 1) { + qxl->guest_primary.resized = 1; + qxl->guest_head0_width = cfg->heads[0].width; +@@ -459,7 +460,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext) + switch (le32_to_cpu(ext->cmd.type)) { + case QXL_CMD_SURFACE: + { +- QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLSurfaceCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLSurfaceCmd)); + + if (!cmd) { + return 1; +@@ -494,7 +496,8 @@ static int qxl_track_command(PCIQXLDevice *qxl, struct QXLCommandExt *ext) + } + case QXL_CMD_CURSOR: + { +- QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id); ++ QXLCursorCmd *cmd = qxl_phys2virt(qxl, ext->cmd.data, ext->group_id, ++ sizeof(QXLCursorCmd)); + + if (!cmd) { + return 1; +@@ -1444,7 +1447,8 @@ static bool qxl_get_check_slot_offset(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, + } + + /* can be also called from spice server thread context */ +-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id) ++void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL pqxl, int group_id, ++ size_t size) + { + uint64_t offset; + uint32_t slot; +@@ -1952,7 +1956,7 @@ 
static void qxl_dirty_surfaces(PCIQXLDevice *qxl) + } + + cmd = qxl_phys2virt(qxl, qxl->guest_surfaces.cmds[i], +- MEMSLOT_GROUP_GUEST); ++ MEMSLOT_GROUP_GUEST, sizeof(QXLSurfaceCmd)); + assert(cmd); + assert(cmd->type == QXL_SURFACE_CMD_CREATE); + qxl_dirty_one_surface(qxl, cmd->u.surface_create.data, +diff --git a/hw/display/qxl.h b/hw/display/qxl.h +index c938f88a2f..c784315daa 100644 +--- a/hw/display/qxl.h ++++ b/hw/display/qxl.h +@@ -153,6 +153,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * @qxl: QXL device + * @phys: physical offset of buffer within the VRAM + * @group_id: memory slot group ++ * @size: size of the buffer + * + * Returns a host pointer to a buffer placed at offset @phys within the + * active slot @group_id of the PCI VGA RAM memory region associated with +@@ -166,7 +167,8 @@ OBJECT_DECLARE_SIMPLE_TYPE(PCIQXLDevice, PCI_QXL) + * the incoming ram_addr_t. + * + */ +-void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id); ++void *qxl_phys2virt(PCIQXLDevice *qxl, QXLPHYSICAL phys, int group_id, ++ size_t size); + void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...) 
+ GCC_FMT_ATTR(2, 3); + +-- +2.37.3 + diff --git a/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch b/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch new file mode 100644 index 0000000..514dd55 --- /dev/null +++ b/kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch @@ -0,0 +1,67 @@ +From f96220d64a31a4a52b2d132a503048579946f982 Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:13 +0200 +Subject: [PATCH 3/3] i386: do kvm_put_msr_feature_control() first thing when + vCPU is reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [2/2] 08e1e67db96801e4a35aa6b60a93b2c2f1641220 + +kvm_put_sregs2() fails to reset 'locked' CR4/CR0 bits upon vCPU reset when +it is in VMX root operation. Do kvm_put_msr_feature_control() before +kvm_put_sregs2() to (possibly) kick vCPU out of VMX root operation. It also +seems logical to do kvm_put_msr_feature_control() before +kvm_put_nested_state() and not after it, especially when 'real' nested +state is set. + +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-3-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 45ed68a1a3a19754ade954d75a3c9d13ff560e5c) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 17 ++++++++++++----- + 1 file changed, 12 insertions(+), 5 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index 81d729dc40..a06221d3e5 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -4255,6 +4255,18 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + ++ /* ++ * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX ++ * root operation upon vCPU reset. 
kvm_put_msr_feature_control() should also ++ * preceed kvm_put_nested_state() when 'real' nested state is set. ++ */ ++ if (level >= KVM_PUT_RESET_STATE) { ++ ret = kvm_put_msr_feature_control(x86_cpu); ++ if (ret < 0) { ++ return ret; ++ } ++ } ++ + /* must be before kvm_put_nested_state so that EFER.SVME is set */ + ret = kvm_put_sregs(x86_cpu); + if (ret < 0) { +@@ -4266,11 +4278,6 @@ int kvm_arch_put_registers(CPUState *cpu, int level) + if (ret < 0) { + return ret; + } +- +- ret = kvm_put_msr_feature_control(x86_cpu); +- if (ret < 0) { +- return ret; +- } + } + + if (level == KVM_PUT_FULL_STATE) { +-- +2.35.3 + diff --git a/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch b/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch new file mode 100644 index 0000000..411bed4 --- /dev/null +++ b/kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch @@ -0,0 +1,94 @@ +From 46e54544c3480658111d6f111d6c265dcea2e19b Mon Sep 17 00:00:00 2001 +From: Vitaly Kuznetsov +Date: Thu, 18 Aug 2022 17:01:12 +0200 +Subject: [PATCH 2/3] i386: reset KVM nested state upon CPU reset + +RH-Author: Miroslav Rezanina +RH-MergeRequest: 219: Synchronize qemu-6.2.0-20.el8.1 build from RHEL 8.7 to RHEL 8.8 +RH-Bugzilla: 2125271 +RH-Acked-by: Vitaly Kuznetsov +RH-Acked-by: Jon Maloy +RH-Commit: [1/2] de4db7bceb6baaf69aec8b0ae9aa8887aa869e15 + +Make sure env->nested_state is cleaned up when a vCPU is reset, it may +be stale after an incoming migration, kvm_arch_put_registers() may +end up failing or putting vCPU in a weird state. 
+ +Reviewed-by: Maxim Levitsky +Signed-off-by: Vitaly Kuznetsov +Message-Id: <20220818150113.479917-2-vkuznets@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3cafdb67504a34a0305260f0c86a73d5a3fb000b) +Signed-off-by: Vitaly Kuznetsov +--- + target/i386/kvm/kvm.c | 37 +++++++++++++++++++++++++++---------- + 1 file changed, 27 insertions(+), 10 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index bd439e56ad..81d729dc40 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1615,6 +1615,30 @@ static void kvm_init_xsave(CPUX86State *env) + env->xsave_buf_len); + } + ++static void kvm_init_nested_state(CPUX86State *env) ++{ ++ struct kvm_vmx_nested_state_hdr *vmx_hdr; ++ uint32_t size; ++ ++ if (!env->nested_state) { ++ return; ++ } ++ ++ size = env->nested_state->size; ++ ++ memset(env->nested_state, 0, size); ++ env->nested_state->size = size; ++ ++ if (cpu_has_vmx(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; ++ vmx_hdr = &env->nested_state->hdr.vmx; ++ vmx_hdr->vmxon_pa = -1ull; ++ vmx_hdr->vmcs12_pa = -1ull; ++ } else if (cpu_has_svm(env)) { ++ env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; ++ } ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -2042,19 +2066,10 @@ int kvm_arch_init_vcpu(CPUState *cs) + assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); + + if (cpu_has_vmx(env) || cpu_has_svm(env)) { +- struct kvm_vmx_nested_state_hdr *vmx_hdr; +- + env->nested_state = g_malloc0(max_nested_state_len); + env->nested_state->size = max_nested_state_len; + +- if (cpu_has_vmx(env)) { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; +- vmx_hdr = &env->nested_state->hdr.vmx; +- vmx_hdr->vmxon_pa = -1ull; +- vmx_hdr->vmcs12_pa = -1ull; +- } else { +- env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; +- } ++ kvm_init_nested_state(env); + } + } + +@@ -2117,6 +2132,8 @@ void kvm_arch_reset_vcpu(X86CPU *cpu) + /* enabled by 
default */ + env->poll_control_msr = 1; + ++ kvm_init_nested_state(env); ++ + sev_es_set_reset_vector(CPU(cpu)); + } + +-- +2.35.3 + diff --git a/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch b/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch new file mode 100644 index 0000000..6af2a9f --- /dev/null +++ b/kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch @@ -0,0 +1,92 @@ +From eaade87072e903cf550dfdb8ed1480dddc6bb0e3 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Thu, 20 Jan 2022 15:22:59 +0100 +Subject: [PATCH 21/24] ide: Increment BB in-flight counter for TRIM BH +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Hanna Reitz +RH-MergeRequest: 188: ide: Increment BB in-flight counter for TRIM BH +RH-Commit: [1/1] 1e702e735ff63f2b8b69c20cac1b309dd085cd62 +RH-Bugzilla: 2029980 +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf +RH-Acked-by: Paolo Bonzini + +When we still have an AIOCB registered for DMA operations, we try to +settle the respective operation by draining the BlockBackend associated +with the IDE device. + +However, this assumes that every DMA operation is associated with an +increment of the BlockBackend’s in-flight counter (e.g. through some +ongoing I/O operation), so that draining the BB until its in-flight +counter reaches 0 will settle all DMA operations. That is not the case: +For TRIM, the guest can issue a zero-length operation that will not +result in any I/O operation forwarded to the BlockBackend, and also not +increment the in-flight counter in any other way. In such a case, +blk_drain() will be a no-op if no other operations are in flight. + +It is clear that if blk_drain() is a no-op, the value of +s->bus->dma->aiocb will not change between checking it in the `if` +condition and asserting that it is NULL after blk_drain(). 
+ +The particular problem is that ide_issue_trim() creates a BH +(ide_trim_bh_cb()) to settle the TRIM request: iocb->common.cb() is +ide_dma_cb(), which will either create a new request, or find the +transfer to be done and call ide_set_inactive(), which clears +s->bus->dma->aiocb. Therefore, the blk_drain() must wait for +ide_trim_bh_cb() to run, which currently it will not always do. + +To fix this issue, we increment the BlockBackend's in-flight counter +when the TRIM operation begins (in ide_issue_trim(), when the +ide_trim_bh_cb() BH is created) and decrement it when ide_trim_bh_cb() +is done. + +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2029980 +Suggested-by: Paolo Bonzini +Signed-off-by: Hanna Reitz +Message-Id: <20220120142259.120189-1-hreitz@redhat.com> +Reviewed-by: Paolo Bonzini +Reviewed-by: John Snow +Tested-by: John Snow +(cherry picked from commit 7e5cdb345f77d76cb4877fe6230c4e17a7d0d0ca) +Signed-off-by: Hanna Reitz +--- + hw/ide/core.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/hw/ide/core.c b/hw/ide/core.c +index e28f8aad61..15138225be 100644 +--- a/hw/ide/core.c ++++ b/hw/ide/core.c +@@ -433,12 +433,16 @@ static const AIOCBInfo trim_aiocb_info = { + static void ide_trim_bh_cb(void *opaque) + { + TrimAIOCB *iocb = opaque; ++ BlockBackend *blk = iocb->s->blk; + + iocb->common.cb(iocb->common.opaque, iocb->ret); + + qemu_bh_delete(iocb->bh); + iocb->bh = NULL; + qemu_aio_unref(iocb); ++ ++ /* Paired with an increment in ide_issue_trim() */ ++ blk_dec_in_flight(blk); + } + + static void ide_issue_trim_cb(void *opaque, int ret) +@@ -508,6 +512,9 @@ BlockAIOCB *ide_issue_trim( + IDEState *s = opaque; + TrimAIOCB *iocb; + ++ /* Paired with a decrement in ide_trim_bh_cb() */ ++ blk_inc_in_flight(s->blk); ++ + iocb = blk_aio_get(&trim_aiocb_info, s->blk, cb, cb_opaque); + iocb->s = s; + iocb->bh = qemu_bh_new(ide_trim_bh_cb, iocb); +-- +2.35.3 + diff --git a/kvm-include-elf.h-add-s390x-note-types.patch 
b/kvm-include-elf.h-add-s390x-note-types.patch new file mode 100644 index 0000000..9e17d2c --- /dev/null +++ b/kvm-include-elf.h-add-s390x-note-types.patch @@ -0,0 +1,43 @@ +From 3fceb3b60a60c5008eecf99e45e269b757042b5a Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:20 +0000 +Subject: [PATCH 39/42] include/elf.h: add s390x note types +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [39/41] ebf0873744905abbe9cfc423a56c6d1b4f2ae936 + +Adding two s390x note types + +Signed-off-by: Janosch Frank +Reviewed-by: Thomas Huth +Message-Id: <20221017083822.43118-9-frankja@linux.ibm.com> +(cherry picked from commit 5433669c7a1884cc0394c360148965edf7519884) +Signed-off-by: Cédric Le Goater +--- + include/elf.h | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/include/elf.h b/include/elf.h +index 811bf4a1cb..4edab8e5a2 100644 +--- a/include/elf.h ++++ b/include/elf.h +@@ -1647,6 +1647,8 @@ typedef struct elf64_shdr { + #define NT_TASKSTRUCT 4 + #define NT_AUXV 6 + #define NT_PRXFPREG 0x46e62b7f /* copied from gdb5.1/include/elf/common.h */ ++#define NT_S390_PV_CPU_DATA 0x30e /* s390 protvirt cpu dump data */ ++#define NT_S390_RI_CB 0x30d /* s390 runtime instrumentation */ + #define NT_S390_GS_CB 0x30b /* s390 guarded storage registers */ + #define NT_S390_VXRS_HIGH 0x30a /* s390 vector registers 16-31 */ + #define NT_S390_VXRS_LOW 0x309 /* s390 vector registers 0-15 (lower half) */ +-- +2.37.3 + diff --git a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch index a37ea6f..0d652dd 100644 --- a/kvm-iotests-108-Fix-when-missing-user_allow_other.patch +++ 
b/kvm-iotests-108-Fix-when-missing-user_allow_other.patch @@ -1,15 +1,15 @@ -From 447bca651c9156d7aba6b7495c75f19b5e4ed53f Mon Sep 17 00:00:00 2001 +From 676e19198916d7631ba1367646dd08dc72079f88 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Thu, 21 Apr 2022 16:24:35 +0200 -Subject: [PATCH 07/16] iotests/108: Fix when missing user_allow_other +Subject: [PATCH 6/6] iotests/108: Fix when missing user_allow_other RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [4/4] a51ab8606fc9d8dea2b6539f4e795d5813892a5c (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [4/4] 36b70b5378ae7c8084b9e847706f00003abe9c11 +RH-Bugzilla: 1519071 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake FUSE exports' allow-other option defaults to "auto", which means that it will try passing allow_other as a mount option, and fall back to not @@ -35,7 +35,7 @@ Signed-off-by: Hanna Reitz 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index a3090e2875..4681c7c769 100755 +index 23abbeaff0..775ff08eca 100755 --- a/tests/qemu-iotests/108 +++ b/tests/qemu-iotests/108 @@ -326,7 +326,7 @@ else @@ -48,5 +48,5 @@ index a3090e2875..4681c7c769 100755 & -- -2.31.1 +2.27.0 diff --git a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch index 7a968f6..cc67d7c 100644 --- a/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +++ b/kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch @@ -1,15 +1,15 @@ -From ed69e01352b5e9a06173daab53bfa373c8535732 Mon Sep 17 00:00:00 2001 +From d638552d76db0db9e2b6ae90a35f0b451b0cbaf8 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Tue, 5 Apr 2022 15:46:51 +0200 -Subject: [PATCH 05/16] iotests/108: Test new refcount rebuild algorithm +Subject: [PATCH 4/6] 
iotests/108: Test new refcount rebuild algorithm RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [2/4] b68310a9fee8465dd3f568c8e867e1b7ae52bdaf (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [2/4] 2aa8c383f0c88c414f10ade8bd2e8af07c35f35b +RH-Bugzilla: 1519071 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake One clear problem with how qcow2's refcount structure rebuild algorithm used to be before "qcow2: Improve refcount structure rebuilding" was @@ -55,7 +55,7 @@ Signed-off-by: Hanna Reitz 2 files changed, 343 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108 -index 56339ab2c5..a3090e2875 100755 +index 8eaef0b8bf..23abbeaff0 100755 --- a/tests/qemu-iotests/108 +++ b/tests/qemu-iotests/108 @@ -30,13 +30,20 @@ status=1 # failure is the default! @@ -441,5 +441,5 @@ index 75bab8dc84..b5401d788d 100644 No errors were found on the image. *** done -- -2.31.1 +2.27.0 diff --git a/kvm-iotests-Allow-using-QMP-with-the-QSD.patch b/kvm-iotests-Allow-using-QMP-with-the-QSD.patch new file mode 100644 index 0000000..5d45438 --- /dev/null +++ b/kvm-iotests-Allow-using-QMP-with-the-QSD.patch @@ -0,0 +1,99 @@ +From 12f596b66d577eb92f154fadf734d058dd0756d6 Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:54 +0100 +Subject: [PATCH 23/24] iotests: Allow using QMP with the QSD + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [2/3] 55bee4690a2e02d3be9f2bd68f2d244d0a36743b +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Add a parameter to optionally open a QMP connection when creating a +QemuStorageDaemon instance. 
+ +Signed-off-by: Hanna Reitz +Message-Id: <20220216105355.30729-3-hreitz@redhat.com> +Reviewed-by: Eric Blake +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit ec88eed8d14088b36a3495710368b8d1a3c33420) +Signed-off-by: Hanna Reitz +--- + tests/qemu-iotests/iotests.py | 32 +++++++++++++++++++++++++++++++- + 1 file changed, 31 insertions(+), 1 deletion(-) + +diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py +index a51b5ce8cd..2ef493755c 100644 +--- a/tests/qemu-iotests/iotests.py ++++ b/tests/qemu-iotests/iotests.py +@@ -38,6 +38,7 @@ + + from qemu.machine import qtest + from qemu.qmp import QMPMessage ++from qemu.aqmp.legacy import QEMUMonitorProtocol + + # Use this logger for logging messages directly from the iotests module + logger = logging.getLogger('qemu.iotests') +@@ -315,14 +316,30 @@ def cmd(self, cmd): + + + class QemuStorageDaemon: +- def __init__(self, *args: str, instance_id: str = 'a'): ++ _qmp: Optional[QEMUMonitorProtocol] = None ++ _qmpsock: Optional[str] = None ++ # Python < 3.8 would complain if this type were not a string literal ++ # (importing `annotations` from `__future__` would work; but not on <= 3.6) ++ _p: 'Optional[subprocess.Popen[bytes]]' = None ++ ++ def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False): + assert '--pidfile' not in args + self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid') + all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile] + ++ if qmp: ++ self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock') ++ all_args += ['--chardev', ++ f'socket,id=qmp-sock,path={self._qmpsock}', ++ '--monitor', 'qmp-sock'] ++ ++ self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True) ++ + # Cannot use with here, we want the subprocess to stay around + # pylint: disable=consider-using-with + self._p = subprocess.Popen(all_args) ++ if self._qmp is not None: ++ self._qmp.accept() + while not os.path.exists(self.pidfile): + if 
self._p.poll() is not None: + cmd = ' '.join(all_args) +@@ -337,11 +354,24 @@ def __init__(self, *args: str, instance_id: str = 'a'): + + assert self._pid == self._p.pid + ++ def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \ ++ -> QMPMessage: ++ assert self._qmp is not None ++ return self._qmp.cmd(cmd, args) ++ + def stop(self, kill_signal=15): + self._p.send_signal(kill_signal) + self._p.wait() + self._p = None + ++ if self._qmp: ++ self._qmp.close() ++ ++ if self._qmpsock is not None: ++ try: ++ os.remove(self._qmpsock) ++ except OSError: ++ pass + try: + os.remove(self.pidfile) + except OSError: +-- +2.35.3 + diff --git a/kvm-iotests-graph-changes-while-io-New-test.patch b/kvm-iotests-graph-changes-while-io-New-test.patch new file mode 100644 index 0000000..d40e25e --- /dev/null +++ b/kvm-iotests-graph-changes-while-io-New-test.patch @@ -0,0 +1,153 @@ +From 27042ff7aca4366c50e8ed66b47487d46774d16a Mon Sep 17 00:00:00 2001 +From: Hanna Reitz +Date: Wed, 16 Feb 2022 11:53:55 +0100 +Subject: [PATCH 24/24] iotests/graph-changes-while-io: New test + +RH-Author: Hanna Reitz +RH-MergeRequest: 189: block: Make bdrv_refresh_limits() non-recursive +RH-Commit: [3/3] b9dffe09bef6cf9b2f0aad69b327ea1df92e847a +RH-Bugzilla: 2072932 +RH-Acked-by: Eric Blake +RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Kevin Wolf + +Test the following scenario: +1. Some block node (null-co) attached to a user (here: NBD server) that + performs I/O and keeps the node in an I/O thread +2. Repeatedly run blockdev-add/blockdev-del to add/remove an overlay + to/from that node + +Each blockdev-add triggers bdrv_refresh_limits(), and because +blockdev-add runs in the main thread, it does not stop the I/O requests. +I/O can thus happen while the limits are refreshed, and when such a +request sees a temporarily invalid block limit (e.g. alignment is 0), +this may easily crash qemu (or the storage daemon in this case). 
+ +The block layer needs to ensure that I/O requests to a node are paused +while that node's BlockLimits are refreshed. + +Signed-off-by: Hanna Reitz +Reviewed-by: Eric Blake +Message-Id: <20220216105355.30729-4-hreitz@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 971bea8089531af56b1bbd9ce62e756bdf006711) +Signed-off-by: Hanna Reitz +--- + .../qemu-iotests/tests/graph-changes-while-io | 91 +++++++++++++++++++ + .../tests/graph-changes-while-io.out | 5 + + 2 files changed, 96 insertions(+) + create mode 100755 tests/qemu-iotests/tests/graph-changes-while-io + create mode 100644 tests/qemu-iotests/tests/graph-changes-while-io.out + +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io b/tests/qemu-iotests/tests/graph-changes-while-io +new file mode 100755 +index 0000000000..567e8cf21e +--- /dev/null ++++ b/tests/qemu-iotests/tests/graph-changes-while-io +@@ -0,0 +1,91 @@ ++#!/usr/bin/env python3 ++# group: rw ++# ++# Test graph changes while I/O is happening ++# ++# Copyright (C) 2022 Red Hat, Inc. ++# ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 2 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program. If not, see . 
++# ++ ++import os ++from threading import Thread ++import iotests ++from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \ ++ QemuStorageDaemon ++ ++ ++top = os.path.join(iotests.test_dir, 'top.img') ++nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock') ++ ++ ++def do_qemu_img_bench() -> None: ++ """ ++ Do some I/O requests on `nbd_sock`. ++ """ ++ assert qemu_img('bench', '-f', 'raw', '-c', '2000000', ++ f'nbd+unix:///node0?socket={nbd_sock}') == 0 ++ ++ ++class TestGraphChangesWhileIO(QMPTestCase): ++ def setUp(self) -> None: ++ # Create an overlay that can be added at runtime on top of the ++ # null-co block node that will receive I/O ++ assert qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://', ++ top) == 0 ++ ++ # QSD instance with a null-co block node in an I/O thread, ++ # exported over NBD (on `nbd_sock`, export name "node0") ++ self.qsd = QemuStorageDaemon( ++ '--object', 'iothread,id=iothread0', ++ '--blockdev', 'null-co,node-name=node0,read-zeroes=true', ++ '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}', ++ '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' + ++ 'fixed-iothread=true,writable=true', ++ qmp=True ++ ) ++ ++ def tearDown(self) -> None: ++ self.qsd.stop() ++ ++ def test_blockdev_add_while_io(self) -> None: ++ # Run qemu-img bench in the background ++ bench_thr = Thread(target=do_qemu_img_bench) ++ bench_thr.start() ++ ++ # While qemu-img bench is running, repeatedly add and remove an ++ # overlay to/from node0 ++ while bench_thr.is_alive(): ++ result = self.qsd.qmp('blockdev-add', { ++ 'driver': imgfmt, ++ 'node-name': 'overlay', ++ 'backing': 'node0', ++ 'file': { ++ 'driver': 'file', ++ 'filename': top ++ } ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ result = self.qsd.qmp('blockdev-del', { ++ 'node-name': 'overlay' ++ }) ++ self.assert_qmp(result, 'return', {}) ++ ++ bench_thr.join() ++ ++if __name__ == '__main__': ++ # Format must support raw backing files ++ 
iotests.main(supported_fmts=['qcow', 'qcow2', 'qed'], ++ supported_protocols=['file']) +diff --git a/tests/qemu-iotests/tests/graph-changes-while-io.out b/tests/qemu-iotests/tests/graph-changes-while-io.out +new file mode 100644 +index 0000000000..ae1213e6f8 +--- /dev/null ++++ b/tests/qemu-iotests/tests/graph-changes-while-io.out +@@ -0,0 +1,5 @@ ++. ++---------------------------------------------------------------------- ++Ran 1 tests ++ ++OK +-- +2.35.3 + diff --git a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch index f12b8ec..7c1fcc4 100644 --- a/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +++ b/kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch @@ -1,15 +1,15 @@ -From 49d9c9dced7278517105e9cfec34ea4af716432d Mon Sep 17 00:00:00 2001 +From 99d33621440fd30e0da2974dafb0cd372334305a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Jun 2022 17:47:12 +0100 -Subject: [PATCH 6/6] linux-aio: explain why max batch is checked in +Subject: [PATCH 2/2] linux-aio: explain why max batch is checked in laio_io_unplug() RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: [2/2] b3d6421086bde50d4baad2343b2df89c5f66950e (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 +RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [2/2] 8617870ed70e3a57269f06eeb242d0fab79a66fb +RH-Bugzilla: 2105410 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz RH-Acked-by: Stefano Garzarella It may not be obvious why laio_io_unplug() checks max batch. 
I discussed @@ -28,10 +28,10 @@ Signed-off-by: Stefan Hajnoczi 1 file changed, 6 insertions(+) diff --git a/block/linux-aio.c b/block/linux-aio.c -index 6078da7e42..9c2393a2f7 100644 +index 77f17ad596..85650c4222 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c -@@ -365,6 +365,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, +@@ -362,6 +362,12 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, assert(s->io_q.plugged); s->io_q.plugged--; @@ -45,5 +45,5 @@ index 6078da7e42..9c2393a2f7 100644 (!s->io_q.plugged && !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending))) { -- -2.31.1 +2.35.3 diff --git a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch index ed9b5ee..c89fc72 100644 --- a/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +++ b/kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch @@ -1,15 +1,15 @@ -From e7326c3a7e0fc022aa5c0ae07bc1e19ad1b6f2ed Mon Sep 17 00:00:00 2001 +From 0fbb0c87628bef2cb4d1b7748d67020dde50cdef Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 9 Jun 2022 17:47:11 +0100 -Subject: [PATCH 5/6] linux-aio: fix unbalanced plugged counter in +Subject: [PATCH 1/2] linux-aio: fix unbalanced plugged counter in laio_io_unplug() RH-Author: Stefan Hajnoczi -RH-MergeRequest: 102: linux-aio: fix unbalanced plugged counter in laio_io_unplug() -RH-Commit: [1/2] 8a71da371c72521f1d70b8767ee564575e0d522b (stefanha/centos-stream-qemu-kvm) -RH-Bugzilla: 2092788 +RH-MergeRequest: 199: linux-aio: fix unbalanced plugged counter in laio_io_unplug() +RH-Commit: [1/2] f518df755090289905898a36922992288688e338 +RH-Bugzilla: 2105410 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Kevin Wolf +RH-Acked-by: Hanna Reitz RH-Acked-by: Stefano Garzarella Every laio_io_plug() call has a matching laio_io_unplug() call. 
There is @@ -36,10 +36,10 @@ Signed-off-by: Stefan Hajnoczi 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/linux-aio.c b/block/linux-aio.c -index 4c423fcccf..6078da7e42 100644 +index f53ae72e21..77f17ad596 100644 --- a/block/linux-aio.c +++ b/block/linux-aio.c -@@ -363,8 +363,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, +@@ -360,8 +360,10 @@ void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s, uint64_t dev_max_batch) { assert(s->io_q.plugged); @@ -52,5 +52,5 @@ index 4c423fcccf..6078da7e42 100644 ioq_submit(s); } -- -2.31.1 +2.35.3 diff --git a/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch b/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch new file mode 100644 index 0000000..90adb5c --- /dev/null +++ b/kvm-linux-headers-Update-headers-to-v5.17-rc1.patch @@ -0,0 +1,1227 @@ +From e9ecd7543fa8d3e9fe80f4144e4c0461f783fc37 Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 8 Feb 2022 15:48:05 -0500 +Subject: [PATCH 03/24] linux-headers: Update headers to v5.17-rc1 + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [3/13] 63593c2431eabf02222f37467736b580022b94c8 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Update headers to 5.17-rc1. I need latest fuse changes. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: <20220208204813.682906-3-vgoyal@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit ef17dd6a8e6b6e3aeb29233996d44dfcb736d515) +Signed-off-by: Paul Lai +--- + include/standard-headers/asm-x86/kvm_para.h | 1 + + include/standard-headers/drm/drm_fourcc.h | 11 ++ + include/standard-headers/linux/ethtool.h | 1 + + include/standard-headers/linux/fuse.h | 60 +++++++- + include/standard-headers/linux/pci_regs.h | 142 +++++++++--------- + include/standard-headers/linux/virtio_gpio.h | 72 +++++++++ + include/standard-headers/linux/virtio_i2c.h | 47 ++++++ + include/standard-headers/linux/virtio_iommu.h | 8 +- + .../standard-headers/linux/virtio_pcidev.h | 65 ++++++++ + include/standard-headers/linux/virtio_scmi.h | 24 +++ + linux-headers/asm-generic/unistd.h | 5 +- + linux-headers/asm-mips/unistd_n32.h | 2 + + linux-headers/asm-mips/unistd_n64.h | 2 + + linux-headers/asm-mips/unistd_o32.h | 2 + + linux-headers/asm-powerpc/unistd_32.h | 2 + + linux-headers/asm-powerpc/unistd_64.h | 2 + + linux-headers/asm-riscv/bitsperlong.h | 14 ++ + linux-headers/asm-riscv/mman.h | 1 + + linux-headers/asm-riscv/unistd.h | 44 ++++++ + linux-headers/asm-s390/unistd_32.h | 2 + + linux-headers/asm-s390/unistd_64.h | 2 + + linux-headers/asm-x86/kvm.h | 16 +- + linux-headers/asm-x86/unistd_32.h | 1 + + linux-headers/asm-x86/unistd_64.h | 1 + + linux-headers/asm-x86/unistd_x32.h | 1 + + linux-headers/linux/kvm.h | 17 +++ + 26 files changed, 469 insertions(+), 76 deletions(-) + create mode 100644 include/standard-headers/linux/virtio_gpio.h + create mode 100644 include/standard-headers/linux/virtio_i2c.h + create mode 100644 include/standard-headers/linux/virtio_pcidev.h + create mode 100644 include/standard-headers/linux/virtio_scmi.h + create mode 100644 linux-headers/asm-riscv/bitsperlong.h + create mode 100644 linux-headers/asm-riscv/mman.h + create mode 100644 linux-headers/asm-riscv/unistd.h + +diff --git a/include/standard-headers/asm-x86/kvm_para.h b/include/standard-headers/asm-x86/kvm_para.h +index 204cfb8640..f0235e58a1 
100644 +--- a/include/standard-headers/asm-x86/kvm_para.h ++++ b/include/standard-headers/asm-x86/kvm_para.h +@@ -8,6 +8,7 @@ + * should be used to determine that a VM is running under KVM. + */ + #define KVM_CPUID_SIGNATURE 0x40000000 ++#define KVM_SIGNATURE "KVMKVMKVM\0\0\0" + + /* This CPUID returns two feature bitmaps in eax, edx. Before enabling + * a particular paravirtualization, the appropriate feature bit should +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 2c025cb4fe..4888f85f69 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -313,6 +313,13 @@ extern "C" { + */ + #define DRM_FORMAT_P016 fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */ + ++/* 2 plane YCbCr420. ++ * 3 10 bit components and 2 padding bits packed into 4 bytes. ++ * index 0 = Y plane, [31:0] x:Y2:Y1:Y0 2:10:10:10 little endian ++ * index 1 = Cr:Cb plane, [63:0] x:Cr2:Cb2:Cr1:x:Cb1:Cr0:Cb0 [2:10:10:10:2:10:10:10] little endian ++ */ ++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel packed */ ++ + /* 3 plane non-subsampled (444) YCbCr + * 16 bits per component, but only 10 bits are used and 6 bits are padded + * index 0: Y plane, [15:0] Y:x [10:6] little endian +@@ -853,6 +860,10 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + * and UV. Some SAND-using hardware stores UV in a separate tiled + * image from Y to reduce the column height, which is not supported + * with these modifiers. ++ * ++ * The DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT modifier is also ++ * supported for DRM_FORMAT_P030 where the columns remain as 128 bytes ++ * wide, but as this is a 10 bpp format that translates to 96 pixels. 
+ */ + + #define DRM_FORMAT_MOD_BROADCOM_SAND32_COL_HEIGHT(v) \ +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 688eb8dc39..38d5a4cd6e 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -231,6 +231,7 @@ enum tunable_id { + ETHTOOL_RX_COPYBREAK, + ETHTOOL_TX_COPYBREAK, + ETHTOOL_PFC_PREVENTION_TOUT, /* timeout in msecs */ ++ ETHTOOL_TX_COPYBREAK_BUF_SIZE, + /* + * Add your fresh new tunable attribute above and remember to update + * tunable_strings[] in net/ethtool/common.c +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index 23ea31708b..bda06258be 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -184,6 +184,16 @@ + * + * 7.34 + * - add FUSE_SYNCFS ++ * ++ * 7.35 ++ * - add FOPEN_NOFLUSH ++ * ++ * 7.36 ++ * - extend fuse_init_in with reserved fields, add FUSE_INIT_EXT init flag ++ * - add flags2 to fuse_init_in and fuse_init_out ++ * - add FUSE_SECURITY_CTX init flag ++ * - add security context to create, mkdir, symlink, and mknod requests ++ * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX + */ + + #ifndef _LINUX_FUSE_H +@@ -215,7 +225,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 34 ++#define FUSE_KERNEL_MINOR_VERSION 36 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -286,12 +296,14 @@ struct fuse_file_lock { + * FOPEN_NONSEEKABLE: the file is not seekable + * FOPEN_CACHE_DIR: allow caching this directory + * FOPEN_STREAM: the file is stream-like (no file position at all) ++ * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) + */ + #define FOPEN_DIRECT_IO (1 << 0) + #define FOPEN_KEEP_CACHE (1 << 1) + #define FOPEN_NONSEEKABLE (1 << 2) + #define FOPEN_CACHE_DIR (1 << 3) + #define FOPEN_STREAM (1 << 4) ++#define FOPEN_NOFLUSH (1 << 5) + + 
/** + * INIT request/reply flags +@@ -332,6 +344,11 @@ struct fuse_file_lock { + * write/truncate sgid is killed only if file has group + * execute permission. (Same as Linux VFS behavior). + * FUSE_SETXATTR_EXT: Server supports extended struct fuse_setxattr_in ++ * FUSE_INIT_EXT: extended fuse_init_in request ++ * FUSE_INIT_RESERVED: reserved, do not use ++ * FUSE_SECURITY_CTX: add security context to create, mkdir, symlink, and ++ * mknod ++ * FUSE_HAS_INODE_DAX: use per inode DAX + */ + #define FUSE_ASYNC_READ (1 << 0) + #define FUSE_POSIX_LOCKS (1 << 1) +@@ -363,6 +380,11 @@ struct fuse_file_lock { + #define FUSE_SUBMOUNTS (1 << 27) + #define FUSE_HANDLE_KILLPRIV_V2 (1 << 28) + #define FUSE_SETXATTR_EXT (1 << 29) ++#define FUSE_INIT_EXT (1 << 30) ++#define FUSE_INIT_RESERVED (1 << 31) ++/* bits 32..63 get shifted down 32 bits into the flags2 field */ ++#define FUSE_SECURITY_CTX (1ULL << 32) ++#define FUSE_HAS_INODE_DAX (1ULL << 33) + + /** + * CUSE INIT request/reply flags +@@ -445,8 +467,10 @@ struct fuse_file_lock { + * fuse_attr flags + * + * FUSE_ATTR_SUBMOUNT: Object is a submount root ++ * FUSE_ATTR_DAX: Enable DAX for this file in per inode DAX mode + */ + #define FUSE_ATTR_SUBMOUNT (1 << 0) ++#define FUSE_ATTR_DAX (1 << 1) + + /** + * Open flags +@@ -732,6 +756,8 @@ struct fuse_init_in { + uint32_t minor; + uint32_t max_readahead; + uint32_t flags; ++ uint32_t flags2; ++ uint32_t unused[11]; + }; + + #define FUSE_COMPAT_INIT_OUT_SIZE 8 +@@ -748,7 +774,8 @@ struct fuse_init_out { + uint32_t time_gran; + uint16_t max_pages; + uint16_t map_alignment; +- uint32_t unused[8]; ++ uint32_t flags2; ++ uint32_t unused[7]; + }; + + #define CUSE_INIT_INFO_MAX 4096 +@@ -856,9 +883,12 @@ struct fuse_dirent { + char name[]; + }; + +-#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +-#define FUSE_DIRENT_ALIGN(x) \ ++/* Align variable length records to 64bit boundary */ ++#define FUSE_REC_ALIGN(x) \ + (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1)) 
++ ++#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) ++#define FUSE_DIRENT_ALIGN(x) FUSE_REC_ALIGN(x) + #define FUSE_DIRENT_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) + +@@ -975,4 +1005,26 @@ struct fuse_syncfs_in { + uint64_t padding; + }; + ++/* ++ * For each security context, send fuse_secctx with size of security context ++ * fuse_secctx will be followed by security context name and this in turn ++ * will be followed by actual context label. ++ * fuse_secctx, name, context ++ */ ++struct fuse_secctx { ++ uint32_t size; ++ uint32_t padding; ++}; ++ ++/* ++ * Contains the information about how many fuse_secctx structures are being ++ * sent and what's the total size of all security contexts (including ++ * size of fuse_secctx_header). ++ * ++ */ ++struct fuse_secctx_header { ++ uint32_t size; ++ uint32_t nr_secctx; ++}; ++ + #endif /* _LINUX_FUSE_H */ +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index ff6ccbc6ef..bee1a9ed6e 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -301,23 +301,23 @@ + #define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ + #define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +-/* Message Signalled Interrupt registers */ ++/* Message Signaled Interrupt registers */ + +-#define PCI_MSI_FLAGS 2 /* Message Control */ ++#define PCI_MSI_FLAGS 0x02 /* Message Control */ + #define PCI_MSI_FLAGS_ENABLE 0x0001 /* MSI feature enabled */ + #define PCI_MSI_FLAGS_QMASK 0x000e /* Maximum queue size available */ + #define PCI_MSI_FLAGS_QSIZE 0x0070 /* Message queue size configured */ + #define PCI_MSI_FLAGS_64BIT 0x0080 /* 64-bit addresses allowed */ + #define PCI_MSI_FLAGS_MASKBIT 0x0100 /* Per-vector masking capable */ + #define PCI_MSI_RFU 3 /* Rest of capability flags */ +-#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +-#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ 
+-#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +-#define PCI_MSI_MASK_32 12 /* Mask bits register for 32-bit devices */ +-#define PCI_MSI_PENDING_32 16 /* Pending intrs for 32-bit devices */ +-#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +-#define PCI_MSI_MASK_64 16 /* Mask bits register for 64-bit devices */ +-#define PCI_MSI_PENDING_64 20 /* Pending intrs for 64-bit devices */ ++#define PCI_MSI_ADDRESS_LO 0x04 /* Lower 32 bits */ ++#define PCI_MSI_ADDRESS_HI 0x08 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ ++#define PCI_MSI_DATA_32 0x08 /* 16 bits of data for 32-bit devices */ ++#define PCI_MSI_MASK_32 0x0c /* Mask bits register for 32-bit devices */ ++#define PCI_MSI_PENDING_32 0x10 /* Pending intrs for 32-bit devices */ ++#define PCI_MSI_DATA_64 0x0c /* 16 bits of data for 64-bit devices */ ++#define PCI_MSI_MASK_64 0x10 /* Mask bits register for 64-bit devices */ ++#define PCI_MSI_PENDING_64 0x14 /* Pending intrs for 64-bit devices */ + + /* MSI-X registers (in MSI-X capability) */ + #define PCI_MSIX_FLAGS 2 /* Message Control */ +@@ -335,10 +335,10 @@ + + /* MSI-X Table entry format (in memory mapped by a BAR) */ + #define PCI_MSIX_ENTRY_SIZE 16 +-#define PCI_MSIX_ENTRY_LOWER_ADDR 0 /* Message Address */ +-#define PCI_MSIX_ENTRY_UPPER_ADDR 4 /* Message Upper Address */ +-#define PCI_MSIX_ENTRY_DATA 8 /* Message Data */ +-#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 /* Vector Control */ ++#define PCI_MSIX_ENTRY_LOWER_ADDR 0x0 /* Message Address */ ++#define PCI_MSIX_ENTRY_UPPER_ADDR 0x4 /* Message Upper Address */ ++#define PCI_MSIX_ENTRY_DATA 0x8 /* Message Data */ ++#define PCI_MSIX_ENTRY_VECTOR_CTRL 0xc /* Vector Control */ + #define PCI_MSIX_ENTRY_CTRL_MASKBIT 0x00000001 + + /* CompactPCI Hotswap Register */ +@@ -470,7 +470,7 @@ + + /* PCI Express capability registers */ + +-#define PCI_EXP_FLAGS 2 /* Capabilities register */ ++#define PCI_EXP_FLAGS 0x02 /* Capabilities register */ + #define PCI_EXP_FLAGS_VERS 
0x000f /* Capability version */ + #define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ + #define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +@@ -484,7 +484,7 @@ + #define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ + #define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ + #define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +-#define PCI_EXP_DEVCAP 4 /* Device capabilities */ ++#define PCI_EXP_DEVCAP 0x04 /* Device capabilities */ + #define PCI_EXP_DEVCAP_PAYLOAD 0x00000007 /* Max_Payload_Size */ + #define PCI_EXP_DEVCAP_PHANTOM 0x00000018 /* Phantom functions */ + #define PCI_EXP_DEVCAP_EXT_TAG 0x00000020 /* Extended tags */ +@@ -497,7 +497,7 @@ + #define PCI_EXP_DEVCAP_PWR_VAL 0x03fc0000 /* Slot Power Limit Value */ + #define PCI_EXP_DEVCAP_PWR_SCL 0x0c000000 /* Slot Power Limit Scale */ + #define PCI_EXP_DEVCAP_FLR 0x10000000 /* Function Level Reset */ +-#define PCI_EXP_DEVCTL 8 /* Device Control */ ++#define PCI_EXP_DEVCTL 0x08 /* Device Control */ + #define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. 
*/ + #define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ + #define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +@@ -522,7 +522,7 @@ + #define PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */ + #define PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */ + #define PCI_EXP_DEVCTL_BCR_FLR 0x8000 /* Bridge Configuration Retry / FLR */ +-#define PCI_EXP_DEVSTA 10 /* Device Status */ ++#define PCI_EXP_DEVSTA 0x0a /* Device Status */ + #define PCI_EXP_DEVSTA_CED 0x0001 /* Correctable Error Detected */ + #define PCI_EXP_DEVSTA_NFED 0x0002 /* Non-Fatal Error Detected */ + #define PCI_EXP_DEVSTA_FED 0x0004 /* Fatal Error Detected */ +@@ -530,7 +530,7 @@ + #define PCI_EXP_DEVSTA_AUXPD 0x0010 /* AUX Power Detected */ + #define PCI_EXP_DEVSTA_TRPND 0x0020 /* Transactions Pending */ + #define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V1 12 /* v1 endpoints without link end here */ +-#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ ++#define PCI_EXP_LNKCAP 0x0c /* Link Capabilities */ + #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ + #define PCI_EXP_LNKCAP_SLS_2_5GB 0x00000001 /* LNKCAP2 SLS Vector bit 0 */ + #define PCI_EXP_LNKCAP_SLS_5_0GB 0x00000002 /* LNKCAP2 SLS Vector bit 1 */ +@@ -549,7 +549,7 @@ + #define PCI_EXP_LNKCAP_DLLLARC 0x00100000 /* Data Link Layer Link Active Reporting Capable */ + #define PCI_EXP_LNKCAP_LBNC 0x00200000 /* Link Bandwidth Notification Capability */ + #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ +-#define PCI_EXP_LNKCTL 16 /* Link Control */ ++#define PCI_EXP_LNKCTL 0x10 /* Link Control */ + #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ + #define PCI_EXP_LNKCTL_ASPM_L0S 0x0001 /* L0s Enable */ + #define PCI_EXP_LNKCTL_ASPM_L1 0x0002 /* L1 Enable */ +@@ -562,7 +562,7 @@ + #define PCI_EXP_LNKCTL_HAWD 0x0200 /* Hardware Autonomous Width Disable */ + #define PCI_EXP_LNKCTL_LBMIE 0x0400 /* Link Bandwidth Management Interrupt Enable */ + #define PCI_EXP_LNKCTL_LABIE 0x0800 /* Link 
Autonomous Bandwidth Interrupt Enable */ +-#define PCI_EXP_LNKSTA 18 /* Link Status */ ++#define PCI_EXP_LNKSTA 0x12 /* Link Status */ + #define PCI_EXP_LNKSTA_CLS 0x000f /* Current Link Speed */ + #define PCI_EXP_LNKSTA_CLS_2_5GB 0x0001 /* Current Link Speed 2.5GT/s */ + #define PCI_EXP_LNKSTA_CLS_5_0GB 0x0002 /* Current Link Speed 5.0GT/s */ +@@ -582,7 +582,7 @@ + #define PCI_EXP_LNKSTA_LBMS 0x4000 /* Link Bandwidth Management Status */ + #define PCI_EXP_LNKSTA_LABS 0x8000 /* Link Autonomous Bandwidth Status */ + #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V1 20 /* v1 endpoints with link end here */ +-#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ ++#define PCI_EXP_SLTCAP 0x14 /* Slot Capabilities */ + #define PCI_EXP_SLTCAP_ABP 0x00000001 /* Attention Button Present */ + #define PCI_EXP_SLTCAP_PCP 0x00000002 /* Power Controller Present */ + #define PCI_EXP_SLTCAP_MRLSP 0x00000004 /* MRL Sensor Present */ +@@ -595,7 +595,7 @@ + #define PCI_EXP_SLTCAP_EIP 0x00020000 /* Electromechanical Interlock Present */ + #define PCI_EXP_SLTCAP_NCCS 0x00040000 /* No Command Completed Support */ + #define PCI_EXP_SLTCAP_PSN 0xfff80000 /* Physical Slot Number */ +-#define PCI_EXP_SLTCTL 24 /* Slot Control */ ++#define PCI_EXP_SLTCTL 0x18 /* Slot Control */ + #define PCI_EXP_SLTCTL_ABPE 0x0001 /* Attention Button Pressed Enable */ + #define PCI_EXP_SLTCTL_PFDE 0x0002 /* Power Fault Detected Enable */ + #define PCI_EXP_SLTCTL_MRLSCE 0x0004 /* MRL Sensor Changed Enable */ +@@ -617,7 +617,7 @@ + #define PCI_EXP_SLTCTL_EIC 0x0800 /* Electromechanical Interlock Control */ + #define PCI_EXP_SLTCTL_DLLSCE 0x1000 /* Data Link Layer State Changed Enable */ + #define PCI_EXP_SLTCTL_IBPD_DISABLE 0x4000 /* In-band PD disable */ +-#define PCI_EXP_SLTSTA 26 /* Slot Status */ ++#define PCI_EXP_SLTSTA 0x1a /* Slot Status */ + #define PCI_EXP_SLTSTA_ABP 0x0001 /* Attention Button Pressed */ + #define PCI_EXP_SLTSTA_PFD 0x0002 /* Power Fault Detected */ + #define PCI_EXP_SLTSTA_MRLSC 0x0004 /* MRL 
Sensor Changed */ +@@ -627,15 +627,15 @@ + #define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ + #define PCI_EXP_SLTSTA_EIS 0x0080 /* Electromechanical Interlock Status */ + #define PCI_EXP_SLTSTA_DLLSC 0x0100 /* Data Link Layer State Changed */ +-#define PCI_EXP_RTCTL 28 /* Root Control */ ++#define PCI_EXP_RTCTL 0x1c /* Root Control */ + #define PCI_EXP_RTCTL_SECEE 0x0001 /* System Error on Correctable Error */ + #define PCI_EXP_RTCTL_SENFEE 0x0002 /* System Error on Non-Fatal Error */ + #define PCI_EXP_RTCTL_SEFEE 0x0004 /* System Error on Fatal Error */ + #define PCI_EXP_RTCTL_PMEIE 0x0008 /* PME Interrupt Enable */ + #define PCI_EXP_RTCTL_CRSSVE 0x0010 /* CRS Software Visibility Enable */ +-#define PCI_EXP_RTCAP 30 /* Root Capabilities */ ++#define PCI_EXP_RTCAP 0x1e /* Root Capabilities */ + #define PCI_EXP_RTCAP_CRSVIS 0x0001 /* CRS Software Visibility capability */ +-#define PCI_EXP_RTSTA 32 /* Root Status */ ++#define PCI_EXP_RTSTA 0x20 /* Root Status */ + #define PCI_EXP_RTSTA_PME 0x00010000 /* PME status */ + #define PCI_EXP_RTSTA_PENDING 0x00020000 /* PME pending */ + /* +@@ -646,7 +646,7 @@ + * Use pcie_capability_read_word() and similar interfaces to use them + * safely. 
+ */ +-#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ ++#define PCI_EXP_DEVCAP2 0x24 /* Device Capabilities 2 */ + #define PCI_EXP_DEVCAP2_COMP_TMOUT_DIS 0x00000010 /* Completion Timeout Disable supported */ + #define PCI_EXP_DEVCAP2_ARI 0x00000020 /* Alternative Routing-ID */ + #define PCI_EXP_DEVCAP2_ATOMIC_ROUTE 0x00000040 /* Atomic Op routing */ +@@ -658,7 +658,7 @@ + #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ + #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ + #define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ +-#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ ++#define PCI_EXP_DEVCTL2 0x28 /* Device Control 2 */ + #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ + #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */ + #define PCI_EXP_DEVCTL2_ARI 0x0020 /* Alternative Routing-ID */ +@@ -670,9 +670,9 @@ + #define PCI_EXP_DEVCTL2_OBFF_MSGA_EN 0x2000 /* Enable OBFF Message type A */ + #define PCI_EXP_DEVCTL2_OBFF_MSGB_EN 0x4000 /* Enable OBFF Message type B */ + #define PCI_EXP_DEVCTL2_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ +-#define PCI_EXP_DEVSTA2 42 /* Device Status 2 */ +-#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints without link end here */ +-#define PCI_EXP_LNKCAP2 44 /* Link Capabilities 2 */ ++#define PCI_EXP_DEVSTA2 0x2a /* Device Status 2 */ ++#define PCI_CAP_EXP_RC_ENDPOINT_SIZEOF_V2 0x2c /* end of v2 EPs w/o link */ ++#define PCI_EXP_LNKCAP2 0x2c /* Link Capabilities 2 */ + #define PCI_EXP_LNKCAP2_SLS_2_5GB 0x00000002 /* Supported Speed 2.5GT/s */ + #define PCI_EXP_LNKCAP2_SLS_5_0GB 0x00000004 /* Supported Speed 5GT/s */ + #define PCI_EXP_LNKCAP2_SLS_8_0GB 0x00000008 /* Supported Speed 8GT/s */ +@@ -680,7 +680,7 @@ + #define PCI_EXP_LNKCAP2_SLS_32_0GB 0x00000020 /* Supported Speed 32GT/s */ + #define PCI_EXP_LNKCAP2_SLS_64_0GB 0x00000040 /* Supported Speed 64GT/s */ + #define 
PCI_EXP_LNKCAP2_CROSSLINK 0x00000100 /* Crosslink supported */ +-#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ ++#define PCI_EXP_LNKCTL2 0x30 /* Link Control 2 */ + #define PCI_EXP_LNKCTL2_TLS 0x000f + #define PCI_EXP_LNKCTL2_TLS_2_5GT 0x0001 /* Supported Speed 2.5GT/s */ + #define PCI_EXP_LNKCTL2_TLS_5_0GT 0x0002 /* Supported Speed 5GT/s */ +@@ -691,12 +691,12 @@ + #define PCI_EXP_LNKCTL2_ENTER_COMP 0x0010 /* Enter Compliance */ + #define PCI_EXP_LNKCTL2_TX_MARGIN 0x0380 /* Transmit Margin */ + #define PCI_EXP_LNKCTL2_HASD 0x0020 /* HW Autonomous Speed Disable */ +-#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ +-#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 52 /* v2 endpoints with link end here */ +-#define PCI_EXP_SLTCAP2 52 /* Slot Capabilities 2 */ ++#define PCI_EXP_LNKSTA2 0x32 /* Link Status 2 */ ++#define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 0x32 /* end of v2 EPs w/ link */ ++#define PCI_EXP_SLTCAP2 0x34 /* Slot Capabilities 2 */ + #define PCI_EXP_SLTCAP2_IBPD 0x00000001 /* In-band PD Disable Supported */ +-#define PCI_EXP_SLTCTL2 56 /* Slot Control 2 */ +-#define PCI_EXP_SLTSTA2 58 /* Slot Status 2 */ ++#define PCI_EXP_SLTCTL2 0x38 /* Slot Control 2 */ ++#define PCI_EXP_SLTSTA2 0x3a /* Slot Status 2 */ + + /* Extended Capabilities (PCI-X 2.0 and Express) */ + #define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +@@ -742,7 +742,7 @@ + #define PCI_EXT_CAP_MCAST_ENDPOINT_SIZEOF 40 + + /* Advanced Error Reporting */ +-#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ ++#define PCI_ERR_UNCOR_STATUS 0x04 /* Uncorrectable Error Status */ + #define PCI_ERR_UNC_UND 0x00000001 /* Undefined */ + #define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ + #define PCI_ERR_UNC_SURPDN 0x00000020 /* Surprise Down */ +@@ -760,11 +760,11 @@ + #define PCI_ERR_UNC_MCBTLP 0x00800000 /* MC blocked TLP */ + #define PCI_ERR_UNC_ATOMEG 0x01000000 /* Atomic egress blocked */ + #define PCI_ERR_UNC_TLPPRE 0x02000000 /* TLP prefix blocked */ +-#define PCI_ERR_UNCOR_MASK 8 /* 
Uncorrectable Error Mask */ ++#define PCI_ERR_UNCOR_MASK 0x08 /* Uncorrectable Error Mask */ + /* Same bits as above */ +-#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ ++#define PCI_ERR_UNCOR_SEVER 0x0c /* Uncorrectable Error Severity */ + /* Same bits as above */ +-#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ ++#define PCI_ERR_COR_STATUS 0x10 /* Correctable Error Status */ + #define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ + #define PCI_ERR_COR_BAD_TLP 0x00000040 /* Bad TLP Status */ + #define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +@@ -773,20 +773,20 @@ + #define PCI_ERR_COR_ADV_NFAT 0x00002000 /* Advisory Non-Fatal */ + #define PCI_ERR_COR_INTERNAL 0x00004000 /* Corrected Internal */ + #define PCI_ERR_COR_LOG_OVER 0x00008000 /* Header Log Overflow */ +-#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ ++#define PCI_ERR_COR_MASK 0x14 /* Correctable Error Mask */ + /* Same bits as above */ +-#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +-#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ ++#define PCI_ERR_CAP 0x18 /* Advanced Error Capabilities & Ctrl*/ ++#define PCI_ERR_CAP_FEP(x) ((x) & 0x1f) /* First Error Pointer */ + #define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ + #define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ + #define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ + #define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +-#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +-#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ ++#define PCI_ERR_HEADER_LOG 0x1c /* Header Log Register (16 bytes) */ ++#define PCI_ERR_ROOT_COMMAND 0x2c /* Root Error Command */ + #define PCI_ERR_ROOT_CMD_COR_EN 0x00000001 /* Correctable Err Reporting Enable */ + #define PCI_ERR_ROOT_CMD_NONFATAL_EN 0x00000002 /* Non-Fatal Err Reporting Enable */ + #define PCI_ERR_ROOT_CMD_FATAL_EN 0x00000004 /* 
Fatal Err Reporting Enable */ +-#define PCI_ERR_ROOT_STATUS 48 ++#define PCI_ERR_ROOT_STATUS 0x30 + #define PCI_ERR_ROOT_COR_RCV 0x00000001 /* ERR_COR Received */ + #define PCI_ERR_ROOT_MULTI_COR_RCV 0x00000002 /* Multiple ERR_COR */ + #define PCI_ERR_ROOT_UNCOR_RCV 0x00000004 /* ERR_FATAL/NONFATAL */ +@@ -795,52 +795,52 @@ + #define PCI_ERR_ROOT_NONFATAL_RCV 0x00000020 /* Non-Fatal Received */ + #define PCI_ERR_ROOT_FATAL_RCV 0x00000040 /* Fatal Received */ + #define PCI_ERR_ROOT_AER_IRQ 0xf8000000 /* Advanced Error Interrupt Message Number */ +-#define PCI_ERR_ROOT_ERR_SRC 52 /* Error Source Identification */ ++#define PCI_ERR_ROOT_ERR_SRC 0x34 /* Error Source Identification */ + + /* Virtual Channel */ +-#define PCI_VC_PORT_CAP1 4 ++#define PCI_VC_PORT_CAP1 0x04 + #define PCI_VC_CAP1_EVCC 0x00000007 /* extended VC count */ + #define PCI_VC_CAP1_LPEVCC 0x00000070 /* low prio extended VC count */ + #define PCI_VC_CAP1_ARB_SIZE 0x00000c00 +-#define PCI_VC_PORT_CAP2 8 ++#define PCI_VC_PORT_CAP2 0x08 + #define PCI_VC_CAP2_32_PHASE 0x00000002 + #define PCI_VC_CAP2_64_PHASE 0x00000004 + #define PCI_VC_CAP2_128_PHASE 0x00000008 + #define PCI_VC_CAP2_ARB_OFF 0xff000000 +-#define PCI_VC_PORT_CTRL 12 ++#define PCI_VC_PORT_CTRL 0x0c + #define PCI_VC_PORT_CTRL_LOAD_TABLE 0x00000001 +-#define PCI_VC_PORT_STATUS 14 ++#define PCI_VC_PORT_STATUS 0x0e + #define PCI_VC_PORT_STATUS_TABLE 0x00000001 +-#define PCI_VC_RES_CAP 16 ++#define PCI_VC_RES_CAP 0x10 + #define PCI_VC_RES_CAP_32_PHASE 0x00000002 + #define PCI_VC_RES_CAP_64_PHASE 0x00000004 + #define PCI_VC_RES_CAP_128_PHASE 0x00000008 + #define PCI_VC_RES_CAP_128_PHASE_TB 0x00000010 + #define PCI_VC_RES_CAP_256_PHASE 0x00000020 + #define PCI_VC_RES_CAP_ARB_OFF 0xff000000 +-#define PCI_VC_RES_CTRL 20 ++#define PCI_VC_RES_CTRL 0x14 + #define PCI_VC_RES_CTRL_LOAD_TABLE 0x00010000 + #define PCI_VC_RES_CTRL_ARB_SELECT 0x000e0000 + #define PCI_VC_RES_CTRL_ID 0x07000000 + #define PCI_VC_RES_CTRL_ENABLE 0x80000000 +-#define 
PCI_VC_RES_STATUS 26 ++#define PCI_VC_RES_STATUS 0x1a + #define PCI_VC_RES_STATUS_TABLE 0x00000001 + #define PCI_VC_RES_STATUS_NEGO 0x00000002 + #define PCI_CAP_VC_BASE_SIZEOF 0x10 +-#define PCI_CAP_VC_PER_VC_SIZEOF 0x0C ++#define PCI_CAP_VC_PER_VC_SIZEOF 0x0c + + /* Power Budgeting */ +-#define PCI_PWR_DSR 4 /* Data Select Register */ +-#define PCI_PWR_DATA 8 /* Data Register */ ++#define PCI_PWR_DSR 0x04 /* Data Select Register */ ++#define PCI_PWR_DATA 0x08 /* Data Register */ + #define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ + #define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ + #define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ + #define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ + #define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ + #define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +-#define PCI_PWR_CAP 12 /* Capability */ ++#define PCI_PWR_CAP 0x0c /* Capability */ + #define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +-#define PCI_EXT_CAP_PWR_SIZEOF 16 ++#define PCI_EXT_CAP_PWR_SIZEOF 0x10 + + /* Root Complex Event Collector Endpoint Association */ + #define PCI_RCEC_RCIEP_BITMAP 4 /* Associated Bitmap for RCiEPs */ +@@ -964,7 +964,7 @@ + #define PCI_SRIOV_VFM_MI 0x1 /* Dormant.MigrateIn */ + #define PCI_SRIOV_VFM_MO 0x2 /* Active.MigrateOut */ + #define PCI_SRIOV_VFM_AV 0x3 /* Active.Available */ +-#define PCI_EXT_CAP_SRIOV_SIZEOF 64 ++#define PCI_EXT_CAP_SRIOV_SIZEOF 0x40 + + #define PCI_LTR_MAX_SNOOP_LAT 0x4 + #define PCI_LTR_MAX_NOSNOOP_LAT 0x6 +@@ -1017,12 +1017,12 @@ + #define PCI_TPH_LOC_NONE 0x000 /* no location */ + #define PCI_TPH_LOC_CAP 0x200 /* in capability */ + #define PCI_TPH_LOC_MSIX 0x400 /* in MSI-X */ +-#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* st table mask */ +-#define PCI_TPH_CAP_ST_SHIFT 16 /* st table shift */ +-#define PCI_TPH_BASE_SIZEOF 12 /* size with no st table */ ++#define PCI_TPH_CAP_ST_MASK 0x07FF0000 /* ST table mask */ 
++#define PCI_TPH_CAP_ST_SHIFT 16 /* ST table shift */ ++#define PCI_TPH_BASE_SIZEOF 0xc /* size with no ST table */ + + /* Downstream Port Containment */ +-#define PCI_EXP_DPC_CAP 4 /* DPC Capability */ ++#define PCI_EXP_DPC_CAP 0x04 /* DPC Capability */ + #define PCI_EXP_DPC_IRQ 0x001F /* Interrupt Message Number */ + #define PCI_EXP_DPC_CAP_RP_EXT 0x0020 /* Root Port Extensions */ + #define PCI_EXP_DPC_CAP_POISONED_TLP 0x0040 /* Poisoned TLP Egress Blocking Supported */ +@@ -1030,19 +1030,19 @@ + #define PCI_EXP_DPC_RP_PIO_LOG_SIZE 0x0F00 /* RP PIO Log Size */ + #define PCI_EXP_DPC_CAP_DL_ACTIVE 0x1000 /* ERR_COR signal on DL_Active supported */ + +-#define PCI_EXP_DPC_CTL 6 /* DPC control */ ++#define PCI_EXP_DPC_CTL 0x06 /* DPC control */ + #define PCI_EXP_DPC_CTL_EN_FATAL 0x0001 /* Enable trigger on ERR_FATAL message */ + #define PCI_EXP_DPC_CTL_EN_NONFATAL 0x0002 /* Enable trigger on ERR_NONFATAL message */ + #define PCI_EXP_DPC_CTL_INT_EN 0x0008 /* DPC Interrupt Enable */ + +-#define PCI_EXP_DPC_STATUS 8 /* DPC Status */ ++#define PCI_EXP_DPC_STATUS 0x08 /* DPC Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER 0x0001 /* Trigger Status */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN 0x0006 /* Trigger Reason */ + #define PCI_EXP_DPC_STATUS_INTERRUPT 0x0008 /* Interrupt Status */ + #define PCI_EXP_DPC_RP_BUSY 0x0010 /* Root Port Busy */ + #define PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT 0x0060 /* Trig Reason Extension */ + +-#define PCI_EXP_DPC_SOURCE_ID 10 /* DPC Source Identifier */ ++#define PCI_EXP_DPC_SOURCE_ID 0x0A /* DPC Source Identifier */ + + #define PCI_EXP_DPC_RP_PIO_STATUS 0x0C /* RP PIO Status */ + #define PCI_EXP_DPC_RP_PIO_MASK 0x10 /* RP PIO Mask */ +@@ -1086,7 +1086,11 @@ + + /* Designated Vendor-Specific (DVSEC, PCI_EXT_CAP_ID_DVSEC) */ + #define PCI_DVSEC_HEADER1 0x4 /* Designated Vendor-Specific Header1 */ ++#define PCI_DVSEC_HEADER1_VID(x) ((x) & 0xffff) ++#define PCI_DVSEC_HEADER1_REV(x) (((x) >> 16) & 0xf) ++#define PCI_DVSEC_HEADER1_LEN(x) (((x) >> 
20) & 0xfff) + #define PCI_DVSEC_HEADER2 0x8 /* Designated Vendor-Specific Header2 */ ++#define PCI_DVSEC_HEADER2_ID(x) ((x) & 0xffff) + + /* Data Link Feature */ + #define PCI_DLF_CAP 0x04 /* Capabilities Register */ +diff --git a/include/standard-headers/linux/virtio_gpio.h b/include/standard-headers/linux/virtio_gpio.h +new file mode 100644 +index 0000000000..2b5cf06349 +--- /dev/null ++++ b/include/standard-headers/linux/virtio_gpio.h +@@ -0,0 +1,72 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++ ++#ifndef _LINUX_VIRTIO_GPIO_H ++#define _LINUX_VIRTIO_GPIO_H ++ ++#include "standard-headers/linux/types.h" ++ ++/* Virtio GPIO Feature bits */ ++#define VIRTIO_GPIO_F_IRQ 0 ++ ++/* Virtio GPIO request types */ ++#define VIRTIO_GPIO_MSG_GET_NAMES 0x0001 ++#define VIRTIO_GPIO_MSG_GET_DIRECTION 0x0002 ++#define VIRTIO_GPIO_MSG_SET_DIRECTION 0x0003 ++#define VIRTIO_GPIO_MSG_GET_VALUE 0x0004 ++#define VIRTIO_GPIO_MSG_SET_VALUE 0x0005 ++#define VIRTIO_GPIO_MSG_IRQ_TYPE 0x0006 ++ ++/* Possible values of the status field */ ++#define VIRTIO_GPIO_STATUS_OK 0x0 ++#define VIRTIO_GPIO_STATUS_ERR 0x1 ++ ++/* Direction types */ ++#define VIRTIO_GPIO_DIRECTION_NONE 0x00 ++#define VIRTIO_GPIO_DIRECTION_OUT 0x01 ++#define VIRTIO_GPIO_DIRECTION_IN 0x02 ++ ++/* Virtio GPIO IRQ types */ ++#define VIRTIO_GPIO_IRQ_TYPE_NONE 0x00 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_RISING 0x01 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_FALLING 0x02 ++#define VIRTIO_GPIO_IRQ_TYPE_EDGE_BOTH 0x03 ++#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_HIGH 0x04 ++#define VIRTIO_GPIO_IRQ_TYPE_LEVEL_LOW 0x08 ++ ++struct virtio_gpio_config { ++ uint16_t ngpio; ++ uint8_t padding[2]; ++ uint32_t gpio_names_size; ++}; ++ ++/* Virtio GPIO Request / Response */ ++struct virtio_gpio_request { ++ uint16_t type; ++ uint16_t gpio; ++ uint32_t value; ++}; ++ ++struct virtio_gpio_response { ++ uint8_t status; ++ uint8_t value; ++}; ++ ++struct virtio_gpio_response_get_names { ++ uint8_t status; ++ uint8_t value[]; ++}; ++ 
++/* Virtio GPIO IRQ Request / Response */ ++struct virtio_gpio_irq_request { ++ uint16_t gpio; ++}; ++ ++struct virtio_gpio_irq_response { ++ uint8_t status; ++}; ++ ++/* Possible values of the interrupt status field */ ++#define VIRTIO_GPIO_IRQ_STATUS_INVALID 0x0 ++#define VIRTIO_GPIO_IRQ_STATUS_VALID 0x1 ++ ++#endif /* _LINUX_VIRTIO_GPIO_H */ +diff --git a/include/standard-headers/linux/virtio_i2c.h b/include/standard-headers/linux/virtio_i2c.h +new file mode 100644 +index 0000000000..09fa907793 +--- /dev/null ++++ b/include/standard-headers/linux/virtio_i2c.h +@@ -0,0 +1,47 @@ ++/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */ ++/* ++ * Definitions for virtio I2C Adpter ++ * ++ * Copyright (c) 2021 Intel Corporation. All rights reserved. ++ */ ++ ++#ifndef _LINUX_VIRTIO_I2C_H ++#define _LINUX_VIRTIO_I2C_H ++ ++#include "standard-headers/linux/const.h" ++#include "standard-headers/linux/types.h" ++ ++/* Virtio I2C Feature bits */ ++#define VIRTIO_I2C_F_ZERO_LENGTH_REQUEST 0 ++ ++/* The bit 0 of the @virtio_i2c_out_hdr.@flags, used to group the requests */ ++#define VIRTIO_I2C_FLAGS_FAIL_NEXT _BITUL(0) ++ ++/* The bit 1 of the @virtio_i2c_out_hdr.@flags, used to mark a buffer as read */ ++#define VIRTIO_I2C_FLAGS_M_RD _BITUL(1) ++ ++/** ++ * struct virtio_i2c_out_hdr - the virtio I2C message OUT header ++ * @addr: the controlled device address ++ * @padding: used to pad to full dword ++ * @flags: used for feature extensibility ++ */ ++struct virtio_i2c_out_hdr { ++ uint16_t addr; ++ uint16_t padding; ++ uint32_t flags; ++}; ++ ++/** ++ * struct virtio_i2c_in_hdr - the virtio I2C message IN header ++ * @status: the processing result from the backend ++ */ ++struct virtio_i2c_in_hdr { ++ uint8_t status; ++}; ++ ++/* The final status written by the device */ ++#define VIRTIO_I2C_MSG_OK 0 ++#define VIRTIO_I2C_MSG_ERR 1 ++ ++#endif /* _LINUX_VIRTIO_I2C_H */ +diff --git a/include/standard-headers/linux/virtio_iommu.h 
b/include/standard-headers/linux/virtio_iommu.h +index b9443b83a1..366379c2f0 100644 +--- a/include/standard-headers/linux/virtio_iommu.h ++++ b/include/standard-headers/linux/virtio_iommu.h +@@ -16,6 +16,7 @@ + #define VIRTIO_IOMMU_F_BYPASS 3 + #define VIRTIO_IOMMU_F_PROBE 4 + #define VIRTIO_IOMMU_F_MMIO 5 ++#define VIRTIO_IOMMU_F_BYPASS_CONFIG 6 + + struct virtio_iommu_range_64 { + uint64_t start; +@@ -36,6 +37,8 @@ struct virtio_iommu_config { + struct virtio_iommu_range_32 domain_range; + /* Probe buffer size */ + uint32_t probe_size; ++ uint8_t bypass; ++ uint8_t reserved[3]; + }; + + /* Request types */ +@@ -66,11 +69,14 @@ struct virtio_iommu_req_tail { + uint8_t reserved[3]; + }; + ++#define VIRTIO_IOMMU_ATTACH_F_BYPASS (1 << 0) ++ + struct virtio_iommu_req_attach { + struct virtio_iommu_req_head head; + uint32_t domain; + uint32_t endpoint; +- uint8_t reserved[8]; ++ uint32_t flags; ++ uint8_t reserved[4]; + struct virtio_iommu_req_tail tail; + }; + +diff --git a/include/standard-headers/linux/virtio_pcidev.h b/include/standard-headers/linux/virtio_pcidev.h +new file mode 100644 +index 0000000000..bdf1d062da +--- /dev/null ++++ b/include/standard-headers/linux/virtio_pcidev.h +@@ -0,0 +1,65 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Copyright (C) 2021 Intel Corporation ++ * Author: Johannes Berg ++ */ ++#ifndef _LINUX_VIRTIO_PCIDEV_H ++#define _LINUX_VIRTIO_PCIDEV_H ++#include "standard-headers/linux/types.h" ++ ++/** ++ * enum virtio_pcidev_ops - virtual PCI device operations ++ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors ++ * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8; ++ * the @data field should be filled in by the device (in little endian). ++ * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8; ++ * the @data field contains the data to write (in little endian). 
++ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable; ++ * the @data field should be filled in by the device (in little endian). ++ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable; ++ * the @data field contains the data to write (in little endian). ++ * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but ++ * the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE) ++ * @VIRTIO_PCIDEV_OP_INT: legacy INTx# pin interrupt, the addr field is 1-4 for ++ * the number ++ * @VIRTIO_PCIDEV_OP_MSI: MSI(-X) interrupt, this message basically transports ++ * the 16- or 32-bit write that would otherwise be done into memory, ++ * analogous to the write messages (@VIRTIO_PCIDEV_OP_MMIO_WRITE) above ++ * @VIRTIO_PCIDEV_OP_PME: Dummy message whose content is ignored (and should be ++ * all zeroes) to signal the PME# pin. ++ */ ++enum virtio_pcidev_ops { ++ VIRTIO_PCIDEV_OP_RESERVED = 0, ++ VIRTIO_PCIDEV_OP_CFG_READ, ++ VIRTIO_PCIDEV_OP_CFG_WRITE, ++ VIRTIO_PCIDEV_OP_MMIO_READ, ++ VIRTIO_PCIDEV_OP_MMIO_WRITE, ++ VIRTIO_PCIDEV_OP_MMIO_MEMSET, ++ VIRTIO_PCIDEV_OP_INT, ++ VIRTIO_PCIDEV_OP_MSI, ++ VIRTIO_PCIDEV_OP_PME, ++}; ++ ++/** ++ * struct virtio_pcidev_msg - virtio PCI device operation ++ * @op: the operation to do ++ * @bar: the bar (only with BAR read/write messages) ++ * @reserved: reserved ++ * @size: the size of the read/write (in bytes) ++ * @addr: the address to read/write ++ * @data: the data, normally @size long, but just one byte for ++ * %VIRTIO_PCIDEV_OP_MMIO_MEMSET ++ * ++ * Note: the fields are all in native (CPU) endian, however, the ++ * @data values will often be in little endian (see the ops above.) 
++ */ ++struct virtio_pcidev_msg { ++ uint8_t op; ++ uint8_t bar; ++ uint16_t reserved; ++ uint32_t size; ++ uint64_t addr; ++ uint8_t data[]; ++}; ++ ++#endif /* _LINUX_VIRTIO_PCIDEV_H */ +diff --git a/include/standard-headers/linux/virtio_scmi.h b/include/standard-headers/linux/virtio_scmi.h +new file mode 100644 +index 0000000000..8f2c305aea +--- /dev/null ++++ b/include/standard-headers/linux/virtio_scmi.h +@@ -0,0 +1,24 @@ ++/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ ++/* ++ * Copyright (C) 2020-2021 OpenSynergy GmbH ++ * Copyright (C) 2021 ARM Ltd. ++ */ ++ ++#ifndef _LINUX_VIRTIO_SCMI_H ++#define _LINUX_VIRTIO_SCMI_H ++ ++#include "standard-headers/linux/virtio_types.h" ++ ++/* Device implements some SCMI notifications, or delayed responses. */ ++#define VIRTIO_SCMI_F_P2A_CHANNELS 0 ++ ++/* Device implements any SCMI statistics shared memory region */ ++#define VIRTIO_SCMI_F_SHARED_MEMORY 1 ++ ++/* Virtqueues */ ++ ++#define VIRTIO_SCMI_VQ_TX 0 /* cmdq */ ++#define VIRTIO_SCMI_VQ_RX 1 /* eventq */ ++#define VIRTIO_SCMI_VQ_MAX_CNT 2 ++ ++#endif /* _LINUX_VIRTIO_SCMI_H */ +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index 4557a8b608..1c48b0ae3b 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -883,8 +883,11 @@ __SYSCALL(__NR_process_mrelease, sys_process_mrelease) + #define __NR_futex_waitv 449 + __SYSCALL(__NR_futex_waitv, sys_futex_waitv) + ++#define __NR_set_mempolicy_home_node 450 ++__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) ++ + #undef __NR_syscalls +-#define __NR_syscalls 450 ++#define __NR_syscalls 451 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 4b3e7ad1ec..1f14a6fad3 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -377,5 +377,7 @@ + #define 
__NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 488d9298d9..e5a8ebec78 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -353,5 +353,7 @@ + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index f47399870a..871d57168f 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -423,5 +423,7 @@ + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) + #define __NR_process_mrelease (__NR_Linux + 448) ++#define __NR_futex_waitv (__NR_Linux + 449) ++#define __NR_set_mempolicy_home_node (__NR_Linux + 450) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index 11d54696dc..585c7fefbc 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -430,6 +430,8 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index cf740bab13..350f7ec0ac 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ 
b/linux-headers/asm-powerpc/unistd_64.h +@@ -402,6 +402,8 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-riscv/bitsperlong.h b/linux-headers/asm-riscv/bitsperlong.h +new file mode 100644 +index 0000000000..cc5c45a9ce +--- /dev/null ++++ b/linux-headers/asm-riscv/bitsperlong.h +@@ -0,0 +1,14 @@ ++/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2012 ARM Ltd. ++ * Copyright (C) 2015 Regents of the University of California ++ */ ++ ++#ifndef _ASM_RISCV_BITSPERLONG_H ++#define _ASM_RISCV_BITSPERLONG_H ++ ++#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) ++ ++#include ++ ++#endif /* _ASM_RISCV_BITSPERLONG_H */ +diff --git a/linux-headers/asm-riscv/mman.h b/linux-headers/asm-riscv/mman.h +new file mode 100644 +index 0000000000..8eebf89f5a +--- /dev/null ++++ b/linux-headers/asm-riscv/mman.h +@@ -0,0 +1 @@ ++#include +diff --git a/linux-headers/asm-riscv/unistd.h b/linux-headers/asm-riscv/unistd.h +new file mode 100644 +index 0000000000..8062996c2d +--- /dev/null ++++ b/linux-headers/asm-riscv/unistd.h +@@ -0,0 +1,44 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++/* ++ * Copyright (C) 2018 David Abdurachmanov ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program. If not, see . 
++ */ ++ ++#ifdef __LP64__ ++#define __ARCH_WANT_NEW_STAT ++#define __ARCH_WANT_SET_GET_RLIMIT ++#endif /* __LP64__ */ ++ ++#define __ARCH_WANT_SYS_CLONE3 ++ ++#include ++ ++/* ++ * Allows the instruction cache to be flushed from userspace. Despite RISC-V ++ * having a direct 'fence.i' instruction available to userspace (which we ++ * can't trap!), that's not actually viable when running on Linux because the ++ * kernel might schedule a process on another hart. There is no way for ++ * userspace to handle this without invoking the kernel (as it doesn't know the ++ * thread->hart mappings), so we've defined a RISC-V specific system call to ++ * flush the instruction cache. ++ * ++ * __NR_riscv_flush_icache is defined to flush the instruction cache over an ++ * address range, with the flush applying to either all threads or just the ++ * caller. We don't currently do anything with the address range, that's just ++ * in there for forwards compatibility. ++ */ ++#ifndef __NR_riscv_flush_icache ++#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) ++#endif ++__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 8f97d98128..8e644d65f5 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -420,5 +420,7 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 021ffc30e6..51da542fec 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -368,5 +368,7 @@ + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 + #define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 ++#define 
__NR_set_mempolicy_home_node 450 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 5a776a08f7..2da3316bb5 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -373,9 +373,23 @@ struct kvm_debugregs { + __u64 reserved[9]; + }; + +-/* for KVM_CAP_XSAVE */ ++/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */ + struct kvm_xsave { ++ /* ++ * KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes ++ * as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) ++ * respectively, when invoked on the vm file descriptor. ++ * ++ * The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) ++ * will always be at least 4096. Currently, it is only greater ++ * than 4096 if a dynamic feature has been enabled with ++ * ``arch_prctl()``, but this may change in the future. ++ * ++ * The offsets of the state save areas in struct kvm_xsave follow ++ * the contents of CPUID leaf 0xD on the host. ++ */ + __u32 region[1024]; ++ __u32 extra[0]; + }; + + #define KVM_MAX_XCRS 16 +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 9c9ffe312b..87e1e977af 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -440,6 +440,7 @@ + #define __NR_memfd_secret 447 + #define __NR_process_mrelease 448 + #define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index 084f1eef9c..147a78d623 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -362,6 +362,7 @@ + #define __NR_memfd_secret 447 + #define __NR_process_mrelease 448 + #define __NR_futex_waitv 449 ++#define __NR_set_mempolicy_home_node 450 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index a2441affc2..27098db7fb 100644 +--- 
a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -315,6 +315,7 @@ + #define __NR_memfd_secret (__X32_SYSCALL_BIT + 447) + #define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) + #define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) ++#define __NR_set_mempolicy_home_node (__X32_SYSCALL_BIT + 450) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 02c5e7b7bb..00af3bc333 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1130,6 +1130,9 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_BINARY_STATS_FD 203 + #define KVM_CAP_EXIT_ON_EMULATION_FAILURE 204 + #define KVM_CAP_ARM_MTE 205 ++#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 ++#define KVM_CAP_VM_GPA_BITS 207 ++#define KVM_CAP_XSAVE2 208 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -1161,11 +1164,20 @@ struct kvm_irq_routing_hv_sint { + __u32 sint; + }; + ++struct kvm_irq_routing_xen_evtchn { ++ __u32 port; ++ __u32 vcpu; ++ __u32 priority; ++}; ++ ++#define KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL ((__u32)(-1)) ++ + /* gsi routing entry types */ + #define KVM_IRQ_ROUTING_IRQCHIP 1 + #define KVM_IRQ_ROUTING_MSI 2 + #define KVM_IRQ_ROUTING_S390_ADAPTER 3 + #define KVM_IRQ_ROUTING_HV_SINT 4 ++#define KVM_IRQ_ROUTING_XEN_EVTCHN 5 + + struct kvm_irq_routing_entry { + __u32 gsi; +@@ -1177,6 +1189,7 @@ struct kvm_irq_routing_entry { + struct kvm_irq_routing_msi msi; + struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; ++ struct kvm_irq_routing_xen_evtchn xen_evtchn; + __u32 pad[8]; + } u; + }; +@@ -1207,6 +1220,7 @@ struct kvm_x86_mce { + #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) ++#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) + + struct 
kvm_xen_hvm_config { + __u32 flags; +@@ -1609,6 +1623,9 @@ struct kvm_enc_region { + #define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3) + #define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4) + ++/* Available with KVM_CAP_XSAVE2 */ ++#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) ++ + struct kvm_s390_pv_sec_parm { + __u64 origin; + __u64 length; +-- +2.35.3 + diff --git a/kvm-linux-headers-include-missing-changes-from-5.17.patch b/kvm-linux-headers-include-missing-changes-from-5.17.patch new file mode 100644 index 0000000..1319926 --- /dev/null +++ b/kvm-linux-headers-include-missing-changes-from-5.17.patch @@ -0,0 +1,58 @@ +From aa6181d87e2b4ef1a70be002881908d2df5548a9 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 22 Feb 2022 17:58:11 +0100 +Subject: [PATCH 04/24] linux-headers: include missing changes from 5.17 + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [4/13] 2ed7cbc07e63d85cda916ef44d1e82b1fba7fdf4 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1ea5208febcc068449b63282d72bb719ab67a466) +Signed-off-by: Paul Lai +--- + linux-headers/asm-x86/kvm.h | 3 +++ + linux-headers/linux/kvm.h | 4 ++++ + 2 files changed, 7 insertions(+) + +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index 2da3316bb5..bf6e96011d 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ b/linux-headers/asm-x86/kvm.h +@@ -452,6 +452,9 @@ struct kvm_sync_regs { + + #define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + ++/* attributes for system fd (group 0) */ ++#define KVM_X86_XCOMP_GUEST_SUPP 0 ++ + struct kvm_vmx_nested_state_data { + __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; + __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index 00af3bc333..d232feaae9 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ 
-1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt { + #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206 + #define KVM_CAP_VM_GPA_BITS 207 + #define KVM_CAP_XSAVE2 208 ++#define KVM_CAP_SYS_ATTRIBUTES 209 + + #ifdef KVM_CAP_IRQ_ROUTING + +@@ -2047,4 +2048,7 @@ struct kvm_stats_desc { + + #define KVM_GET_STATS_FD _IO(KVMIO, 0xce) + ++/* Available with KVM_CAP_XSAVE2 */ ++#define KVM_GET_XSAVE2 _IOR(KVMIO, 0xcf, struct kvm_xsave) ++ + #endif /* __LINUX_KVM_H */ +-- +2.35.3 + diff --git a/kvm-linux-headers-update-to-5.16-rc1.patch b/kvm-linux-headers-update-to-5.16-rc1.patch new file mode 100644 index 0000000..1ad047b --- /dev/null +++ b/kvm-linux-headers-update-to-5.16-rc1.patch @@ -0,0 +1,725 @@ +From 64808db4a14867ad774b5e7535972a886e20a156 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 11 Nov 2021 12:06:01 +0100 +Subject: [PATCH 02/24] linux-headers: update to 5.16-rc1 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [2/13] 4af2f4942db029b81890e3862793fb54b62791cc +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Signed-off-by: Paolo Bonzini +Acked-by: Cornelia Huck +Reviewed-by: Alex Bennée +Message-Id: <20211111110604.207376-3-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 43709a0ca3b09e952bde3f38112f1d7fbf7c65b1) +Signed-off-by: Paul Lai +--- + include/standard-headers/drm/drm_fourcc.h | 121 +++++++++++++++++- + include/standard-headers/linux/ethtool.h | 31 +++++ + include/standard-headers/linux/fuse.h | 10 +- + include/standard-headers/linux/pci_regs.h | 6 + + include/standard-headers/linux/virtio_gpu.h | 18 ++- + include/standard-headers/linux/virtio_ids.h | 24 ++++ + include/standard-headers/linux/virtio_vsock.h | 3 +- + linux-headers/asm-arm64/unistd.h | 1 + + linux-headers/asm-generic/unistd.h | 22 +++- + linux-headers/asm-mips/unistd_n32.h | 1 
+ + linux-headers/asm-mips/unistd_n64.h | 1 + + linux-headers/asm-mips/unistd_o32.h | 1 + + linux-headers/asm-powerpc/unistd_32.h | 1 + + linux-headers/asm-powerpc/unistd_64.h | 1 + + linux-headers/asm-s390/unistd_32.h | 1 + + linux-headers/asm-s390/unistd_64.h | 1 + + linux-headers/asm-x86/kvm.h | 5 + + linux-headers/asm-x86/unistd_32.h | 3 + + linux-headers/asm-x86/unistd_64.h | 3 + + linux-headers/asm-x86/unistd_x32.h | 3 + + linux-headers/linux/kvm.h | 40 +++++- + 21 files changed, 276 insertions(+), 21 deletions(-) + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 352b51fd0a..2c025cb4fe 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -103,6 +103,12 @@ extern "C" { + /* 8 bpp Red */ + #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + ++/* 10 bpp Red */ ++#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ ++ ++/* 12 bpp Red */ ++#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ ++ + /* 16 bpp Red */ + #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + +@@ -372,6 +378,12 @@ extern "C" { + + #define DRM_FORMAT_RESERVED ((1ULL << 56) - 1) + ++#define fourcc_mod_get_vendor(modifier) \ ++ (((modifier) >> 56) & 0xff) ++ ++#define fourcc_mod_is_vendor(modifier, vendor) \ ++ (fourcc_mod_get_vendor(modifier) == DRM_FORMAT_MOD_VENDOR_## vendor) ++ + #define fourcc_mod_code(vendor, val) \ + ((((uint64_t)DRM_FORMAT_MOD_VENDOR_## vendor) << 56) | ((val) & 0x00ffffffffffffffULL)) + +@@ -899,9 +911,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + + /* + * The top 4 bits (out of the 56 bits alloted for specifying vendor specific +- * modifiers) denote the category for modifiers. Currently we have only two +- * categories of modifiers ie AFBC and MISC. 
We can have a maximum of sixteen +- * different categories. ++ * modifiers) denote the category for modifiers. Currently we have three ++ * categories of modifiers ie AFBC, MISC and AFRC. We can have a maximum of ++ * sixteen different categories. + */ + #define DRM_FORMAT_MOD_ARM_CODE(__type, __val) \ + fourcc_mod_code(ARM, ((uint64_t)(__type) << 52) | ((__val) & 0x000fffffffffffffULL)) +@@ -1016,6 +1028,109 @@ drm_fourcc_canonicalize_nvidia_format_mod(uint64_t modifier) + */ + #define AFBC_FORMAT_MOD_USM (1ULL << 12) + ++/* ++ * Arm Fixed-Rate Compression (AFRC) modifiers ++ * ++ * AFRC is a proprietary fixed rate image compression protocol and format, ++ * designed to provide guaranteed bandwidth and memory footprint ++ * reductions in graphics and media use-cases. ++ * ++ * AFRC buffers consist of one or more planes, with the same components ++ * and meaning as an uncompressed buffer using the same pixel format. ++ * ++ * Within each plane, the pixel/luma/chroma values are grouped into ++ * "coding unit" blocks which are individually compressed to a ++ * fixed size (in bytes). All coding units within a given plane of a buffer ++ * store the same number of values, and have the same compressed size. ++ * ++ * The coding unit size is configurable, allowing different rates of compression. ++ * ++ * The start of each AFRC buffer plane must be aligned to an alignment granule which ++ * depends on the coding unit size. ++ * ++ * Coding Unit Size Plane Alignment ++ * ---------------- --------------- ++ * 16 bytes 1024 bytes ++ * 24 bytes 512 bytes ++ * 32 bytes 2048 bytes ++ * ++ * Coding units are grouped into paging tiles. AFRC buffer dimensions must be aligned ++ * to a multiple of the paging tile dimensions. ++ * The dimensions of each paging tile depend on whether the buffer is optimised for ++ * scanline (SCAN layout) or rotated (ROT layout) access. 
++ * ++ * Layout Paging Tile Width Paging Tile Height ++ * ------ ----------------- ------------------ ++ * SCAN 16 coding units 4 coding units ++ * ROT 8 coding units 8 coding units ++ * ++ * The dimensions of each coding unit depend on the number of components ++ * in the compressed plane and whether the buffer is optimised for ++ * scanline (SCAN layout) or rotated (ROT layout) access. ++ * ++ * Number of Components in Plane Layout Coding Unit Width Coding Unit Height ++ * ----------------------------- --------- ----------------- ------------------ ++ * 1 SCAN 16 samples 4 samples ++ * Example: 16x4 luma samples in a 'Y' plane ++ * 16x4 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 1 ROT 8 samples 8 samples ++ * Example: 8x8 luma samples in a 'Y' plane ++ * 8x8 chroma 'V' values, in the 'V' plane of a fully-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 2 DONT CARE 8 samples 4 samples ++ * Example: 8x4 chroma pairs in the 'UV' plane of a semi-planar YUV buffer ++ * ----------------------------- --------- ----------------- ------------------ ++ * 3 DONT CARE 4 samples 4 samples ++ * Example: 4x4 pixels in an RGB buffer without alpha ++ * ----------------------------- --------- ----------------- ------------------ ++ * 4 DONT CARE 4 samples 4 samples ++ * Example: 4x4 pixels in an RGB buffer with alpha ++ */ ++ ++#define DRM_FORMAT_MOD_ARM_TYPE_AFRC 0x02 ++ ++#define DRM_FORMAT_MOD_ARM_AFRC(__afrc_mode) \ ++ DRM_FORMAT_MOD_ARM_CODE(DRM_FORMAT_MOD_ARM_TYPE_AFRC, __afrc_mode) ++ ++/* ++ * AFRC coding unit size modifier. ++ * ++ * Indicates the number of bytes used to store each compressed coding unit for ++ * one or more planes in an AFRC encoded buffer. The coding unit size for chrominance ++ * is the same for both Cb and Cr, which may be stored in separate planes. 
++ * ++ * AFRC_FORMAT_MOD_CU_SIZE_P0 indicates the number of bytes used to store ++ * each compressed coding unit in the first plane of the buffer. For RGBA buffers ++ * this is the only plane, while for semi-planar and fully-planar YUV buffers, ++ * this corresponds to the luma plane. ++ * ++ * AFRC_FORMAT_MOD_CU_SIZE_P12 indicates the number of bytes used to store ++ * each compressed coding unit in the second and third planes in the buffer. ++ * For semi-planar and fully-planar YUV buffers, this corresponds to the chroma plane(s). ++ * ++ * For single-plane buffers, AFRC_FORMAT_MOD_CU_SIZE_P0 must be specified ++ * and AFRC_FORMAT_MOD_CU_SIZE_P12 must be zero. ++ * For semi-planar and fully-planar buffers, both AFRC_FORMAT_MOD_CU_SIZE_P0 and ++ * AFRC_FORMAT_MOD_CU_SIZE_P12 must be specified. ++ */ ++#define AFRC_FORMAT_MOD_CU_SIZE_MASK 0xf ++#define AFRC_FORMAT_MOD_CU_SIZE_16 (1ULL) ++#define AFRC_FORMAT_MOD_CU_SIZE_24 (2ULL) ++#define AFRC_FORMAT_MOD_CU_SIZE_32 (3ULL) ++ ++#define AFRC_FORMAT_MOD_CU_SIZE_P0(__afrc_cu_size) (__afrc_cu_size) ++#define AFRC_FORMAT_MOD_CU_SIZE_P12(__afrc_cu_size) ((__afrc_cu_size) << 4) ++ ++/* ++ * AFRC scanline memory layout. ++ * ++ * Indicates if the buffer uses the scanline-optimised layout ++ * for an AFRC encoded buffer, otherwise, it uses the rotation-optimised layout. ++ * The memory layout is the same for all planes. 
++ */ ++#define AFRC_FORMAT_MOD_LAYOUT_SCAN (1ULL << 8) ++ + /* + * Arm 16x16 Block U-Interleaved modifier + * +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 053d3fafdf..688eb8dc39 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -603,6 +603,7 @@ enum ethtool_link_ext_state { + ETHTOOL_LINK_EXT_STATE_CALIBRATION_FAILURE, + ETHTOOL_LINK_EXT_STATE_POWER_BUDGET_EXCEEDED, + ETHTOOL_LINK_EXT_STATE_OVERHEAT, ++ ETHTOOL_LINK_EXT_STATE_MODULE, + }; + + /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */ +@@ -639,6 +640,8 @@ enum ethtool_link_ext_substate_link_logical_mismatch { + enum ethtool_link_ext_substate_bad_signal_integrity { + ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1, + ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE, ++ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_REFERENCE_CLOCK_LOST, ++ ETHTOOL_LINK_EXT_SUBSTATE_BSI_SERDES_ALOS, + }; + + /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */ +@@ -647,6 +650,11 @@ enum ethtool_link_ext_substate_cable_issue { + ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE, + }; + ++/* More information in addition to ETHTOOL_LINK_EXT_STATE_MODULE. */ ++enum ethtool_link_ext_substate_module { ++ ETHTOOL_LINK_EXT_SUBSTATE_MODULE_CMIS_NOT_READY = 1, ++}; ++ + #define ETH_GSTRING_LEN 32 + + /** +@@ -704,6 +712,29 @@ enum ethtool_stringset { + ETH_SS_COUNT + }; + ++/** ++ * enum ethtool_module_power_mode_policy - plug-in module power mode policy ++ * @ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH: Module is always in high power mode. ++ * @ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO: Module is transitioned by the host ++ * to high power mode when the first port using it is put administratively ++ * up and to low power mode when the last port using it is put ++ * administratively down. 
++ */ ++enum ethtool_module_power_mode_policy { ++ ETHTOOL_MODULE_POWER_MODE_POLICY_HIGH = 1, ++ ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO, ++}; ++ ++/** ++ * enum ethtool_module_power_mode - plug-in module power mode ++ * @ETHTOOL_MODULE_POWER_MODE_LOW: Module is in low power mode. ++ * @ETHTOOL_MODULE_POWER_MODE_HIGH: Module is in high power mode. ++ */ ++enum ethtool_module_power_mode { ++ ETHTOOL_MODULE_POWER_MODE_LOW = 1, ++ ETHTOOL_MODULE_POWER_MODE_HIGH, ++}; ++ + /** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index cce105bfba..23ea31708b 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -181,6 +181,9 @@ + * - add FUSE_OPEN_KILL_SUIDGID + * - extend fuse_setxattr_in, add FUSE_SETXATTR_EXT + * - add FUSE_SETXATTR_ACL_KILL_SGID ++ * ++ * 7.34 ++ * - add FUSE_SYNCFS + */ + + #ifndef _LINUX_FUSE_H +@@ -212,7 +215,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 33 ++#define FUSE_KERNEL_MINOR_VERSION 34 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -505,6 +508,7 @@ enum fuse_opcode { + FUSE_COPY_FILE_RANGE = 47, + FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, ++ FUSE_SYNCFS = 50, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +@@ -967,4 +971,8 @@ struct fuse_removemapping_one { + #define FUSE_REMOVEMAPPING_MAX_ENTRY \ + (PAGE_SIZE / sizeof(struct fuse_removemapping_one)) + ++struct fuse_syncfs_in { ++ uint64_t padding; ++}; ++ + #endif /* _LINUX_FUSE_H */ +diff --git a/include/standard-headers/linux/pci_regs.h b/include/standard-headers/linux/pci_regs.h +index e709ae8235..ff6ccbc6ef 100644 +--- a/include/standard-headers/linux/pci_regs.h ++++ b/include/standard-headers/linux/pci_regs.h +@@ -504,6 +504,12 @@ + #define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported 
Request Reporting En. */ + #define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ + #define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ ++#define PCI_EXP_DEVCTL_PAYLOAD_128B 0x0000 /* 128 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_256B 0x0020 /* 256 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_512B 0x0040 /* 512 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_1024B 0x0060 /* 1024 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_2048B 0x0080 /* 2048 Bytes */ ++#define PCI_EXP_DEVCTL_PAYLOAD_4096B 0x00a0 /* 4096 Bytes */ + #define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ + #define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ + #define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +diff --git a/include/standard-headers/linux/virtio_gpu.h b/include/standard-headers/linux/virtio_gpu.h +index 1357e4774e..2da48d3d4c 100644 +--- a/include/standard-headers/linux/virtio_gpu.h ++++ b/include/standard-headers/linux/virtio_gpu.h +@@ -59,6 +59,11 @@ + * VIRTIO_GPU_CMD_RESOURCE_CREATE_BLOB + */ + #define VIRTIO_GPU_F_RESOURCE_BLOB 3 ++/* ++ * VIRTIO_GPU_CMD_CREATE_CONTEXT with ++ * context_init and multiple timelines ++ */ ++#define VIRTIO_GPU_F_CONTEXT_INIT 4 + + enum virtio_gpu_ctrl_type { + VIRTIO_GPU_UNDEFINED = 0, +@@ -122,14 +127,20 @@ enum virtio_gpu_shm_id { + VIRTIO_GPU_SHM_ID_HOST_VISIBLE = 1 + }; + +-#define VIRTIO_GPU_FLAG_FENCE (1 << 0) ++#define VIRTIO_GPU_FLAG_FENCE (1 << 0) ++/* ++ * If the following flag is set, then ring_idx contains the index ++ * of the command ring that needs to used when creating the fence ++ */ ++#define VIRTIO_GPU_FLAG_INFO_RING_IDX (1 << 1) + + struct virtio_gpu_ctrl_hdr { + uint32_t type; + uint32_t flags; + uint64_t fence_id; + uint32_t ctx_id; +- uint32_t padding; ++ uint8_t ring_idx; ++ uint8_t padding[3]; + }; + + /* data passed in the cursor vq */ +@@ -269,10 +280,11 @@ struct virtio_gpu_resource_create_3d { + }; + + /* VIRTIO_GPU_CMD_CTX_CREATE */ ++#define 
VIRTIO_GPU_CONTEXT_INIT_CAPSET_ID_MASK 0x000000ff + struct virtio_gpu_ctx_create { + struct virtio_gpu_ctrl_hdr hdr; + uint32_t nlen; +- uint32_t padding; ++ uint32_t context_init; + char debug_name[64]; + }; + +diff --git a/include/standard-headers/linux/virtio_ids.h b/include/standard-headers/linux/virtio_ids.h +index 4fe842c3a3..80d76b75bc 100644 +--- a/include/standard-headers/linux/virtio_ids.h ++++ b/include/standard-headers/linux/virtio_ids.h +@@ -54,7 +54,31 @@ + #define VIRTIO_ID_SOUND 25 /* virtio sound */ + #define VIRTIO_ID_FS 26 /* virtio filesystem */ + #define VIRTIO_ID_PMEM 27 /* virtio pmem */ ++#define VIRTIO_ID_RPMB 28 /* virtio rpmb */ + #define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ ++#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */ ++#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */ ++#define VIRTIO_ID_SCMI 32 /* virtio SCMI */ ++#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module*/ ++#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */ ++#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */ ++#define VIRTIO_ID_CAN 36 /* virtio can */ ++#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */ ++#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */ ++#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */ + #define VIRTIO_ID_BT 40 /* virtio bluetooth */ ++#define VIRTIO_ID_GPIO 41 /* virtio gpio */ ++ ++/* ++ * Virtio Transitional IDs ++ */ ++ ++#define VIRTIO_TRANS_ID_NET 1000 /* transitional virtio net */ ++#define VIRTIO_TRANS_ID_BLOCK 1001 /* transitional virtio block */ ++#define VIRTIO_TRANS_ID_BALLOON 1002 /* transitional virtio balloon */ ++#define VIRTIO_TRANS_ID_CONSOLE 1003 /* transitional virtio console */ ++#define VIRTIO_TRANS_ID_SCSI 1004 /* transitional virtio SCSI */ ++#define VIRTIO_TRANS_ID_RNG 1005 /* transitional virtio rng */ ++#define VIRTIO_TRANS_ID_9P 1009 /* transitional virtio 9p console */ + + #endif /* _LINUX_VIRTIO_IDS_H */ +diff --git 
a/include/standard-headers/linux/virtio_vsock.h b/include/standard-headers/linux/virtio_vsock.h +index 3a23488e42..467e751b17 100644 +--- a/include/standard-headers/linux/virtio_vsock.h ++++ b/include/standard-headers/linux/virtio_vsock.h +@@ -97,7 +97,8 @@ enum virtio_vsock_shutdown { + + /* VIRTIO_VSOCK_OP_RW flags values */ + enum virtio_vsock_rw { +- VIRTIO_VSOCK_SEQ_EOR = 1, ++ VIRTIO_VSOCK_SEQ_EOM = 1, ++ VIRTIO_VSOCK_SEQ_EOR = 2, + }; + + #endif /* _LINUX_VIRTIO_VSOCK_H */ +diff --git a/linux-headers/asm-arm64/unistd.h b/linux-headers/asm-arm64/unistd.h +index f83a70e07d..ce2ee8f1e3 100644 +--- a/linux-headers/asm-arm64/unistd.h ++++ b/linux-headers/asm-arm64/unistd.h +@@ -20,5 +20,6 @@ + #define __ARCH_WANT_SET_GET_RLIMIT + #define __ARCH_WANT_TIME32_SYSCALLS + #define __ARCH_WANT_SYS_CLONE3 ++#define __ARCH_WANT_MEMFD_SECRET + + #include +diff --git a/linux-headers/asm-generic/unistd.h b/linux-headers/asm-generic/unistd.h +index f211961ce1..4557a8b608 100644 +--- a/linux-headers/asm-generic/unistd.h ++++ b/linux-headers/asm-generic/unistd.h +@@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise) + #define __NR_remap_file_pages 234 + __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) + #define __NR_mbind 235 +-__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) ++__SYSCALL(__NR_mbind, sys_mbind) + #define __NR_get_mempolicy 236 +-__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) ++__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) + #define __NR_set_mempolicy 237 +-__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) ++__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) + #define __NR_migrate_pages 238 +-__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) ++__SYSCALL(__NR_migrate_pages, sys_migrate_pages) + #define __NR_move_pages 239 +-__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) ++__SYSCALL(__NR_move_pages, sys_move_pages) + #endif + + #define 
__NR_rt_tgsigqueueinfo 240 +@@ -873,8 +873,18 @@ __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule) + #define __NR_landlock_restrict_self 446 + __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) + ++#ifdef __ARCH_WANT_MEMFD_SECRET ++#define __NR_memfd_secret 447 ++__SYSCALL(__NR_memfd_secret, sys_memfd_secret) ++#endif ++#define __NR_process_mrelease 448 ++__SYSCALL(__NR_process_mrelease, sys_process_mrelease) ++ ++#define __NR_futex_waitv 449 ++__SYSCALL(__NR_futex_waitv, sys_futex_waitv) ++ + #undef __NR_syscalls +-#define __NR_syscalls 447 ++#define __NR_syscalls 450 + + /* + * 32 bit systems traditionally used different +diff --git a/linux-headers/asm-mips/unistd_n32.h b/linux-headers/asm-mips/unistd_n32.h +index 09cd297698..4b3e7ad1ec 100644 +--- a/linux-headers/asm-mips/unistd_n32.h ++++ b/linux-headers/asm-mips/unistd_n32.h +@@ -376,5 +376,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_N32_H */ +diff --git a/linux-headers/asm-mips/unistd_n64.h b/linux-headers/asm-mips/unistd_n64.h +index 780e0cead6..488d9298d9 100644 +--- a/linux-headers/asm-mips/unistd_n64.h ++++ b/linux-headers/asm-mips/unistd_n64.h +@@ -352,5 +352,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define __NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_N64_H */ +diff --git a/linux-headers/asm-mips/unistd_o32.h b/linux-headers/asm-mips/unistd_o32.h +index 06a2b3b55e..f47399870a 100644 +--- a/linux-headers/asm-mips/unistd_o32.h ++++ b/linux-headers/asm-mips/unistd_o32.h +@@ -422,5 +422,6 @@ + #define __NR_landlock_create_ruleset (__NR_Linux + 444) + #define __NR_landlock_add_rule (__NR_Linux + 445) + #define 
__NR_landlock_restrict_self (__NR_Linux + 446) ++#define __NR_process_mrelease (__NR_Linux + 448) + + #endif /* _ASM_UNISTD_O32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_32.h b/linux-headers/asm-powerpc/unistd_32.h +index cd5a8a41b2..11d54696dc 100644 +--- a/linux-headers/asm-powerpc/unistd_32.h ++++ b/linux-headers/asm-powerpc/unistd_32.h +@@ -429,6 +429,7 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-powerpc/unistd_64.h b/linux-headers/asm-powerpc/unistd_64.h +index 8458effa8d..cf740bab13 100644 +--- a/linux-headers/asm-powerpc/unistd_64.h ++++ b/linux-headers/asm-powerpc/unistd_64.h +@@ -401,6 +401,7 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-s390/unistd_32.h b/linux-headers/asm-s390/unistd_32.h +index 0c3cd299e4..8f97d98128 100644 +--- a/linux-headers/asm-s390/unistd_32.h ++++ b/linux-headers/asm-s390/unistd_32.h +@@ -419,5 +419,6 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + #endif /* _ASM_S390_UNISTD_32_H */ +diff --git a/linux-headers/asm-s390/unistd_64.h b/linux-headers/asm-s390/unistd_64.h +index 8dfc08b5e6..021ffc30e6 100644 +--- a/linux-headers/asm-s390/unistd_64.h ++++ b/linux-headers/asm-s390/unistd_64.h +@@ -367,5 +367,6 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_process_mrelease 448 + + #endif /* _ASM_S390_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h +index a6c327f8ad..5a776a08f7 100644 +--- a/linux-headers/asm-x86/kvm.h ++++ 
b/linux-headers/asm-x86/kvm.h +@@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { + #define KVM_GUESTDBG_USE_HW_BP 0x00020000 + #define KVM_GUESTDBG_INJECT_DB 0x00040000 + #define KVM_GUESTDBG_INJECT_BP 0x00080000 ++#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 + + /* for KVM_SET_GUEST_DEBUG */ + struct kvm_guest_debug_arch { +@@ -503,4 +504,8 @@ struct kvm_pmu_event_filter { + #define KVM_PMU_EVENT_ALLOW 0 + #define KVM_PMU_EVENT_DENY 1 + ++/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */ ++#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */ ++#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */ ++ + #endif /* _ASM_X86_KVM_H */ +diff --git a/linux-headers/asm-x86/unistd_32.h b/linux-headers/asm-x86/unistd_32.h +index 66e96c0c68..9c9ffe312b 100644 +--- a/linux-headers/asm-x86/unistd_32.h ++++ b/linux-headers/asm-x86/unistd_32.h +@@ -437,6 +437,9 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_memfd_secret 447 ++#define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 + + + #endif /* _ASM_UNISTD_32_H */ +diff --git a/linux-headers/asm-x86/unistd_64.h b/linux-headers/asm-x86/unistd_64.h +index b8ff6f14ee..084f1eef9c 100644 +--- a/linux-headers/asm-x86/unistd_64.h ++++ b/linux-headers/asm-x86/unistd_64.h +@@ -359,6 +359,9 @@ + #define __NR_landlock_create_ruleset 444 + #define __NR_landlock_add_rule 445 + #define __NR_landlock_restrict_self 446 ++#define __NR_memfd_secret 447 ++#define __NR_process_mrelease 448 ++#define __NR_futex_waitv 449 + + + #endif /* _ASM_UNISTD_64_H */ +diff --git a/linux-headers/asm-x86/unistd_x32.h b/linux-headers/asm-x86/unistd_x32.h +index 06a1097c15..a2441affc2 100644 +--- a/linux-headers/asm-x86/unistd_x32.h ++++ b/linux-headers/asm-x86/unistd_x32.h +@@ -312,6 +312,9 @@ + #define __NR_landlock_create_ruleset (__X32_SYSCALL_BIT + 444) + #define __NR_landlock_add_rule (__X32_SYSCALL_BIT + 445) + #define 
__NR_landlock_restrict_self (__X32_SYSCALL_BIT + 446) ++#define __NR_memfd_secret (__X32_SYSCALL_BIT + 447) ++#define __NR_process_mrelease (__X32_SYSCALL_BIT + 448) ++#define __NR_futex_waitv (__X32_SYSCALL_BIT + 449) + #define __NR_rt_sigaction (__X32_SYSCALL_BIT + 512) + #define __NR_rt_sigreturn (__X32_SYSCALL_BIT + 513) + #define __NR_ioctl (__X32_SYSCALL_BIT + 514) +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index bcaf66cc4d..02c5e7b7bb 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -269,6 +269,7 @@ struct kvm_xen_exit { + #define KVM_EXIT_AP_RESET_HOLD 32 + #define KVM_EXIT_X86_BUS_LOCK 33 + #define KVM_EXIT_XEN 34 ++#define KVM_EXIT_RISCV_SBI 35 + + /* For KVM_EXIT_INTERNAL_ERROR */ + /* Emulate instruction failed. */ +@@ -397,13 +398,23 @@ struct kvm_run { + * "ndata" is correct, that new fields are enumerated in "flags", + * and that each flag enumerates fields that are 64-bit aligned + * and sized (so that ndata+internal.data[] is valid/accurate). ++ * ++ * Space beyond the defined fields may be used to store arbitrary ++ * debug information relating to the emulation failure. It is ++ * accounted for in "ndata" but the format is unspecified and is ++ * not represented in "flags". Any such information is *not* ABI! + */ + struct { + __u32 suberror; + __u32 ndata; + __u64 flags; +- __u8 insn_size; +- __u8 insn_bytes[15]; ++ union { ++ struct { ++ __u8 insn_size; ++ __u8 insn_bytes[15]; ++ }; ++ }; ++ /* Arbitrary debug data may follow. */ + } emulation_failure; + /* KVM_EXIT_OSI */ + struct { +@@ -469,6 +480,13 @@ struct kvm_run { + } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; ++ /* KVM_EXIT_RISCV_SBI */ ++ struct { ++ unsigned long extension_id; ++ unsigned long function_id; ++ unsigned long args[6]; ++ unsigned long ret[2]; ++ } riscv_sbi; + /* Fix the size of the union. 
*/ + char padding[256]; + }; +@@ -1223,11 +1241,16 @@ struct kvm_irqfd { + + /* Do not use 1, KVM_CHECK_EXTENSION returned it before we had flags. */ + #define KVM_CLOCK_TSC_STABLE 2 ++#define KVM_CLOCK_REALTIME (1 << 2) ++#define KVM_CLOCK_HOST_TSC (1 << 3) + + struct kvm_clock_data { + __u64 clock; + __u32 flags; +- __u32 pad[9]; ++ __u32 pad0; ++ __u64 realtime; ++ __u64 host_tsc; ++ __u32 pad[4]; + }; + + /* For KVM_CAP_SW_TLB */ +@@ -1965,7 +1988,9 @@ struct kvm_stats_header { + #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) + #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) +-#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK ++#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) ++#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) ++#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST + + #define KVM_STATS_UNIT_SHIFT 4 + #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) +@@ -1988,8 +2013,9 @@ struct kvm_stats_header { + * @size: The number of data items for this stats. + * Every data item is of type __u64. + * @offset: The offset of the stats to the start of stat structure in +- * struture kvm or kvm_vcpu. +- * @unused: Unused field for future usage. Always 0 for now. ++ * structure kvm or kvm_vcpu. ++ * @bucket_size: A parameter value used for histogram stats. It is only used ++ * for linear histogram stats, specifying the size of the bucket; + * @name: The name string for the stats. Its size is indicated by the + * &kvm_stats_header->name_size. 
+ */ +@@ -1998,7 +2024,7 @@ struct kvm_stats_desc { + __s16 exponent; + __u16 size; + __u32 offset; +- __u32 unused; ++ __u32 bucket_size; + char name[]; + }; + +-- +2.35.3 + diff --git a/kvm-migration-Add-migrate_use_tls-helper.patch b/kvm-migration-Add-migrate_use_tls-helper.patch index 0fe0d91..8fdfe68 100644 --- a/kvm-migration-Add-migrate_use_tls-helper.patch +++ b/kvm-migration-Add-migrate_use_tls-helper.patch @@ -1,18 +1,18 @@ -From 828f6c106eedcb7a48e551ffda15af56ff92a899 Mon Sep 17 00:00:00 2001 +From a7c6bc008fe006f005d5c15d3f883572ad5defc5 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:34 -0300 -Subject: [PATCH 11/18] migration: Add migrate_use_tls() helper +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 20/37] migration: Add migrate_use_tls() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [5/11] 06e945297c3b9c0ce5864885aafcdba1e5746bc2 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [20/26] 02afc2e60f1abbf6db45d83e54a18b66dad52426 +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert A lot of places check parameters.tls_creds in order to evaluate if TLS is in use, and sometimes call migrate_get_current() just for that test. 
@@ -50,10 +50,10 @@ index c4fc000a1a..086b5c0d8b 100644 TYPE_QIO_CHANNEL_TLS)) { migration_tls_channel_process_incoming(s, ioc, &local_err); diff --git a/migration/migration.c b/migration/migration.c -index 0a6b3b9f4d..d91efb66fe 100644 +index b0fc3f68bd..8e28f2ee41 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -2582,6 +2582,15 @@ bool migrate_use_zero_copy_send(void) +@@ -2568,6 +2568,15 @@ bool migrate_use_zero_copy_send(void) } #endif @@ -70,10 +70,10 @@ index 0a6b3b9f4d..d91efb66fe 100644 { MigrationState *s; diff --git a/migration/migration.h b/migration/migration.h -index 5bcb7628ef..c2cabb8a14 100644 +index 908098939f..9396b7e90a 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -381,6 +381,7 @@ bool migrate_use_zero_copy_send(void); +@@ -344,6 +344,7 @@ bool migrate_use_zero_copy_send(void); #else #define migrate_use_zero_copy_send() (false) #endif @@ -82,10 +82,10 @@ index 5bcb7628ef..c2cabb8a14 100644 uint64_t migrate_xbzrle_cache_size(void); bool migrate_colo_enabled(void); diff --git a/migration/multifd.c b/migration/multifd.c -index 76b57a7177..43998ad117 100644 +index 3725226400..e53811f04a 100644 --- a/migration/multifd.c +++ b/migration/multifd.c -@@ -784,14 +784,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, +@@ -789,14 +789,11 @@ static bool multifd_channel_connect(MultiFDSendParams *p, QIOChannel *ioc, Error *error) { diff --git a/kvm-migration-Add-migration_incoming_transport_cleanup.patch b/kvm-migration-Add-migration_incoming_transport_cleanup.patch new file mode 100644 index 0000000..985bbe2 --- /dev/null +++ b/kvm-migration-Add-migration_incoming_transport_cleanup.patch @@ -0,0 +1,102 @@ +From 02eab793d82cd3c82d31f1e1f34d16fcc30caf0e Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 1 Mar 2022 16:39:14 +0800 +Subject: [PATCH 27/37] migration: Add migration_incoming_transport_cleanup() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + 
+RH-Author: Peter Xu +RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times +RH-Commit: [1/2] 57b2a9a165ee7cb2d01519bd54eb8dc4185815e0 +RH-Bugzilla: 2097652 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Hanna Reitz + +Add a helper to cleanup the transport listener. + +When do it, we should also null-ify the cleanup hook and the data, then it's +even safe to call it multiple times. + +Move the socket_address_list cleanup altogether, because that's a mirror of the +listener channels and only for the purpose of query-migrate. Hence when +someone wants to cleanup the listener transport, it should also want to cleanup +the socket list too, always. + +No functional change intended. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Peter Xu +Message-Id: <20220301083925.33483-15-peterx@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit e031149c78489413038e934eec9f54ac699cf322) +Signed-off-by: Peter Xu +--- + migration/migration.c | 22 ++++++++++++++-------- + migration/migration.h | 1 + + 2 files changed, 15 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index c8aa55d2fe..b787a36789 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -263,6 +263,19 @@ MigrationIncomingState *migration_incoming_get_current(void) + return current_incoming; + } + ++void migration_incoming_transport_cleanup(MigrationIncomingState *mis) ++{ ++ if (mis->socket_address_list) { ++ qapi_free_SocketAddressList(mis->socket_address_list); ++ mis->socket_address_list = NULL; ++ } ++ ++ if (mis->transport_cleanup) { ++ mis->transport_cleanup(mis->transport_data); ++ mis->transport_data = mis->transport_cleanup = NULL; ++ } ++} ++ + void migration_incoming_state_destroy(void) + { + struct MigrationIncomingState *mis = migration_incoming_get_current(); +@@ -283,10 +296,8 @@ void migration_incoming_state_destroy(void) + 
g_array_free(mis->postcopy_remote_fds, TRUE); + mis->postcopy_remote_fds = NULL; + } +- if (mis->transport_cleanup) { +- mis->transport_cleanup(mis->transport_data); +- } + ++ migration_incoming_transport_cleanup(mis); + qemu_event_reset(&mis->main_thread_load_event); + + if (mis->page_requested) { +@@ -294,11 +305,6 @@ void migration_incoming_state_destroy(void) + mis->page_requested = NULL; + } + +- if (mis->socket_address_list) { +- qapi_free_SocketAddressList(mis->socket_address_list); +- mis->socket_address_list = NULL; +- } +- + yank_unregister_instance(MIGRATION_YANK_INSTANCE); + } + +diff --git a/migration/migration.h b/migration/migration.h +index 9396b7e90a..243898e3be 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -130,6 +130,7 @@ struct MigrationIncomingState { + + MigrationIncomingState *migration_incoming_get_current(void); + void migration_incoming_state_destroy(void); ++void migration_incoming_transport_cleanup(MigrationIncomingState *mis); + /* + * Functions to work with blocktime context + */ +-- +2.35.3 + diff --git a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch index 206ac3d..63e67c6 100644 --- a/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +++ b/kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch @@ -1,19 +1,19 @@ -From d6500340dc3c1152b5efe04ef3daa50c17a55e30 Mon Sep 17 00:00:00 2001 +From 2a84bf822cae38f67458043cd379a22e0fd22485 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:33 -0300 -Subject: [PATCH 10/18] migration: Add zero-copy-send parameter for QMP/HMP for +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 19/37] migration: Add zero-copy-send parameter for QMP/HMP for Linux MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [4/11] 
514d98d595992c53ff98de750035e080ded8972e (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [19/26] 44ec703088cad75fd6e504958527e81d3261c9df +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert Add property that allows zero-copy migration of memory pages on the sending side, and also includes a helper function @@ -42,10 +42,10 @@ Signed-off-by: Leonardo Bras 5 files changed, 76 insertions(+), 2 deletions(-) diff --git a/migration/migration.c b/migration/migration.c -index 695f0f2900..0a6b3b9f4d 100644 +index 8a13294da6..b0fc3f68bd 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -899,6 +899,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) +@@ -888,6 +888,10 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->multifd_zlib_level = s->parameters.multifd_zlib_level; params->has_multifd_zstd_level = true; params->multifd_zstd_level = s->parameters.multifd_zstd_level; @@ -56,7 +56,7 @@ index 695f0f2900..0a6b3b9f4d 100644 params->has_xbzrle_cache_size = true; params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; params->has_max_postcopy_bandwidth = true; -@@ -1555,6 +1559,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, +@@ -1541,6 +1545,11 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_compression) { dest->multifd_compression = params->multifd_compression; } @@ -68,7 +68,7 @@ index 695f0f2900..0a6b3b9f4d 100644 if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } -@@ -1667,6 +1676,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1653,6 +1662,11 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_compression) { 
s->parameters.multifd_compression = params->multifd_compression; } @@ -80,7 +80,7 @@ index 695f0f2900..0a6b3b9f4d 100644 if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2557,6 +2571,17 @@ int migrate_multifd_zstd_level(void) +@@ -2543,6 +2557,17 @@ int migrate_multifd_zstd_level(void) return s->parameters.multifd_zstd_level; } @@ -98,7 +98,7 @@ index 695f0f2900..0a6b3b9f4d 100644 int migrate_use_xbzrle(void) { MigrationState *s; -@@ -4200,6 +4225,10 @@ static Property migration_properties[] = { +@@ -4193,6 +4218,10 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, parameters.multifd_zstd_level, DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), @@ -109,7 +109,7 @@ index 695f0f2900..0a6b3b9f4d 100644 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, parameters.xbzrle_cache_size, DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4297,6 +4326,9 @@ static void migration_instance_init(Object *obj) +@@ -4290,6 +4319,9 @@ static void migration_instance_init(Object *obj) params->has_multifd_compression = true; params->has_multifd_zlib_level = true; params->has_multifd_zstd_level = true; @@ -120,10 +120,10 @@ index 695f0f2900..0a6b3b9f4d 100644 params->has_max_postcopy_bandwidth = true; params->has_max_cpu_throttle = true; diff --git a/migration/migration.h b/migration/migration.h -index 2de861df01..5bcb7628ef 100644 +index d016cedd9d..908098939f 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -376,6 +376,11 @@ MultiFDCompression migrate_multifd_compression(void); +@@ -339,6 +339,11 @@ MultiFDCompression migrate_multifd_compression(void); int migrate_multifd_zlib_level(void); int migrate_multifd_zstd_level(void); @@ -159,10 +159,10 @@ index 05705a32d8..3754d8f72c 100644 object_unref(OBJECT(sioc)); } diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 634968498b..55b48d3733 100644 +index 
2669156b28..e02da5008b 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c -@@ -1309,6 +1309,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) +@@ -1297,6 +1297,12 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_multifd_zstd_level = true; visit_type_uint8(v, param, &p->multifd_zstd_level, &err); break; @@ -176,10 +176,10 @@ index 634968498b..55b48d3733 100644 p->has_xbzrle_cache_size = true; if (!visit_type_size(v, param, &cache_size, &err)) { diff --git a/qapi/migration.json b/qapi/migration.json -index 27d7b28158..4d833ecdd6 100644 +index bbfd48cf0b..59b5c5780b 100644 --- a/qapi/migration.json +++ b/qapi/migration.json -@@ -741,6 +741,13 @@ +@@ -730,6 +730,13 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # @@ -193,7 +193,7 @@ index 27d7b28158..4d833ecdd6 100644 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the -@@ -780,6 +787,7 @@ +@@ -769,6 +776,7 @@ 'xbzrle-cache-size', 'max-postcopy-bandwidth', 'max-cpu-throttle', 'multifd-compression', 'multifd-zlib-level' ,'multifd-zstd-level', @@ -201,7 +201,7 @@ index 27d7b28158..4d833ecdd6 100644 'block-bitmap-mapping' ] } ## -@@ -906,6 +914,13 @@ +@@ -895,6 +903,13 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # @@ -215,7 +215,7 @@ index 27d7b28158..4d833ecdd6 100644 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the -@@ -960,6 +975,7 @@ +@@ -949,6 +964,7 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', @@ -223,7 +223,7 @@ index 27d7b28158..4d833ecdd6 100644 '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } ## -@@ -1106,6 +1122,13 @@ +@@ -1095,6 +1111,13 @@ # will consume more CPU. # Defaults to 1. 
(Since 5.0) # @@ -237,7 +237,7 @@ index 27d7b28158..4d833ecdd6 100644 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the -@@ -1158,6 +1181,7 @@ +@@ -1147,6 +1170,7 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', diff --git a/kvm-migration-All-this-fields-are-unsigned.patch b/kvm-migration-All-this-fields-are-unsigned.patch new file mode 100644 index 0000000..245e2b4 --- /dev/null +++ b/kvm-migration-All-this-fields-are-unsigned.patch @@ -0,0 +1,329 @@ +From b21f18afceba8231c78d29e66f58516e12c28d22 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 10/37] migration: All this fields are unsigned +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [10/26] 2c3ee27aae334db3b283ab7ef580f58e396e569d +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +So printing it as %d is wrong. Notice that for the channel id, that +is an uint8_t, but I changed it anyways for consistency. 
+ +Signed-off-by: Juan Quintela +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Peter Xu +(cherry picked from commit 04e114049406dbb69fc9043c795ddd28fdba31a6) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 20 ++++++++++---------- + migration/multifd-zstd.c | 24 ++++++++++++------------ + migration/multifd.c | 16 ++++++++-------- + migration/trace-events | 26 +++++++++++++------------- + 4 files changed, 43 insertions(+), 43 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a1950a4588..a987e4a26c 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -52,7 +52,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + zs->opaque = Z_NULL; + if (deflateInit(zs, migrate_multifd_zlib_level()) != Z_OK) { + g_free(z); +- error_setg(errp, "multifd %d: deflate init failed", p->id); ++ error_setg(errp, "multifd %u: deflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -62,7 +62,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + deflateEnd(&z->zs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + p->data = z; +@@ -134,12 +134,12 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + ret = deflate(zs, flush); + } while (ret == Z_OK && zs->avail_in && zs->avail_out); + if (ret == Z_OK && zs->avail_in) { +- error_setg(errp, "multifd %d: deflate failed to compress all input", ++ error_setg(errp, "multifd %u: deflate failed to compress all input", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: deflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: deflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } +@@ -193,7 +193,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + zs->avail_in = 0; + 
zs->next_in = Z_NULL; + if (inflateInit(zs) != Z_OK) { +- error_setg(errp, "multifd %d: inflate init failed", p->id); ++ error_setg(errp, "multifd %u: inflate init failed", p->id); + return -1; + } + /* We will never have more than page_count pages */ +@@ -203,7 +203,7 @@ static int zlib_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zbuff = g_try_malloc(z->zbuff_len); + if (!z->zbuff) { + inflateEnd(zs); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -252,7 +252,7 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZLIB) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZLIB); + return -1; + } +@@ -289,19 +289,19 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret == Z_OK && zs->avail_in + && (zs->total_out - start) < page_size); + if (ret == Z_OK && (zs->total_out - start) < page_size) { +- error_setg(errp, "multifd %d: inflate generated too few output", ++ error_setg(errp, "multifd %u: inflate generated too few output", + p->id); + return -1; + } + if (ret != Z_OK) { +- error_setg(errp, "multifd %d: inflate returned %d instead of Z_OK", ++ error_setg(errp, "multifd %u: inflate returned %d instead of Z_OK", + p->id, ret); + return -1; + } + } + out_size = zs->total_out - out_size; + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index d9ed42622b..2185a83eac 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -56,7 +56,7 @@ static int 
zstd_send_setup(MultiFDSendParams *p, Error **errp) + z->zcs = ZSTD_createCStream(); + if (!z->zcs) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createCStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createCStream failed", p->id); + return -1; + } + +@@ -64,7 +64,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (ZSTD_isError(res)) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: initCStream failed with error %s", ++ error_setg(errp, "multifd %u: initCStream failed with error %s", + p->id, ZSTD_getErrorName(res)); + return -1; + } +@@ -75,7 +75,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeCStream(z->zcs); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -146,12 +146,12 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.size - z->out.pos > 0)); + if (ret > 0 && (z->in.size - z->in.pos > 0)) { +- error_setg(errp, "multifd %d: compressStream buffer too small", ++ error_setg(errp, "multifd %u: compressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: compressStream error %s", ++ error_setg(errp, "multifd %u: compressStream error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -201,7 +201,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + z->zds = ZSTD_createDStream(); + if (!z->zds) { + g_free(z); +- error_setg(errp, "multifd %d: zstd createDStream failed", p->id); ++ error_setg(errp, "multifd %u: zstd createDStream failed", p->id); + return -1; + } + +@@ -209,7 +209,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (ZSTD_isError(ret)) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: initDStream 
failed with error %s", ++ error_setg(errp, "multifd %u: initDStream failed with error %s", + p->id, ZSTD_getErrorName(ret)); + return -1; + } +@@ -222,7 +222,7 @@ static int zstd_recv_setup(MultiFDRecvParams *p, Error **errp) + if (!z->zbuff) { + ZSTD_freeDStream(z->zds); + g_free(z); +- error_setg(errp, "multifd %d: out of memory for zbuff", p->id); ++ error_setg(errp, "multifd %u: out of memory for zbuff", p->id); + return -1; + } + return 0; +@@ -270,7 +270,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + int i; + + if (flags != MULTIFD_FLAG_ZSTD) { +- error_setg(errp, "multifd %d: flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_ZSTD); + return -1; + } +@@ -302,19 +302,19 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + } while (ret > 0 && (z->in.size - z->in.pos > 0) + && (z->out.pos < page_size)); + if (ret > 0 && (z->out.pos < page_size)) { +- error_setg(errp, "multifd %d: decompressStream buffer too small", ++ error_setg(errp, "multifd %u: decompressStream buffer too small", + p->id); + return -1; + } + if (ZSTD_isError(ret)) { +- error_setg(errp, "multifd %d: decompressStream returned %s", ++ error_setg(errp, "multifd %u: decompressStream returned %s", + p->id, ZSTD_getErrorName(ret)); + return ret; + } + out_size += z->out.pos; + } + if (out_size != expected_size) { +- error_setg(errp, "multifd %d: packet size received %d size expected %d", ++ error_setg(errp, "multifd %u: packet size received %u size expected %u", + p->id, out_size, expected_size); + return -1; + } +diff --git a/migration/multifd.c b/migration/multifd.c +index 0533da154a..d0d19470f9 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -148,7 +148,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + + if (flags != MULTIFD_FLAG_NOCOMP) { +- error_setg(errp, "multifd %d: 
flags received %x flags expected %x", ++ error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +@@ -212,8 +212,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.version != MULTIFD_VERSION) { +- error_setg(errp, "multifd: received packet version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received packet version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -229,8 +229,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) + } + + if (msg.id > migrate_multifd_channels()) { +- error_setg(errp, "multifd: received channel version %d " +- "expected %d", msg.version, MULTIFD_VERSION); ++ error_setg(errp, "multifd: received channel version %u " ++ "expected %u", msg.version, MULTIFD_VERSION); + return -1; + } + +@@ -303,7 +303,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + packet->version = be32_to_cpu(packet->version); + if (packet->version != MULTIFD_VERSION) { + error_setg(errp, "multifd: received packet " +- "version %d and expected version %d", ++ "version %u and expected version %u", + packet->version, MULTIFD_VERSION); + return -1; + } +@@ -317,7 +317,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + */ + if (packet->pages_alloc > pages_max * 100) { + error_setg(errp, "multifd: received packet " +- "with size %d and expected a maximum size of %d", ++ "with size %u and expected a maximum size of %u", + packet->pages_alloc, pages_max * 100) ; + return -1; + } +@@ -333,7 +333,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + p->pages->num = be32_to_cpu(packet->pages_used); + if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " +- "with %d pages and expected maximum pages are %d", ++ "with %u pages and expected maximum pages are %u", + 
p->pages->num, packet->pages_alloc) ; + return -1; + } +diff --git a/migration/trace-events b/migration/trace-events +index b48d873b8a..5172cb3b3d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -115,23 +115,23 @@ ram_write_tracking_ramblock_start(const char *block_id, size_t page_size, void * + ram_write_tracking_ramblock_stop(const char *block_id, size_t page_size, void *addr, size_t length) "%s: page_size: %zu addr: %p length: %zu" + + # multifd.c +-multifd_new_send_channel_async(uint8_t id) "channel %d" +-multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_recv_new_channel(uint8_t id) "channel %d" ++multifd_new_send_channel_async(uint8_t id) "channel %u" ++multifd_recv(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_recv_new_channel(uint8_t id) "channel %u" + multifd_recv_sync_main(long packet_num) "packet num %ld" +-multifd_recv_sync_main_signal(uint8_t id) "channel %d" +-multifd_recv_sync_main_wait(uint8_t id) "channel %d" ++multifd_recv_sync_main_signal(uint8_t id) "channel %u" ++multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" +-multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_recv_thread_start(uint8_t id) "%d" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %d packet_num %" PRIu64 " pages %d flags 0x%x next packet size %d" +-multifd_send_error(uint8_t id) "channel %d" ++multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_recv_thread_start(uint8_t id) "%u" ++multifd_send(uint8_t id, uint64_t 
packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" +-multifd_send_sync_main_signal(uint8_t id) "channel %d" +-multifd_send_sync_main_wait(uint8_t id) "channel %d" ++multifd_send_sync_main_signal(uint8_t id) "channel %u" ++multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %d packets %" PRIu64 " pages %" PRIu64 +-multifd_send_thread_start(uint8_t id) "%d" ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" + multifd_tls_outgoing_handshake_complete(void *ioc) "ioc=%p" +-- +2.35.3 + diff --git a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch index 29dc0ea..b4f1e68 100644 --- a/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +++ b/kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch @@ -1,18 +1,18 @@ -From fd6f516a94e635bc42e58448f314db575814a834 Mon Sep 17 00:00:00 2001 +From f5be3d8a5944679c1239b974e0f910f1afe4f532 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 31 Mar 2022 11:08:45 -0400 -Subject: [PATCH 18/18] migration: Allow migrate-recover to run multiple times +Subject: [PATCH 28/37] migration: Allow migrate-recover to run multiple times MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Peter Xu -RH-MergeRequest: 104: migration: Allow migrate-recover to run multiple 
times -RH-Commit: [1/1] afd726e54c069ae800e2d01f34e768d6bac7dcb9 (peterx/qemu-kvm) -RH-Bugzilla: 2096143 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Hanna Reitz +RH-MergeRequest: 195: migration: Allow migrate-recover to run multiple times +RH-Commit: [2/2] a2e6b02007a06c9c7f5237289095811c7d7ca1f1 +RH-Bugzilla: 2097652 +RH-Acked-by: Leonardo Brás RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Hanna Reitz Previously migration didn't have an easy way to cleanup the listening transport, migrate recovery only allows to execute once. That's done with a @@ -37,10 +37,10 @@ Signed-off-by: Peter Xu 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/migration/migration.c b/migration/migration.c -index 2a141bfaf3..8fb3eae910 100644 +index b787a36789..616c3ff32e 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -2166,11 +2166,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) +@@ -2158,11 +2158,8 @@ void qmp_migrate_recover(const char *uri, Error **errp) return; } @@ -54,7 +54,7 @@ index 2a141bfaf3..8fb3eae910 100644 /* * Note that this call will never start a real migration; it will -@@ -2178,12 +2175,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) +@@ -2170,12 +2167,6 @@ void qmp_migrate_recover(const char *uri, Error **errp) * to continue using that newly established channel. 
*/ qemu_start_incoming_migration(uri, errp); @@ -68,10 +68,10 @@ index 2a141bfaf3..8fb3eae910 100644 void qmp_migrate_pause(Error **errp) diff --git a/migration/migration.h b/migration/migration.h -index c2cabb8a14..fbc8690ec8 100644 +index 243898e3be..0ae2133326 100644 --- a/migration/migration.h +++ b/migration/migration.h -@@ -139,7 +139,6 @@ struct MigrationIncomingState { +@@ -103,7 +103,6 @@ struct MigrationIncomingState { struct PostcopyBlocktimeContext *blocktime_ctx; /* notify PAUSED postcopy incoming migrations to try to continue */ @@ -80,10 +80,10 @@ index c2cabb8a14..fbc8690ec8 100644 QemuSemaphore postcopy_pause_sem_fault; diff --git a/migration/savevm.c b/migration/savevm.c -index 02ed94c180..d9076897b8 100644 +index 0bef031acb..b8382aaa64 100644 --- a/migration/savevm.c +++ b/migration/savevm.c -@@ -2589,9 +2589,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) +@@ -2568,9 +2568,6 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) assert(migrate_postcopy_ram()); diff --git a/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch b/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch new file mode 100644 index 0000000..f1a7d49 --- /dev/null +++ b/kvm-migration-Avoid-false-positive-on-non-supported-scen.patch @@ -0,0 +1,93 @@ +From 097f72427f4f5da4fdcdbeee52aea0c1f67d54dc Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Tue, 19 Jul 2022 09:23:45 -0300 +Subject: [PATCH 6/9] migration: Avoid false-positive on non-supported + scenarios for zero-copy-send +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [6/8] f23195f3ab4f6eba0463f38e5971ccaccdac2cfd +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. 
 David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Migration with zero-copy-send currently has its limitations, as it can't +be used with TLS nor any kind of compression. In such scenarios, it should +output errors during parameter / capability setting. + +But currently there are some ways of setting these not-supported scenarios +without printing the error message: + +1) For 'compression' capability, it works by enabling it together with +zero-copy-send. This happens because the validity test for zero-copy uses +the helper function migrate_use_compression(), which checks for compression +presence in s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS]. + +The point here is: the validity test happens before the capability gets +enabled. If all of them get enabled together, this test will not return +error. + +In order to fix that, replace migrate_use_compression() by directly testing +the cap_list parameter of migrate_caps_check(). + +2) For features enabled by parameters such as TLS & 'multifd_compression', +there was also a possibility of setting non-supported scenarios: setting +zero-copy-send first, then setting the unsupported parameter. + +In order to fix that, also add a check for parameters conflicting with +zero-copy-send on migrate_params_check(). + +3) XBZRLE is also a compression capability, so it makes sense to also add +it to the list of capabilities which are not supported with zero-copy-send. + +Fixes: 1abaec9a1b2c ("migration: Change zero_copy_send from migration parameter to migration capability") +Signed-off-by: Leonardo Bras +Message-Id: <20220719122345.253713-1-leobras@redhat.com> +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit 90eb69e4f1a16b388d0483543bf6bfc69a9966e4) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 952a26c5c2..35b3197eff 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1260,7 +1260,9 @@ static bool migrate_caps_check(bool *cap_list, + #ifdef CONFIG_LINUX + if (cap_list[MIGRATION_CAPABILITY_ZERO_COPY_SEND] && + (!cap_list[MIGRATION_CAPABILITY_MULTIFD] || +- migrate_use_compression() || ++ cap_list[MIGRATION_CAPABILITY_COMPRESS] || ++ cap_list[MIGRATION_CAPABILITY_XBZRLE] || ++ migrate_multifd_compression() || + migrate_use_tls())) { + error_setg(errp, + "Zero copy only available for non-compressed non-TLS multifd migration"); +@@ -1497,6 +1499,17 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) + error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); + return false; + } ++ ++#ifdef CONFIG_LINUX ++ if (migrate_use_zero_copy_send() && ++ ((params->has_multifd_compression && params->multifd_compression) || ++ (params->has_tls_creds && params->tls_creds && *params->tls_creds))) { ++ error_setg(errp, ++ "Zero copy only available for non-compressed non-TLS multifd migration"); ++ return false; ++ } ++#endif ++ + return true; + } + +-- +2.31.1 + diff --git a/kvm-migration-Change-zero_copy_send-from-migration-param.patch b/kvm-migration-Change-zero_copy_send-from-migration-param.patch index abeeeb6..b1f576d 100644 --- a/kvm-migration-Change-zero_copy_send-from-migration-param.patch +++ b/kvm-migration-Change-zero_copy_send-from-migration-param.patch @@ -1,19 +1,19 @@ -From 7e2a037f3f349c21201152cecce32d8c8ff0bea0 Mon Sep 17 00:00:00 2001 +From 70108ff9ffe77062116e47670c0e0c2396529f88 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Mon, 20 Jun 2022 02:39:45 -0300 -Subject: [PATCH 17/18] migration: Change 
zero_copy_send from migration +Subject: [PATCH 26/37] migration: Change zero_copy_send from migration parameter to migration capability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [11/11] e4a955607947896a49398ac8400241a0adac51a1 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [26/26] ea61e6cbdbe47611bd22d18988e1c4c4e8357cc3 +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert When originally implemented, zero_copy_send was designed as a Migration paramenter. @@ -43,10 +43,10 @@ Signed-off-by: Leonardo Bras 3 files changed, 34 insertions(+), 63 deletions(-) diff --git a/migration/migration.c b/migration/migration.c -index 102236fba0..2a141bfaf3 100644 +index 5357efd348..c8aa55d2fe 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -163,7 +163,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, +@@ -162,7 +162,8 @@ INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot, MIGRATION_CAPABILITY_COMPRESS, MIGRATION_CAPABILITY_XBZRLE, MIGRATION_CAPABILITY_X_COLO, @@ -54,9 +54,9 @@ index 102236fba0..2a141bfaf3 100644 + MIGRATION_CAPABILITY_VALIDATE_UUID, + MIGRATION_CAPABILITY_ZERO_COPY_SEND); - /* When we add fault tolerance, we could have several - migrations at once. 
For now we don't need to add -@@ -899,10 +900,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) + bool migrate_pre_2_2; + +@@ -888,10 +889,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp) params->multifd_zlib_level = s->parameters.multifd_zlib_level; params->has_multifd_zstd_level = true; params->multifd_zstd_level = s->parameters.multifd_zstd_level; @@ -67,7 +67,7 @@ index 102236fba0..2a141bfaf3 100644 params->has_xbzrle_cache_size = true; params->xbzrle_cache_size = s->parameters.xbzrle_cache_size; params->has_max_postcopy_bandwidth = true; -@@ -1263,6 +1260,24 @@ static bool migrate_caps_check(bool *cap_list, +@@ -1249,6 +1246,24 @@ static bool migrate_caps_check(bool *cap_list, } } @@ -92,7 +92,7 @@ index 102236fba0..2a141bfaf3 100644 /* incoming side only */ if (runstate_check(RUN_STATE_INMIGRATE) && !migrate_multifd_is_allowed() && -@@ -1485,16 +1500,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) +@@ -1471,16 +1486,6 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); return false; } @@ -109,7 +109,7 @@ index 102236fba0..2a141bfaf3 100644 return true; } -@@ -1568,11 +1573,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, +@@ -1554,11 +1559,6 @@ static void migrate_params_test_apply(MigrateSetParameters *params, if (params->has_multifd_compression) { dest->multifd_compression = params->multifd_compression; } @@ -121,7 +121,7 @@ index 102236fba0..2a141bfaf3 100644 if (params->has_xbzrle_cache_size) { dest->xbzrle_cache_size = params->xbzrle_cache_size; } -@@ -1685,11 +1685,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) +@@ -1671,11 +1671,6 @@ static void migrate_params_apply(MigrateSetParameters *params, Error **errp) if (params->has_multifd_compression) { s->parameters.multifd_compression = params->multifd_compression; } @@ -133,7 
+133,7 @@ index 102236fba0..2a141bfaf3 100644 if (params->has_xbzrle_cache_size) { s->parameters.xbzrle_cache_size = params->xbzrle_cache_size; xbzrle_cache_resize(params->xbzrle_cache_size, errp); -@@ -2587,7 +2582,7 @@ bool migrate_use_zero_copy_send(void) +@@ -2573,7 +2568,7 @@ bool migrate_use_zero_copy_send(void) s = migrate_get_current(); @@ -142,7 +142,7 @@ index 102236fba0..2a141bfaf3 100644 } #endif -@@ -4243,10 +4238,6 @@ static Property migration_properties[] = { +@@ -4236,10 +4231,6 @@ static Property migration_properties[] = { DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState, parameters.multifd_zstd_level, DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL), @@ -153,7 +153,7 @@ index 102236fba0..2a141bfaf3 100644 DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, parameters.xbzrle_cache_size, DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), -@@ -4284,6 +4275,10 @@ static Property migration_properties[] = { +@@ -4277,6 +4268,10 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD), DEFINE_PROP_MIG_CAP("x-background-snapshot", MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT), @@ -164,7 +164,7 @@ index 102236fba0..2a141bfaf3 100644 DEFINE_PROP_END_OF_LIST(), }; -@@ -4344,9 +4339,6 @@ static void migration_instance_init(Object *obj) +@@ -4337,9 +4332,6 @@ static void migration_instance_init(Object *obj) params->has_multifd_compression = true; params->has_multifd_zlib_level = true; params->has_multifd_zstd_level = true; @@ -175,10 +175,10 @@ index 102236fba0..2a141bfaf3 100644 params->has_max_postcopy_bandwidth = true; params->has_max_cpu_throttle = true; diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c -index 55b48d3733..634968498b 100644 +index e02da5008b..2669156b28 100644 --- a/monitor/hmp-cmds.c +++ b/monitor/hmp-cmds.c -@@ -1309,12 +1309,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) +@@ -1297,12 +1297,6 @@ void hmp_migrate_set_parameter(Monitor *mon, const QDict *qdict) p->has_multifd_zstd_level 
= true; visit_type_uint8(v, param, &p->multifd_zstd_level, &err); break; @@ -192,10 +192,10 @@ index 55b48d3733..634968498b 100644 p->has_xbzrle_cache_size = true; if (!visit_type_size(v, param, &cache_size, &err)) { diff --git a/qapi/migration.json b/qapi/migration.json -index 4d833ecdd6..5105790cd0 100644 +index 59b5c5780b..fe70a0c4b2 100644 --- a/qapi/migration.json +++ b/qapi/migration.json -@@ -463,6 +463,13 @@ +@@ -452,6 +452,13 @@ # procedure starts. The VM RAM is saved with running VM. # (since 6.0) # @@ -209,7 +209,7 @@ index 4d833ecdd6..5105790cd0 100644 # Features: # @unstable: Members @x-colo and @x-ignore-shared are experimental. # -@@ -476,7 +483,8 @@ +@@ -465,7 +472,8 @@ 'block', 'return-path', 'pause-before-switchover', 'multifd', 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', { 'name': 'x-ignore-shared', 'features': [ 'unstable' ] }, @@ -219,7 +219,7 @@ index 4d833ecdd6..5105790cd0 100644 ## # @MigrationCapabilityStatus: -@@ -741,12 +749,6 @@ +@@ -730,12 +738,6 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # @@ -232,7 +232,7 @@ index 4d833ecdd6..5105790cd0 100644 # # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such -@@ -787,7 +789,6 @@ +@@ -776,7 +778,6 @@ 'xbzrle-cache-size', 'max-postcopy-bandwidth', 'max-cpu-throttle', 'multifd-compression', 'multifd-zlib-level' ,'multifd-zstd-level', @@ -240,7 +240,7 @@ index 4d833ecdd6..5105790cd0 100644 'block-bitmap-mapping' ] } ## -@@ -914,13 +915,6 @@ +@@ -903,13 +904,6 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # @@ -254,7 +254,7 @@ index 4d833ecdd6..5105790cd0 100644 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. 
Such # aliases may for example be the corresponding names on the -@@ -975,7 +969,6 @@ +@@ -964,7 +958,6 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', @@ -262,7 +262,7 @@ index 4d833ecdd6..5105790cd0 100644 '*block-bitmap-mapping': [ 'BitmapMigrationNodeAlias' ] } } ## -@@ -1122,13 +1115,6 @@ +@@ -1111,13 +1104,6 @@ # will consume more CPU. # Defaults to 1. (Since 5.0) # @@ -276,7 +276,7 @@ index 4d833ecdd6..5105790cd0 100644 # @block-bitmap-mapping: Maps block nodes and bitmaps on them to # aliases for the purpose of dirty bitmap migration. Such # aliases may for example be the corresponding names on the -@@ -1181,7 +1167,6 @@ +@@ -1170,7 +1156,6 @@ '*multifd-compression': 'MultiFDCompression', '*multifd-zlib-level': 'uint8', '*multifd-zstd-level': 'uint8', diff --git a/kvm-migration-Introduce-ram_transferred_add.patch b/kvm-migration-Introduce-ram_transferred_add.patch new file mode 100644 index 0000000..561e231 --- /dev/null +++ b/kvm-migration-Introduce-ram_transferred_add.patch @@ -0,0 +1,122 @@ +From 030b54f5a2b2c8976370c962e9847af4746ac2c2 Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:40 +0000 +Subject: [PATCH 1/9] migration: Introduce ram_transferred_add() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [1/8] a6545760b0de13d533f6164be0545a6720bb42c7 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Replace direct manipulation of ram_counters.transferred with a +function. 
+ +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit 4c2d0f6dca24f3396ab0718ad3f9f53cc53004df) +Signed-off-by: Leonardo Bras +--- + migration/ram.c | 23 ++++++++++++++--------- + 1 file changed, 14 insertions(+), 9 deletions(-) + +diff --git a/migration/ram.c b/migration/ram.c +index 3e208efca7..3e82c4ff46 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -391,6 +391,11 @@ uint64_t ram_bytes_remaining(void) + + MigrationStats ram_counters; + ++static void ram_transferred_add(uint64_t bytes) ++{ ++ ram_counters.transferred += bytes; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +@@ -772,7 +777,7 @@ static int save_xbzrle_page(RAMState *rs, uint8_t **current_data, + * RAM_SAVE_FLAG_CONTINUE. + */ + xbzrle_counters.bytes += bytes_xbzrle - 8; +- ram_counters.transferred += bytes_xbzrle; ++ ram_transferred_add(bytes_xbzrle); + + return 1; + } +@@ -1203,7 +1208,7 @@ static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset) + + if (len) { + ram_counters.duplicate++; +- ram_counters.transferred += len; ++ ram_transferred_add(len); + return 1; + } + return -1; +@@ -1239,7 +1244,7 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } + + if (bytes_xmit) { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + *pages = 1; + } + +@@ -1270,8 +1275,8 @@ static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + uint8_t *buf, bool async) + { +- ram_counters.transferred += save_page_header(rs, rs->f, block, +- offset | RAM_SAVE_FLAG_PAGE); ++ ram_transferred_add(save_page_header(rs, rs->f, block, ++ offset | RAM_SAVE_FLAG_PAGE)); + if (async) { + qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE, + migrate_release_ram() & 
+@@ -1279,7 +1284,7 @@ static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset, + } else { + qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE); + } +- ram_counters.transferred += TARGET_PAGE_SIZE; ++ ram_transferred_add(TARGET_PAGE_SIZE); + ram_counters.normal++; + return 1; + } +@@ -1378,7 +1383,7 @@ exit: + static void + update_compress_thread_counts(const CompressParam *param, int bytes_xmit) + { +- ram_counters.transferred += bytes_xmit; ++ ram_transferred_add(bytes_xmit); + + if (param->zero_page) { + ram_counters.duplicate++; +@@ -2303,7 +2308,7 @@ void acct_update_position(QEMUFile *f, size_t size, bool zero) + ram_counters.duplicate += pages; + } else { + ram_counters.normal += pages; +- ram_counters.transferred += size; ++ ram_transferred_add(size); + qemu_update_position(f, size); + } + } +@@ -3147,7 +3152,7 @@ out: + + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); + qemu_fflush(f); +- ram_counters.transferred += 8; ++ ram_transferred_add(8); + + ret = qemu_file_get_error(f); + } +-- +2.31.1 + diff --git a/kvm-migration-Never-call-twice-qemu_target_page_size.patch b/kvm-migration-Never-call-twice-qemu_target_page_size.patch new file mode 100644 index 0000000..d956712 --- /dev/null +++ b/kvm-migration-Never-call-twice-qemu_target_page_size.patch @@ -0,0 +1,116 @@ +From 6a9a5a2809cbbe2982df156722b88efeec998e3d Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 01/37] migration: Never call twice qemu_target_page_size() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [1/26] 809ca84dec80bafc1959df8c9e57f482ee752a97 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 144fa06b3431e806057ce1438338395b35a3e544) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 7 ++++--- + migration/multifd.c | 7 ++++--- + migration/savevm.c | 5 +++-- + 3 files changed, 11 insertions(+), 8 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index a87ff01b81..8a13294da6 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -992,6 +992,8 @@ static void populate_time_info(MigrationInfo *info, MigrationState *s) + + static void populate_ram_info(MigrationInfo *info, MigrationState *s) + { ++ size_t page_size = qemu_target_page_size(); ++ + info->has_ram = true; + info->ram = g_malloc0(sizeof(*info->ram)); + info->ram->transferred = ram_counters.transferred; +@@ -1000,12 +1002,11 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + /* legacy value. It is not used anymore */ + info->ram->skipped = 0; + info->ram->normal = ram_counters.normal; +- info->ram->normal_bytes = ram_counters.normal * +- qemu_target_page_size(); ++ info->ram->normal_bytes = ram_counters.normal * page_size; + info->ram->mbps = s->mbps; + info->ram->dirty_sync_count = ram_counters.dirty_sync_count; + info->ram->postcopy_requests = ram_counters.postcopy_requests; +- info->ram->page_size = qemu_target_page_size(); ++ info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; + +diff --git a/migration/multifd.c b/migration/multifd.c +index 7c9deb1921..8125d0015c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -289,7 +289,8 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + { + MultiFDPacket_t *packet = p->packet; +- uint32_t pages_max = MULTIFD_PACKET_SIZE / qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t pages_max = MULTIFD_PACKET_SIZE / 
page_size; + RAMBlock *block; + int i; + +@@ -358,14 +359,14 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + for (i = 0; i < p->pages->used; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +- if (offset > (block->used_length - qemu_target_page_size())) { ++ if (offset > (block->used_length - page_size)) { + error_setg(errp, "multifd: offset too long %" PRIu64 + " (max " RAM_ADDR_FMT ")", + offset, block->used_length); + return -1; + } + p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = qemu_target_page_size(); ++ p->pages->iov[i].iov_len = page_size; + } + + return 0; +diff --git a/migration/savevm.c b/migration/savevm.c +index d59e976d50..0bef031acb 100644 +--- a/migration/savevm.c ++++ b/migration/savevm.c +@@ -1685,6 +1685,7 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + { + PostcopyState ps = postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + uint64_t remote_pagesize_summary, local_pagesize_summary, remote_tps; ++ size_t page_size = qemu_target_page_size(); + Error *local_err = NULL; + + trace_loadvm_postcopy_handle_advise(); +@@ -1741,13 +1742,13 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis, + } + + remote_tps = qemu_get_be64(mis->from_src_file); +- if (remote_tps != qemu_target_page_size()) { ++ if (remote_tps != page_size) { + /* + * Again, some differences could be dealt with, but for now keep it + * simple. 
+ */ + error_report("Postcopy needs matching target page sizes (s=%d d=%zd)", +- (int)remote_tps, qemu_target_page_size()); ++ (int)remote_tps, page_size); + return -1; + } + +-- +2.35.3 + diff --git a/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch b/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch new file mode 100644 index 0000000..1cf4724 --- /dev/null +++ b/kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch @@ -0,0 +1,122 @@ +From 82637509cc9197ad9d1e1b286a608bf0da04b7b3 Mon Sep 17 00:00:00 2001 +From: David Edmondson +Date: Tue, 21 Dec 2021 09:34:41 +0000 +Subject: [PATCH 2/9] migration: Tally pre-copy, downtime and post-copy bytes + independently +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [2/8] 7d1bf37a3d93da88da6525d70fc1fce1abb92b83 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Provide information on the number of bytes copied in the pre-copy, +downtime and post-copy phases of migration. 
+ +Signed-off-by: David Edmondson +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit ae6806688016711bb9ec7541266d76ab511c5e3b) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 3 +++ + migration/ram.c | 7 +++++++ + monitor/hmp-cmds.c | 12 ++++++++++++ + qapi/migration.json | 13 ++++++++++++- + 4 files changed, 34 insertions(+), 1 deletion(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 616c3ff32e..e100b30f00 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -1016,6 +1016,9 @@ static void populate_ram_info(MigrationInfo *info, MigrationState *s) + info->ram->page_size = page_size; + info->ram->multifd_bytes = ram_counters.multifd_bytes; + info->ram->pages_per_second = s->pages_per_second; ++ info->ram->precopy_bytes = ram_counters.precopy_bytes; ++ info->ram->downtime_bytes = ram_counters.downtime_bytes; ++ info->ram->postcopy_bytes = ram_counters.postcopy_bytes; + + if (migrate_use_xbzrle()) { + info->has_xbzrle_cache = true; +diff --git a/migration/ram.c b/migration/ram.c +index 3e82c4ff46..e7173da217 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -393,6 +393,13 @@ MigrationStats ram_counters; + + static void ram_transferred_add(uint64_t bytes) + { ++ if (runstate_is_running()) { ++ ram_counters.precopy_bytes += bytes; ++ } else if (migration_in_postcopy()) { ++ ram_counters.postcopy_bytes += bytes; ++ } else { ++ ram_counters.downtime_bytes += bytes; ++ } + ram_counters.transferred += bytes; + } + +diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c +index 2669156b28..8c384dc1b2 100644 +--- a/monitor/hmp-cmds.c ++++ b/monitor/hmp-cmds.c +@@ -293,6 +293,18 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict) + monitor_printf(mon, "postcopy request count: %" PRIu64 "\n", + info->ram->postcopy_requests); + } ++ if (info->ram->precopy_bytes) { ++ monitor_printf(mon, "precopy ram: %" PRIu64 " kbytes\n", ++ 
info->ram->precopy_bytes >> 10); ++ } ++ if (info->ram->downtime_bytes) { ++ monitor_printf(mon, "downtime ram: %" PRIu64 " kbytes\n", ++ info->ram->downtime_bytes >> 10); ++ } ++ if (info->ram->postcopy_bytes) { ++ monitor_printf(mon, "postcopy ram: %" PRIu64 " kbytes\n", ++ info->ram->postcopy_bytes >> 10); ++ } + } + + if (info->has_disk) { +diff --git a/qapi/migration.json b/qapi/migration.json +index fe70a0c4b2..c8ec260ab0 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -46,6 +46,15 @@ + # @pages-per-second: the number of memory pages transferred per second + # (Since 4.0) + # ++# @precopy-bytes: The number of bytes sent in the pre-copy phase ++# (since 7.0). ++# ++# @downtime-bytes: The number of bytes sent while the guest is paused ++# (since 7.0). ++# ++# @postcopy-bytes: The number of bytes sent during the post-copy phase ++# (since 7.0). ++# + # Since: 0.14 + ## + { 'struct': 'MigrationStats', +@@ -54,7 +63,9 @@ + 'normal-bytes': 'int', 'dirty-pages-rate' : 'int', + 'mbps' : 'number', 'dirty-sync-count' : 'int', + 'postcopy-requests' : 'int', 'page-size' : 'int', +- 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64' } } ++ 'multifd-bytes' : 'uint64', 'pages-per-second' : 'uint64', ++ 'precopy-bytes' : 'uint64', 'downtime-bytes' : 'uint64', ++ 'postcopy-bytes' : 'uint64' } } + + ## + # @XBZRLECacheStats: +-- +2.31.1 + diff --git a/kvm-migration-add-remaining-params-has_-true-in-migratio.patch b/kvm-migration-add-remaining-params-has_-true-in-migratio.patch new file mode 100644 index 0000000..73011b3 --- /dev/null +++ b/kvm-migration-add-remaining-params-has_-true-in-migratio.patch @@ -0,0 +1,62 @@ +From 8aecb49fdd771c5819fccc9e750b2e9cd4e94b58 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 25 Jul 2022 22:02:35 -0300 +Subject: [PATCH 7/9] migration: add remaining params->has_* = true in + migration_instance_init() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: 
Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [7/8] fb622e5b88e14eb859d4903d9c088ba6ca63fc81 +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Some of params->has_* = true are missing in migration_instance_init, this +causes migrate_params_check() to skip some tests, allowing some +unsupported scenarios. + +Fix this by adding all missing params->has_* = true in +migration_instance_init(). + +Fixes: 69ef1f36b0 ("migration: define 'tls-creds' and 'tls-hostname' migration parameters") +Fixes: 1d58872a91 ("migration: do not wait for free thread") +Fixes: d2f1d29b95 ("migration: add support for a "tls-authz" migration parameter") +Signed-off-by: Leonardo Bras +Message-Id: <20220726010235.342927-1-leobras@redhat.com> +Reviewed-by: Peter Xu +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit df67aa3e61e2c83459da7d815962d9706f1528fc) +Signed-off-by: Leonardo Bras +--- + migration/migration.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/migration/migration.c b/migration/migration.c +index 35b3197eff..51e6726dac 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -4334,6 +4334,7 @@ static void migration_instance_init(Object *obj) + /* Set has_* up only for parameter checks */ + params->has_compress_level = true; + params->has_compress_threads = true; ++ params->has_compress_wait_thread = true; + params->has_decompress_threads = true; + params->has_throttle_trigger_threshold = true; + params->has_cpu_throttle_initial = true; +@@ -4354,6 +4355,9 @@ static void migration_instance_init(Object *obj) + params->has_announce_max = true; + params->has_announce_rounds = true; + params->has_announce_step = true; ++ params->has_tls_creds = true; ++ params->has_tls_hostname = true; ++ params->has_tls_authz = true; + + qemu_sem_init(&ms->postcopy_pause_sem, 0); + qemu_sem_init(&ms->postcopy_pause_rp_sem, 0); +-- +2.31.1 + diff --git 
a/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch b/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch new file mode 100644 index 0000000..5008e15 --- /dev/null +++ b/kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch @@ -0,0 +1,83 @@ +From 2516a21205e67078cb735e9fd47ba50156c166b7 Mon Sep 17 00:00:00 2001 +From: Leonardo Bras +Date: Mon, 11 Jul 2022 18:11:13 -0300 +Subject: [PATCH 5/9] migration/multifd: Report to user when zerocopy not + working +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 201: Zero-copy-send fixes + improvements +RH-Commit: [5/8] 0b2e23b7f8ae72936e11369cd44ba474ef3b9e8c +RH-Bugzilla: 2110203 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina + +Some errors, like the lack of Scatter-Gather support by the network +interface (NETIF_F_SG), may cause sendmsg(...,MSG_ZEROCOPY) to fail on using +zero-copy, which causes it to fall back to the default copying mechanism. + +After each full dirty-bitmap scan there should be a zero-copy flush +happening, which checks each of the previous calls to +sendmsg(...,MSG_ZEROCOPY) for errors. If all of them failed to use zero-copy, then +increment dirty_sync_missed_zero_copy migration stat to let the user know +about it. + +Signed-off-by: Leonardo Bras +Reviewed-by: Daniel P. Berrangé +Acked-by: Peter Xu +Message-Id: <20220711211112.18951-4-leobras@redhat.com> +Signed-off-by: Dr. 
David Alan Gilbert +(cherry picked from commit d59c40cc483729f2e67c80e58df769ad19976fe9) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + migration/ram.c | 5 +++++ + migration/ram.h | 2 ++ + 3 files changed, 9 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 90ab4c4346..7c16523e6b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -631,6 +631,8 @@ int multifd_send_sync_main(QEMUFile *f) + if (ret < 0) { + error_report_err(err); + return -1; ++ } else if (ret == 1) { ++ dirty_sync_missed_zero_copy(); + } + } + } +diff --git a/migration/ram.c b/migration/ram.c +index e7173da217..93cdb456ac 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -403,6 +403,11 @@ static void ram_transferred_add(uint64_t bytes) + ram_counters.transferred += bytes; + } + ++void dirty_sync_missed_zero_copy(void) ++{ ++ ram_counters.dirty_sync_missed_zero_copy++; ++} ++ + /* used by the search for pages to send */ + struct PageSearchStatus { + /* Current block being searched */ +diff --git a/migration/ram.h b/migration/ram.h +index c515396a9a..69c3ccb26a 100644 +--- a/migration/ram.h ++++ b/migration/ram.h +@@ -88,4 +88,6 @@ void ram_write_tracking_prepare(void); + int ram_write_tracking_start(void); + void ram_write_tracking_stop(void); + ++void dirty_sync_missed_zero_copy(void); ++ + #endif +-- +2.31.1 + diff --git a/kvm-multifd-Add-missing-documentation.patch b/kvm-multifd-Add-missing-documentation.patch new file mode 100644 index 0000000..361f0c1 --- /dev/null +++ b/kvm-multifd-Add-missing-documentation.patch @@ -0,0 +1,82 @@ +From 3b567f762cbd8d4ffaf717b0baba9cf9fe9614c2 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 03/37] multifd: Add missing documentation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [3/26] 
924fca4305ebd8669955d456fc1c515f509e6026 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 18ede636bc29fd8bda628fe3e5c593f8c1b734f4) +(fixed typo in commit message) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 2 ++ + migration/multifd-zstd.c | 2 ++ + migration/multifd.c | 1 + + 3 files changed, 5 insertions(+) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index ab4ba75d75..f403d2f031 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -74,6 +74,7 @@ static int zlib_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -96,6 +97,7 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 693bddf8c9..8d657f8860 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -86,6 +86,7 @@ static int zstd_send_setup(MultiFDSendParams *p, Error **errp) + * Close the channel and return memory. 
+ * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + { +@@ -109,6 +110,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * + * @p: Params for the channel that we are using + * @used: number of pages used ++ * @errp: pointer to an error + */ + static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + { +diff --git a/migration/multifd.c b/migration/multifd.c +index 8ea86d81dc..cdeffdc4c5 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -66,6 +66,7 @@ static int nocomp_send_setup(MultiFDSendParams *p, Error **errp) + * For no compression this function does nothing. + * + * @p: Params for the channel that we are using ++ * @errp: pointer to an error + */ + static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + { +-- +2.35.3 + diff --git a/kvm-multifd-Fill-offset-and-block-for-reception.patch b/kvm-multifd-Fill-offset-and-block-for-reception.patch new file mode 100644 index 0000000..7996f87 --- /dev/null +++ b/kvm-multifd-Fill-offset-and-block-for-reception.patch @@ -0,0 +1,50 @@ +From 8c1edb1889ff44506f35fa185d6569b0dd9d7260 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 07/37] multifd: Fill offset and block for reception +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [7/26] 51a9e6b76af956d63fc735172211d9bf6f0f6f80 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We were using the iov directly, but we will need this info on the +following patch. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 01102a2ef6c97acc5cc8a2c3bb62b7665a20f51f) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 55d99a8232..0533da154a 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -354,6 +354,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + ++ p->pages->block = block; + for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + +@@ -363,6 +364,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + offset, block->used_length); + return -1; + } ++ p->pages->offset[i] = offset; + p->pages->iov[i].iov_base = block->host + offset; + p->pages->iov[i].iov_len = page_size; + } +-- +2.35.3 + diff --git a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch index c7159e1..dccdf1f 100644 --- a/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +++ b/kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch @@ -1,19 +1,19 @@ -From c1a2866d158ac67179fa0d17f1710302eb9a3866 Mon Sep 17 00:00:00 2001 +From 7a7e2191f1ac4114380248cbd3c6ab7425250747 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:37 -0300 -Subject: [PATCH 14/18] multifd: Implement zero copy write in multifd migration +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 23/37] multifd: Implement zero copy write in multifd migration (multifd-zero-copy) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [8/11] b93009cc94b2cc4b464b4f68ebfb37b870dd6f7d (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. 
David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [23/26] 904ce3909cfef62dd84cc7d3c6a3482e7e6f28e9 +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert Implement zero copy send on nocomp_send_write(), by making use of QIOChannel writev + flags & flush interface. @@ -51,10 +51,10 @@ Signed-off-by: Leonardo Bras 4 files changed, 50 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c -index d91efb66fe..102236fba0 100644 +index 8e28f2ee41..5357efd348 100644 --- a/migration/migration.c +++ b/migration/migration.c -@@ -1485,7 +1485,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) +@@ -1471,7 +1471,16 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp) error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: "); return false; } @@ -73,10 +73,10 @@ index d91efb66fe..102236fba0 100644 } diff --git a/migration/multifd.c b/migration/multifd.c -index 8fca6c970e..0b5b41c53f 100644 +index 193f70cdba..90ab4c4346 100644 --- a/migration/multifd.c +++ b/migration/multifd.c -@@ -571,6 +571,7 @@ void multifd_save_cleanup(void) +@@ -576,6 +576,7 @@ void multifd_save_cleanup(void) int multifd_send_sync_main(QEMUFile *f) { int i; @@ -84,7 +84,7 @@ index 8fca6c970e..0b5b41c53f 100644 if (!migrate_use_multifd()) { return 0; -@@ -581,6 +582,20 @@ int multifd_send_sync_main(QEMUFile *f) +@@ -586,6 +587,20 @@ int multifd_send_sync_main(QEMUFile *f) return -1; } } @@ -105,7 +105,7 @@ index 8fca6c970e..0b5b41c53f 100644 for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -602,6 +617,17 @@ int multifd_send_sync_main(QEMUFile *f) +@@ -607,6 +622,17 @@ int multifd_send_sync_main(QEMUFile *f) ram_counters.transferred += p->packet_len; qemu_mutex_unlock(&p->mutex); qemu_sem_post(&p->sem); @@ -123,7 +123,7 @@ index 8fca6c970e..0b5b41c53f 
100644 } for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; -@@ -686,8 +712,8 @@ static void *multifd_send_thread(void *opaque) +@@ -691,8 +717,8 @@ static void *multifd_send_thread(void *opaque) p->iov[0].iov_base = p->packet; } @@ -134,7 +134,7 @@ index 8fca6c970e..0b5b41c53f 100644 if (ret != 0) { break; } -@@ -928,6 +954,13 @@ int multifd_save_setup(Error **errp) +@@ -933,6 +959,13 @@ int multifd_save_setup(Error **errp) /* We need one extra place for the packet header */ p->iov = g_new0(struct iovec, page_count + 1); p->normal = g_new0(ram_addr_t, page_count); @@ -149,10 +149,10 @@ index 8fca6c970e..0b5b41c53f 100644 } diff --git a/migration/multifd.h b/migration/multifd.h -index cd495195ce..7ec688fb4f 100644 +index 92de878155..11d5e273e6 100644 --- a/migration/multifd.h +++ b/migration/multifd.h -@@ -96,6 +96,8 @@ typedef struct { +@@ -95,6 +95,8 @@ typedef struct { uint32_t packet_len; /* pointer to the packet */ MultiFDPacket_t *packet; diff --git a/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch b/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..e23d35d --- /dev/null +++ b/kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch @@ -0,0 +1,98 @@ +From 75cd92cb7cff055f46163e64d66ba3f685f9ac04 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 09/37] multifd: Make zlib compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [9/26] d33dd62b833d50fee989a195aebcc8d5e7d43181 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit a5ed22948873b50fcf1415d1ce15c71d61a9388d) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 +++++++++-------- + 1 file changed, 9 insertions(+), 8 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 330fc021c5..a1950a4588 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -100,8 +101,8 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t out_size = 0; + int ret; +@@ -115,8 +116,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_in = iov[i].iov_len; +- zs->next_in = iov[i].iov_base; ++ zs->avail_in = page_size; ++ zs->next_in = p->pages->block->host + p->pages->offset[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +@@ -240,6 +241,7 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ +@@ -264,7 +266,6 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + zs->next_in = z->zbuff; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +@@ -272,8 +273,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + flush = Z_SYNC_FLUSH; + } + +- zs->avail_out = 
iov->iov_len; +- zs->next_out = iov->iov_base; ++ zs->avail_out = page_size; ++ zs->next_out = p->pages->block->host + p->pages->offset[i]; + + /* + * Welcome to inflate semantics +@@ -286,8 +287,8 @@ static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = inflate(zs, flush); + } while (ret == Z_OK && zs->avail_in +- && (zs->total_out - start) < iov->iov_len); +- if (ret == Z_OK && (zs->total_out - start) < iov->iov_len) { ++ && (zs->total_out - start) < page_size); ++ if (ret == Z_OK && (zs->total_out - start) < page_size) { + error_setg(errp, "multifd %d: inflate generated too few output", + p->id); + return -1; +-- +2.35.3 + diff --git a/kvm-multifd-Make-zlib-use-iov-s.patch b/kvm-multifd-Make-zlib-use-iov-s.patch new file mode 100644 index 0000000..6310738 --- /dev/null +++ b/kvm-multifd-Make-zlib-use-iov-s.patch @@ -0,0 +1,53 @@ +From 1cdab9cadef1ed84ec34651a1edbffa36c1e67d0 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 12/37] multifd: Make zlib use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [12/26] 58630452e14802e71a9eadb17cfe4964ebf8e091 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 48a4a44c1cde382c6b8e7792d01fe7d9b0a59c69) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index a987e4a26c..96475e096e 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -145,6 +145,9 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + } + out_size += available - zs->avail_out; + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = out_size; ++ p->iovs_num++; + p->next_packet_size = out_size; + p->flags |= MULTIFD_FLAG_ZLIB; + +@@ -164,10 +167,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zlib_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch b/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch new file mode 100644 index 0000000..3a10280 --- /dev/null +++ b/kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch @@ -0,0 +1,94 @@ +From ab6262bd4829e3bd6437fe32737209df2af2d141 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 08/37] multifd: Make zstd compression method not use iovs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [8/26] 010579fa73b5a4c6fd631dc9fbaf6f974974bc99 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit f5ff548774c22b34a0c0e2fef85f1be11160d774) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index f0d1105792..d9ed42622b 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -13,6 +13,7 @@ + #include "qemu/osdep.h" + #include + #include "qemu/rcu.h" ++#include "exec/ramblock.h" + #include "exec/target_page.h" + #include "qapi/error.h" + #include "migration.h" +@@ -113,8 +114,8 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { +- struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; ++ size_t page_size = qemu_target_page_size(); + int ret; + uint32_t i; + +@@ -128,8 +129,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = iov[i].iov_base; +- z->in.size = iov[i].iov_len; ++ z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.size = page_size; + z->in.pos = 0; + + /* +@@ -261,7 +262,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = p->pages->num * qemu_target_page_size(); ++ size_t page_size = qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * page_size; + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,10 +285,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + z->in.pos = 0; + + for (i = 0; i < p->pages->num; i++) { +- struct iovec *iov = &p->pages->iov[i]; +- +- z->out.dst = iov->iov_base; +- z->out.size = iov->iov_len; ++ z->out.dst = p->pages->block->host + p->pages->offset[i]; ++ z->out.size = page_size; + 
z->out.pos = 0; + + /* +@@ -300,8 +300,8 @@ static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + do { + ret = ZSTD_decompressStream(z->zds, &z->out, &z->in); + } while (ret > 0 && (z->in.size - z->in.pos > 0) +- && (z->out.pos < iov->iov_len)); +- if (ret > 0 && (z->out.pos < iov->iov_len)) { ++ && (z->out.pos < page_size)); ++ if (ret > 0 && (z->out.pos < page_size)) { + error_setg(errp, "multifd %d: decompressStream buffer too small", + p->id); + return -1; +-- +2.35.3 + diff --git a/kvm-multifd-Make-zstd-use-iov-s.patch b/kvm-multifd-Make-zstd-use-iov-s.patch new file mode 100644 index 0000000..af3e7fb --- /dev/null +++ b/kvm-multifd-Make-zstd-use-iov-s.patch @@ -0,0 +1,53 @@ +From bac5ce0b4d3552d6056045f201b4e50dd6204b31 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 13/37] multifd: Make zstd use iov's +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [13/26] 4d7036fb32efdf088d23737b9710e6ad1a4654aa +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 0a818b89eb8eaf79ae651405907d8110a0935cfd) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zstd.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 2185a83eac..4e60cdbc54 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -156,6 +156,9 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return -1; + } + } ++ p->iov[p->iovs_num].iov_base = z->zbuff; ++ p->iov[p->iovs_num].iov_len = z->out.pos; ++ p->iovs_num++; + p->next_packet_size = z->out.pos; + p->flags |= MULTIFD_FLAG_ZSTD; + +@@ -175,10 +178,7 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- struct zstd_data *z = p->data; +- +- return qio_channel_write_all(p->c, (void *)z->zbuff, p->next_packet_size, +- errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +-- +2.35.3 + diff --git a/kvm-multifd-Move-iov-from-pages-to-params.patch b/kvm-multifd-Move-iov-from-pages-to-params.patch new file mode 100644 index 0000000..6a59707 --- /dev/null +++ b/kvm-multifd-Move-iov-from-pages-to-params.patch @@ -0,0 +1,190 @@ +From 1181a9cbcaf37a82aa7bf117ef209f554b8c4a71 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 11/37] multifd: Move iov from pages to params +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [11/26] 24dff3ef68cf3327811242193502319ed3e3940a +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +This will allow us to reduce the number of system calls on the next patch. 
+ +Signed-off-by: Juan Quintela +(cherry picked from commit 226468ba3dea950ab4bb0b729878dde25812da1c) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 34 ++++++++++++++++++++++++---------- + migration/multifd.h | 8 ++++++-- + 2 files changed, 30 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0d19470f9..5004f394aa 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -86,7 +86,16 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = p->pages->num * qemu_target_page_size(); ++ MultiFDPages_t *pages = p->pages; ++ size_t page_size = qemu_target_page_size(); ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ p->iov[p->iovs_num].iov_len = page_size; ++ p->iovs_num++; ++ } ++ ++ p->next_packet_size = p->pages->num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -104,7 +113,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + */ + static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) + { +- return qio_channel_writev_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); + } + + /** +@@ -146,13 +155,18 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) + static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; ++ size_t page_size = qemu_target_page_size(); + + if (flags != MULTIFD_FLAG_NOCOMP) { + error_setg(errp, "multifd %u: flags received %x flags expected %x", + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); ++ for (int i = 0; i < p->pages->num; i++) { ++ p->iov[i].iov_base = p->pages->block->host + p->pages->offset[i]; ++ 
p->iov[i].iov_len = page_size; ++ } ++ return qio_channel_readv_all(p->c, p->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -242,7 +256,6 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1); + + pages->allocated = size; +- pages->iov = g_new0(struct iovec, size); + pages->offset = g_new0(ram_addr_t, size); + + return pages; +@@ -254,8 +267,6 @@ static void multifd_pages_clear(MultiFDPages_t *pages) + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +- g_free(pages->iov); +- pages->iov = NULL; + g_free(pages->offset); + pages->offset = NULL; + g_free(pages); +@@ -365,8 +376,6 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + p->pages->offset[i] = offset; +- p->pages->iov[i].iov_base = block->host + offset; +- p->pages->iov[i].iov_len = page_size; + } + + return 0; +@@ -470,8 +479,6 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + + if (pages->block == block) { + pages->offset[pages->num] = offset; +- pages->iov[pages->num].iov_base = block->host + offset; +- pages->iov[pages->num].iov_len = qemu_target_page_size(); + pages->num++; + + if (pages->num < pages->allocated) { +@@ -564,6 +571,8 @@ void multifd_save_cleanup(void) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -651,6 +660,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; ++ p->iovs_num = 0; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -919,6 +929,7 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + 
p->tls_hostname = g_strdup(s->hostname); ++ p->iov = g_new0(struct iovec, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +@@ -1018,6 +1029,8 @@ int multifd_load_cleanup(Error **errp) + p->packet_len = 0; + g_free(p->packet); + p->packet = NULL; ++ g_free(p->iov); ++ p->iov = NULL; + multifd_recv_state->ops->recv_cleanup(p); + } + qemu_sem_destroy(&multifd_recv_state->sem_sync); +@@ -1158,6 +1171,7 @@ int multifd_load_setup(Error **errp) + + sizeof(uint64_t) * page_count; + p->packet = g_malloc0(p->packet_len); + p->name = g_strdup_printf("multifdrecv_%d", i); ++ p->iov = g_new0(struct iovec, page_count); + } + + for (i = 0; i < thread_count; i++) { +diff --git a/migration/multifd.h b/migration/multifd.h +index e57adc783b..c3f18af364 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -62,8 +62,6 @@ typedef struct { + uint64_t packet_num; + /* offset of each page */ + ram_addr_t *offset; +- /* pointer to each page */ +- struct iovec *iov; + RAMBlock *block; + } MultiFDPages_t; + +@@ -110,6 +108,10 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to send */ ++ struct iovec *iov; ++ /* number of iovs used */ ++ uint32_t iovs_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +@@ -149,6 +151,8 @@ typedef struct { + uint64_t num_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; ++ /* buffers to recv */ ++ struct iovec *iov; + /* used for de-compression methods */ + void *data; + } MultiFDRecvParams; +-- +2.35.3 + diff --git a/kvm-multifd-Remove-send_write-method.patch b/kvm-multifd-Remove-send_write-method.patch new file mode 100644 index 0000000..79fc649 --- /dev/null +++ b/kvm-multifd-Remove-send_write-method.patch @@ -0,0 +1,160 @@ +From 2952487c7e5ed14796fbffae0b964a35790d6850 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 14/37] multifd: 
Remove send_write() method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [14/26] 5fa59ffa09099fbc6da84e9a192ca71af52cc98f +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Everything use now iov's. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 468fcb5dd0c965e1af0da9efab09b1462631da18) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 17 ----------------- + migration/multifd-zstd.c | 17 ----------------- + migration/multifd.c | 20 ++------------------ + migration/multifd.h | 2 -- + 4 files changed, 2 insertions(+), 54 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 96475e096e..8ed29b9633 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -154,22 +154,6 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zlib_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. 
+- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zlib_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zlib_recv_setup: setup receive side + * +@@ -312,7 +296,6 @@ static MultiFDMethods multifd_zlib_ops = { + .send_setup = zlib_send_setup, + .send_cleanup = zlib_send_cleanup, + .send_prepare = zlib_send_prepare, +- .send_write = zlib_send_write, + .recv_setup = zlib_recv_setup, + .recv_cleanup = zlib_recv_cleanup, + .recv_pages = zlib_recv_pages +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 4e60cdbc54..25e1f517b5 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -165,22 +165,6 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * zstd_send_write: do the actual write of the data +- * +- * Do the actual write of the comprresed buffer. 
+- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int zstd_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * zstd_recv_setup: setup receive side + * +@@ -325,7 +309,6 @@ static MultiFDMethods multifd_zstd_ops = { + .send_setup = zstd_send_setup, + .send_cleanup = zstd_send_cleanup, + .send_prepare = zstd_send_prepare, +- .send_write = zstd_send_write, + .recv_setup = zstd_recv_setup, + .recv_cleanup = zstd_recv_cleanup, + .recv_pages = zstd_recv_pages +diff --git a/migration/multifd.c b/migration/multifd.c +index 5004f394aa..1e1551d78b 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -100,22 +100,6 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + return 0; + } + +-/** +- * nocomp_send_write: do the actual write of the data +- * +- * For no compression we just have to write the data. 
+- * +- * Returns 0 for success or -1 for error +- * +- * @p: Params for the channel that we are using +- * @used: number of pages used +- * @errp: pointer to an error +- */ +-static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp) +-{ +- return qio_channel_writev_all(p->c, p->iov, p->iovs_num, errp); +-} +- + /** + * nocomp_recv_setup: setup receive side + * +@@ -173,7 +157,6 @@ static MultiFDMethods multifd_nocomp_ops = { + .send_setup = nocomp_send_setup, + .send_cleanup = nocomp_send_cleanup, + .send_prepare = nocomp_send_prepare, +- .send_write = nocomp_send_write, + .recv_setup = nocomp_recv_setup, + .recv_cleanup = nocomp_recv_cleanup, + .recv_pages = nocomp_recv_pages +@@ -687,7 +670,8 @@ static void *multifd_send_thread(void *opaque) + } + + if (used) { +- ret = multifd_send_state->ops->send_write(p, used, &local_err); ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index c3f18af364..7496f951a7 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -164,8 +164,6 @@ typedef struct { + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ + int (*send_prepare)(MultiFDSendParams *p, Error **errp); +- /* Write the send packet */ +- int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ + int (*recv_setup)(MultiFDRecvParams *p, Error **errp); + /* Cleanup for receiving side */ +-- +2.35.3 + diff --git a/kvm-multifd-Rename-used-field-to-num.patch b/kvm-multifd-Rename-used-field-to-num.patch new file mode 100644 index 0000000..24bdd8c --- /dev/null +++ b/kvm-multifd-Rename-used-field-to-num.patch @@ -0,0 +1,177 @@ +From 003ef20d11b33a7139fae6fbcf170188a07afc43 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:22 -0300 +Subject: [PATCH 02/37] multifd: Rename used field to num +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [2/26] 952283197ef89be4d61c7690bb6c3194e5c67217 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We will need to split it later in zero_num (number of zero pages) and +normal_num (number of normal pages). This name is better. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 90a3d2f9d5f729147b2827c177932603ae6e2d55) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 38 +++++++++++++++++++------------------- + migration/multifd.h | 2 +- + 2 files changed, 20 insertions(+), 20 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 8125d0015c..8ea86d81dc 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -252,7 +252,7 @@ static MultiFDPages_t *multifd_pages_init(size_t size) + + static void multifd_pages_clear(MultiFDPages_t *pages) + { +- pages->used = 0; ++ pages->num = 0; + pages->allocated = 0; + pages->packet_num = 0; + pages->block = NULL; +@@ -270,7 +270,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->used); ++ packet->pages_used = cpu_to_be32(p->pages->num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -278,7 +278,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ + uint64_t temp = p->pages->offset[i]; + +@@ -332,18 +332,18 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + 
p->pages = multifd_pages_init(packet->pages_alloc); + } + +- p->pages->used = be32_to_cpu(packet->pages_used); +- if (p->pages->used > packet->pages_alloc) { ++ p->pages->num = be32_to_cpu(packet->pages_used); ++ if (p->pages->num > packet->pages_alloc) { + error_setg(errp, "multifd: received packet " + "with %d pages and expected maximum pages are %d", +- p->pages->used, packet->pages_alloc) ; ++ p->pages->num, packet->pages_alloc) ; + return -1; + } + + p->next_packet_size = be32_to_cpu(packet->next_packet_size); + p->packet_num = be64_to_cpu(packet->packet_num); + +- if (p->pages->used == 0) { ++ if (p->pages->num == 0) { + return 0; + } + +@@ -356,7 +356,7 @@ static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp) + return -1; + } + +- for (i = 0; i < p->pages->used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint64_t offset = be64_to_cpu(packet->offset[i]); + + if (offset > (block->used_length - page_size)) { +@@ -443,13 +443,13 @@ static int multifd_send_pages(QEMUFile *f) + } + qemu_mutex_unlock(&p->mutex); + } +- assert(!p->pages->used); ++ assert(!p->pages->num); + assert(!p->pages->block); + + p->packet_num = multifd_send_state->packet_num++; + multifd_send_state->pages = p->pages; + p->pages = pages; +- transferred = ((uint64_t) pages->used) * qemu_target_page_size() ++ transferred = ((uint64_t) pages->num) * qemu_target_page_size() + + p->packet_len; + qemu_file_update_transfer(f, transferred); + ram_counters.multifd_bytes += transferred; +@@ -469,12 +469,12 @@ int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset) + } + + if (pages->block == block) { +- pages->offset[pages->used] = offset; +- pages->iov[pages->used].iov_base = block->host + offset; +- pages->iov[pages->used].iov_len = qemu_target_page_size(); +- pages->used++; ++ pages->offset[pages->num] = offset; ++ pages->iov[pages->num].iov_base = block->host + offset; ++ pages->iov[pages->num].iov_len = qemu_target_page_size(); ++ pages->num++; + +- if 
(pages->used < pages->allocated) { ++ if (pages->num < pages->allocated) { + return 1; + } + } +@@ -586,7 +586,7 @@ void multifd_send_sync_main(QEMUFile *f) + if (!migrate_use_multifd()) { + return; + } +- if (multifd_send_state->pages->used) { ++ if (multifd_send_state->pages->num) { + if (multifd_send_pages(f) < 0) { + error_report("%s: multifd_send_pages fail", __func__); + return; +@@ -649,7 +649,7 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->used; ++ uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + flags = p->flags; + +@@ -665,7 +665,7 @@ static void *multifd_send_thread(void *opaque) + p->flags = 0; + p->num_packets++; + p->num_pages += used; +- p->pages->used = 0; ++ p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +@@ -1091,7 +1091,7 @@ static void *multifd_recv_thread(void *opaque) + break; + } + +- used = p->pages->used; ++ used = p->pages->num; + flags = p->flags; + /* recv methods don't know how to handle the SYNC flag */ + p->flags &= ~MULTIFD_FLAG_SYNC; +diff --git a/migration/multifd.h b/migration/multifd.h +index 15c50ca0b2..86820dd028 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -55,7 +55,7 @@ typedef struct { + + typedef struct { + /* number of used pages */ +- uint32_t used; ++ uint32_t num; + /* number of allocated pages */ + uint32_t allocated; + /* global number of generated multifd packets */ +-- +2.35.3 + diff --git a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch index 415e3a9..d54cce8 100644 --- a/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +++ b/kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch @@ -1,19 +1,19 @@ -From 63255c13492f42a3236d96e706e5f8e70bb4e219 Mon Sep 17 00:00:00 2001 +From 33a38fef5e889b45571228bde519746fd90d8877 Mon Sep 17 00:00:00 2001 From: Leonardo 
Bras -Date: Fri, 13 May 2022 03:28:36 -0300 -Subject: [PATCH 13/18] multifd: Send header packet without flags if +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 22/37] multifd: Send header packet without flags if zero-copy-send is enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [7/11] 137eea685e387d3d6aff187ec3fcac05bc16b6e3 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [22/26] 9abfee42b72f11911cf128519826d09cbd2f5bc3 +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert Since d48c3a0445 ("multifd: Use a single writev on the send side"), sending the header packet and the memory pages happens in the same @@ -49,10 +49,10 @@ Signed-off-by: Leonardo Bras 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c -index cdb57439a7..8fca6c970e 100644 +index 1e34e01ebc..193f70cdba 100644 --- a/migration/multifd.c +++ b/migration/multifd.c -@@ -619,6 +619,7 @@ static void *multifd_send_thread(void *opaque) +@@ -624,6 +624,7 @@ static void *multifd_send_thread(void *opaque) MultiFDSendParams *p = opaque; Error *local_err = NULL; int ret = 0; @@ -60,7 +60,7 @@ index cdb57439a7..8fca6c970e 100644 trace_multifd_send_thread_start(p->id); rcu_register_thread(); -@@ -641,9 +642,14 @@ static void *multifd_send_thread(void *opaque) +@@ -646,9 +647,14 @@ static void *multifd_send_thread(void *opaque) if (p->pending_job) { uint64_t packet_num = p->packet_num; uint32_t flags = p->flags; @@ -76,7 +76,7 @@ index cdb57439a7..8fca6c970e 100644 for (int i = 0; i < p->pages->num; i++) { p->normal[p->normal_num] = p->pages->offset[i]; p->normal_num++; -@@ -667,8 +673,18 @@ static void *multifd_send_thread(void 
*opaque) +@@ -672,8 +678,18 @@ static void *multifd_send_thread(void *opaque) trace_multifd_send(p->id, packet_num, p->normal_num, flags, p->next_packet_size); diff --git a/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch b/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch new file mode 100644 index 0000000..ef5e6d2 --- /dev/null +++ b/kvm-multifd-The-variable-is-only-used-inside-the-loop.patch @@ -0,0 +1,48 @@ +From 56cd14fc23c58707b9184da11f36d777bba6ce78 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 04/37] multifd: The variable is only used inside the loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [4/26] 45d8bbde75ebbef6329c41ddb56db4526739f94f +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 1943c11a62bd0741e5d9fbba78404fe47ebea820) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index cdeffdc4c5..ce7101cf9d 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -629,7 +629,6 @@ static void *multifd_send_thread(void *opaque) + MultiFDSendParams *p = opaque; + Error *local_err = NULL; + int ret = 0; +- uint32_t flags = 0; + + trace_multifd_send_thread_start(p->id); + rcu_register_thread(); +@@ -652,7 +651,7 @@ static void *multifd_send_thread(void *opaque) + if (p->pending_job) { + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; +- flags = p->flags; ++ uint32_t flags = p->flags; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, used, +-- +2.35.3 + diff --git a/kvm-multifd-Use-a-single-writev-on-the-send-side.patch b/kvm-multifd-Use-a-single-writev-on-the-send-side.patch new file mode 100644 index 0000000..b4f3036 --- /dev/null +++ b/kvm-multifd-Use-a-single-writev-on-the-send-side.patch @@ -0,0 +1,80 @@ +From 4051de396e02ea2c1911c842426318bcd97f93c7 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 15/37] multifd: Use a single writev on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [15/26] c37063c813fc0ba695072117f272360e5c413803 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +Until now, we wrote the packet header with write(), and the rest of the +pages with writev(). Just increase the size of the iovec and do a +single writev(). + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit d48c3a044537689866fe44e65d24c7d39a68868a) +Signed-off-by: Leonardo Bras +--- + migration/multifd.c | 20 ++++++++------------ + 1 file changed, 8 insertions(+), 12 deletions(-) + +diff --git a/migration/multifd.c b/migration/multifd.c +index 1e1551d78b..d0f86542b1 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -643,7 +643,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = p->flags; +- p->iovs_num = 0; ++ p->iovs_num = 1; + + if (used) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); +@@ -663,20 +663,15 @@ static void *multifd_send_thread(void *opaque) + trace_multifd_send(p->id, packet_num, used, flags, + p->next_packet_size); + +- ret = qio_channel_write_all(p->c, (void *)p->packet, +- p->packet_len, &local_err); ++ p->iov[0].iov_len = p->packet_len; ++ p->iov[0].iov_base = p->packet; ++ ++ ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, ++ &local_err); + if (ret != 0) { + break; + } + +- if (used) { +- ret = qio_channel_writev_all(p->c, p->iov, p->iovs_num, +- &local_err); +- if (ret != 0) { +- break; +- } +- } +- + qemu_mutex_lock(&p->mutex); + p->pending_job--; + qemu_mutex_unlock(&p->mutex); +@@ -913,7 +908,8 @@ int multifd_save_setup(Error **errp) + p->packet->version = cpu_to_be32(MULTIFD_VERSION); + p->name = g_strdup_printf("multifdsend_%d", i); + p->tls_hostname = g_strdup(s->hostname); +- p->iov = g_new0(struct iovec, page_count); ++ /* We need one extra place for the packet header */ ++ p->iov = g_new0(struct iovec, page_count + 1); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +-- +2.35.3 + diff --git a/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch b/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch new file mode 100644 index 0000000..032dac2 --- /dev/null +++ b/kvm-multifd-Use-normal-pages-array-on-the-send-side.patch @@ -0,0 
+1,261 @@ +From 3b57c876e1eaca34fb5bd9067553de945013d4be Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:24 -0300 +Subject: [PATCH 16/37] multifd: Use normal pages array on the send side +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [16/26] 1c48806474daf48fe93920ac361311af95c6a6f3 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +We are only sending normal pages through multifd channels. +Later in this series, we are going to also send zero pages. +We are going to detect if a page is zero or non zero in the multifd +channel thread, not on the main thread. + +So we receive an array of pages pages->offset[N] + +And we will end with: + +p->normal[N - zero_pages] +p->zero[zero_pages]. + +In this patch, we just copy all the pages in offset to normal. + +for (i = 0; i < pages->num; i++) { + p->normal[p->normal_num] = pages->offset[i]; + p->normal_num++; +} + +Later in the series this becomes: + +for (i = 0; i < pages->num; i++) { + if (buffer_is_zero(pages->offset[i])) { + p->zero[p->zero_num] = pages->offset[i]; + p->zero_num++; + } else { + p->normal[p->normal_num] = pages->offset[i]; + p->normal_num++; + } +} + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert + +--- + +Improving comment (dave) +Renaming num_normal_pages to total_normal_pages (peter) + +(cherry picked from commit 815956f03902980c771da64b17f7f791c1cb57b0) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 6 +++--- + migration/multifd-zstd.c | 6 +++--- + migration/multifd.c | 30 +++++++++++++++++++----------- + migration/multifd.h | 8 ++++++-- + migration/trace-events | 4 ++-- + 5 files changed, 33 insertions(+), 21 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 8ed29b9633..8508f26adf 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -108,16 +108,16 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = Z_SYNC_FLUSH; + } + + zs->avail_in = page_size; +- zs->next_in = p->pages->block->host + p->pages->offset[i]; ++ zs->next_in = p->pages->block->host + p->normal[i]; + + zs->avail_out = available; + zs->next_out = z->zbuff + out_size; +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 25e1f517b5..693af3a140 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -123,13 +123,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == p->pages->num - 1) { ++ if (i == p->normal_num - 1) { + flush = ZSTD_e_flush; + } +- z->in.src = p->pages->block->host + p->pages->offset[i]; ++ z->in.src = p->pages->block->host + p->normal[i]; + z->in.size = page_size; + z->in.pos = 0; + +diff --git a/migration/multifd.c b/migration/multifd.c +index d0f86542b1..3725226400 
100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -89,13 +89,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + MultiFDPages_t *pages = p->pages; + size_t page_size = qemu_target_page_size(); + +- for (int i = 0; i < p->pages->num; i++) { +- p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i]; ++ for (int i = 0; i < p->normal_num; i++) { ++ p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i]; + p->iov[p->iovs_num].iov_len = page_size; + p->iovs_num++; + } + +- p->next_packet_size = p->pages->num * page_size; ++ p->next_packet_size = p->normal_num * page_size; + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -262,7 +262,7 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + + packet->flags = cpu_to_be32(p->flags); + packet->pages_alloc = cpu_to_be32(p->pages->allocated); +- packet->pages_used = cpu_to_be32(p->pages->num); ++ packet->pages_used = cpu_to_be32(p->normal_num); + packet->next_packet_size = cpu_to_be32(p->next_packet_size); + packet->packet_num = cpu_to_be64(p->packet_num); + +@@ -270,9 +270,9 @@ static void multifd_send_fill_packet(MultiFDSendParams *p) + strncpy(packet->ramblock, p->pages->block->idstr, 256); + } + +- for (i = 0; i < p->pages->num; i++) { ++ for (i = 0; i < p->normal_num; i++) { + /* there are architectures where ram_addr_t is 32 bit */ +- uint64_t temp = p->pages->offset[i]; ++ uint64_t temp = p->normal[i]; + + packet->offset[i] = cpu_to_be64(temp); + } +@@ -556,6 +556,8 @@ void multifd_save_cleanup(void) + p->packet = NULL; + g_free(p->iov); + p->iov = NULL; ++ g_free(p->normal); ++ p->normal = NULL; + multifd_send_state->ops->send_cleanup(p, &local_err); + if (local_err) { + migrate_set_error(migrate_get_current(), local_err); +@@ -640,12 +642,17 @@ static void *multifd_send_thread(void *opaque) + qemu_mutex_lock(&p->mutex); + + if (p->pending_job) { +- uint32_t used = p->pages->num; + uint64_t packet_num = p->packet_num; + uint32_t flags = 
p->flags; + p->iovs_num = 1; ++ p->normal_num = 0; ++ ++ for (int i = 0; i < p->pages->num; i++) { ++ p->normal[p->normal_num] = p->pages->offset[i]; ++ p->normal_num++; ++ } + +- if (used) { ++ if (p->normal_num) { + ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); +@@ -655,12 +662,12 @@ static void *multifd_send_thread(void *opaque) + multifd_send_fill_packet(p); + p->flags = 0; + p->num_packets++; +- p->num_pages += used; ++ p->total_normal_pages += p->normal_num; + p->pages->num = 0; + p->pages->block = NULL; + qemu_mutex_unlock(&p->mutex); + +- trace_multifd_send(p->id, packet_num, used, flags, ++ trace_multifd_send(p->id, packet_num, p->normal_num, flags, + p->next_packet_size); + + p->iov[0].iov_len = p->packet_len; +@@ -710,7 +717,7 @@ out: + qemu_mutex_unlock(&p->mutex); + + rcu_unregister_thread(); +- trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages); ++ trace_multifd_send_thread_end(p->id, p->num_packets, p->total_normal_pages); + + return NULL; + } +@@ -910,6 +917,7 @@ int multifd_save_setup(Error **errp) + p->tls_hostname = g_strdup(s->hostname); + /* We need one extra place for the packet header */ + p->iov = g_new0(struct iovec, page_count + 1); ++ p->normal = g_new0(ram_addr_t, page_count); + socket_send_channel_create(multifd_new_send_channel_async, p); + } + +diff --git a/migration/multifd.h b/migration/multifd.h +index 7496f951a7..7823199dbe 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -104,14 +104,18 @@ typedef struct { + /* thread local variables */ + /* packets sent through this channel */ + uint64_t num_packets; +- /* pages sent through this channel */ +- uint64_t num_pages; ++ /* non zero pages sent through this channel */ ++ uint64_t total_normal_pages; + /* syncs main thread and channels */ + QemuSemaphore sem_sync; + /* buffers to send */ + struct iovec *iov; + /* number of iovs used */ + uint32_t iovs_num; ++ /* Pages that are not zero */ ++ 
ram_addr_t *normal; ++ /* num of non zero pages */ ++ uint32_t normal_num; + /* used for compression methods */ + void *data; + } MultiFDSendParams; +diff --git a/migration/trace-events b/migration/trace-events +index 5172cb3b3d..171a83a55d 100644 +--- a/migration/trace-events ++++ b/migration/trace-events +@@ -124,13 +124,13 @@ multifd_recv_sync_main_wait(uint8_t id) "channel %u" + multifd_recv_terminate_threads(bool error) "error %d" + multifd_recv_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 + multifd_recv_thread_start(uint8_t id) "%u" +-multifd_send(uint8_t id, uint64_t packet_num, uint32_t used, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " pages %u flags 0x%x next packet size %u" ++multifd_send(uint8_t id, uint64_t packet_num, uint32_t normal, uint32_t flags, uint32_t next_packet_size) "channel %u packet_num %" PRIu64 " normal pages %u flags 0x%x next packet size %u" + multifd_send_error(uint8_t id) "channel %u" + multifd_send_sync_main(long packet_num) "packet num %ld" + multifd_send_sync_main_signal(uint8_t id) "channel %u" + multifd_send_sync_main_wait(uint8_t id) "channel %u" + multifd_send_terminate_threads(bool error) "error %d" +-multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t pages) "channel %u packets %" PRIu64 " pages %" PRIu64 ++multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64 + multifd_send_thread_start(uint8_t id) "%u" + multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s" + multifd_tls_outgoing_handshake_error(void *ioc, const char *err) "ioc=%p err=%s" +-- +2.35.3 + diff --git a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch index e6d726a..7912266 100644 --- a/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch 
+++ b/kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch @@ -1,19 +1,19 @@ -From 4ca5375a936bc87829c6e2b4620f56c73a5efc70 Mon Sep 17 00:00:00 2001 +From fce933410a5068220a5f29011a6d1a647e357a62 Mon Sep 17 00:00:00 2001 From: Leonardo Bras -Date: Fri, 13 May 2022 03:28:35 -0300 -Subject: [PATCH 12/18] multifd: multifd_send_sync_main now returns negative on +Date: Wed, 18 May 2022 02:52:25 -0300 +Subject: [PATCH 21/37] multifd: multifd_send_sync_main now returns negative on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Leonardo Brás -RH-MergeRequest: 95: MSG_ZEROCOPY + Multifd -RH-Commit: [6/11] c8ebdee4327d463c74f4b2eeb42d3c964f314c94 (LeoBras/centos-qemu-kvm) -RH-Bugzilla: 1968509 -RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Dr. David Alan Gilbert +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [21/26] b4e4f3663576aa87f3b2f66f1d38bad4f50bd4ac +RH-Bugzilla: 2072049 RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert Even though multifd_send_sync_main() currently emits error_reports, it's callers don't really check it before continuing. 
@@ -40,10 +40,10 @@ Signed-off-by: Leonardo Bras 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c -index 43998ad117..cdb57439a7 100644 +index e53811f04a..1e34e01ebc 100644 --- a/migration/multifd.c +++ b/migration/multifd.c -@@ -568,17 +568,17 @@ void multifd_save_cleanup(void) +@@ -573,17 +573,17 @@ void multifd_save_cleanup(void) multifd_send_state = NULL; } @@ -64,7 +64,7 @@ index 43998ad117..cdb57439a7 100644 } } for (i = 0; i < migrate_multifd_channels(); i++) { -@@ -591,7 +591,7 @@ void multifd_send_sync_main(QEMUFile *f) +@@ -596,7 +596,7 @@ void multifd_send_sync_main(QEMUFile *f) if (p->quit) { error_report("%s: channel %d has already quit", __func__, i); qemu_mutex_unlock(&p->mutex); @@ -73,7 +73,7 @@ index 43998ad117..cdb57439a7 100644 } p->packet_num = multifd_send_state->packet_num++; -@@ -610,6 +610,8 @@ void multifd_send_sync_main(QEMUFile *f) +@@ -615,6 +615,8 @@ void multifd_send_sync_main(QEMUFile *f) qemu_sem_wait(&p->sem_sync); } trace_multifd_send_sync_main(multifd_send_state->packet_num); @@ -83,7 +83,7 @@ index 43998ad117..cdb57439a7 100644 static void *multifd_send_thread(void *opaque) diff --git a/migration/multifd.h b/migration/multifd.h -index 4dda900a0b..cd495195ce 100644 +index 7823199dbe..92de878155 100644 --- a/migration/multifd.h +++ b/migration/multifd.h @@ -22,7 +22,7 @@ int multifd_load_cleanup(Error **errp); @@ -96,10 +96,10 @@ index 4dda900a0b..cd495195ce 100644 /* Multifd Compression flags */ diff --git a/migration/ram.c b/migration/ram.c -index 0ef4bd63eb..fb6db54642 100644 +index 863035d235..3e208efca7 100644 --- a/migration/ram.c +++ b/migration/ram.c -@@ -2903,6 +2903,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) +@@ -2992,6 +2992,7 @@ static int ram_save_setup(QEMUFile *f, void *opaque) { RAMState **rsp = opaque; RAMBlock *block; @@ -107,7 +107,7 @@ index 0ef4bd63eb..fb6db54642 100644 if (compress_threads_save_setup()) { return -1; -@@ -2937,7 +2938,11 @@ 
static int ram_save_setup(QEMUFile *f, void *opaque) +@@ -3026,7 +3027,11 @@ static int ram_save_setup(QEMUFile *f, void *opaque) ram_control_before_iterate(f, RAM_CONTROL_SETUP); ram_control_after_iterate(f, RAM_CONTROL_SETUP); @@ -120,7 +120,7 @@ index 0ef4bd63eb..fb6db54642 100644 qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); -@@ -3046,7 +3051,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) +@@ -3135,7 +3140,11 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) out: if (ret >= 0 && migration_is_setup_or_active(migrate_get_current()->state)) { @@ -132,8 +132,8 @@ index 0ef4bd63eb..fb6db54642 100644 + qemu_put_be64(f, RAM_SAVE_FLAG_EOS); qemu_fflush(f); - ram_transferred_add(8); -@@ -3106,13 +3115,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) + ram_counters.transferred += 8; +@@ -3193,13 +3202,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) ram_control_after_iterate(f, RAM_CONTROL_FINISH); } diff --git a/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch b/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch new file mode 100644 index 0000000..3f3b923 --- /dev/null +++ b/kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch @@ -0,0 +1,135 @@ +From 5f53448092c944857a2b89138f22c5ab335d8250 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 05/37] multifd: remove used parameter from send_prepare() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [5/26] ad6360d19d65e8c332dcdc3d3234478639e03db8 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. 
David Alan Gilbert +(cherry picked from commit 02fb81043ecee338e4aeb8f5be09a46325dc5e43) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 7 +++---- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 9 +++------ + migration/multifd.h | 2 +- + 4 files changed, 10 insertions(+), 15 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index f403d2f031..0c70a2dc78 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -96,10 +96,9 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zlib_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zlib_data *z = p->data; +@@ -108,11 +107,11 @@ static int zlib_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + int ret; + uint32_t i; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + uint32_t available = z->zbuff_len - out_size; + int flush = Z_NO_FLUSH; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 8d657f8860..466b370cad 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -109,10 +109,9 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) ++static int zstd_send_prepare(MultiFDSendParams *p, Error **errp) + { + struct iovec *iov = p->pages->iov; + struct zstd_data *z = p->data; +@@ -123,10 +122,10 @@ static 
int zstd_send_prepare(MultiFDSendParams *p, uint32_t used, Error **errp) + z->out.size = z->zbuff_len; + z->out.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + ZSTD_EndDirective flush = ZSTD_e_continue; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = ZSTD_e_flush; + } + z->in.src = iov[i].iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index ce7101cf9d..098ef8842c 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -82,13 +82,11 @@ static void nocomp_send_cleanup(MultiFDSendParams *p, Error **errp) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_send_prepare(MultiFDSendParams *p, uint32_t used, +- Error **errp) ++static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp) + { +- p->next_packet_size = used * qemu_target_page_size(); ++ p->next_packet_size = p->pages->num * qemu_target_page_size(); + p->flags |= MULTIFD_FLAG_NOCOMP; + return 0; + } +@@ -654,8 +652,7 @@ static void *multifd_send_thread(void *opaque) + uint32_t flags = p->flags; + + if (used) { +- ret = multifd_send_state->ops->send_prepare(p, used, +- &local_err); ++ ret = multifd_send_state->ops->send_prepare(p, &local_err); + if (ret != 0) { + qemu_mutex_unlock(&p->mutex); + break; +diff --git a/migration/multifd.h b/migration/multifd.h +index 86820dd028..7968cc5c20 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -159,7 +159,7 @@ typedef struct { + /* Cleanup for sending side */ + void (*send_cleanup)(MultiFDSendParams *p, Error **errp); + /* Prepare the send packet */ +- int (*send_prepare)(MultiFDSendParams *p, uint32_t used, Error **errp); ++ int (*send_prepare)(MultiFDSendParams *p, Error **errp); + /* Write the send packet */ + int (*send_write)(MultiFDSendParams *p, uint32_t used, Error **errp); + /* Setup for receiving side */ +-- +2.35.3 + diff 
--git a/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch b/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch new file mode 100644 index 0000000..02c5918 --- /dev/null +++ b/kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch @@ -0,0 +1,149 @@ +From 8cdedf86dc193673ea24516e7b44f8b4da5dd713 Mon Sep 17 00:00:00 2001 +From: Juan Quintela +Date: Wed, 18 May 2022 02:52:23 -0300 +Subject: [PATCH 06/37] multifd: remove used parameter from send_recv_pages() + method +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Leonardo Brás +RH-MergeRequest: 191: MSG_ZEROCOPY + Multifd @ rhel8.7 +RH-Commit: [6/26] 5c1a506e4178501a0894ea4e7ac919e1d4d4cc32 +RH-Bugzilla: 2072049 +RH-Acked-by: Peter Xu +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Dr. David Alan Gilbert + +It is already there as p->pages->num. + +Signed-off-by: Juan Quintela +Reviewed-by: Dr. David Alan Gilbert +(cherry picked from commit 40a4bfe9d3f8ad35a9c3ffb4cbf7367e2777054b) +Signed-off-by: Leonardo Bras +--- + migration/multifd-zlib.c | 9 ++++----- + migration/multifd-zstd.c | 7 +++---- + migration/multifd.c | 7 +++---- + migration/multifd.h | 2 +- + 4 files changed, 11 insertions(+), 14 deletions(-) + +diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c +index 0c70a2dc78..330fc021c5 100644 +--- a/migration/multifd-zlib.c ++++ b/migration/multifd-zlib.c +@@ -235,17 +235,16 @@ static void zlib_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zlib_recv_pages(MultiFDRecvParams *p, Error **errp) + { + struct zlib_data *z = p->data; + z_stream *zs = &z->zs; + uint32_t in_size = p->next_packet_size; + /* we measure the change of total_out */ + uint32_t out_size = zs->total_out; 
+- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + int ret; + int i; +@@ -264,12 +263,12 @@ static int zlib_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + zs->avail_in = in_size; + zs->next_in = z->zbuff; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + int flush = Z_NO_FLUSH; + unsigned long start = zs->total_out; + +- if (i == used - 1) { ++ if (i == p->pages->num - 1) { + flush = Z_SYNC_FLUSH; + } + +diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c +index 466b370cad..f0d1105792 100644 +--- a/migration/multifd-zstd.c ++++ b/migration/multifd-zstd.c +@@ -255,14 +255,13 @@ static void zstd_recv_cleanup(MultiFDRecvParams *p) + * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int zstd_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t in_size = p->next_packet_size; + uint32_t out_size = 0; +- uint32_t expected_size = used * qemu_target_page_size(); ++ uint32_t expected_size = p->pages->num * qemu_target_page_size(); + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + struct zstd_data *z = p->data; + int ret; +@@ -283,7 +282,7 @@ static int zstd_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + z->in.size = in_size; + z->in.pos = 0; + +- for (i = 0; i < used; i++) { ++ for (i = 0; i < p->pages->num; i++) { + struct iovec *iov = &p->pages->iov[i]; + + z->out.dst = iov->iov_base; +diff --git a/migration/multifd.c b/migration/multifd.c +index 098ef8842c..55d99a8232 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -141,10 +141,9 @@ static void nocomp_recv_cleanup(MultiFDRecvParams *p) 
+ * Returns 0 for success or -1 for error + * + * @p: Params for the channel that we are using +- * @used: number of pages used + * @errp: pointer to an error + */ +-static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) ++static int nocomp_recv_pages(MultiFDRecvParams *p, Error **errp) + { + uint32_t flags = p->flags & MULTIFD_FLAG_COMPRESSION_MASK; + +@@ -153,7 +152,7 @@ static int nocomp_recv_pages(MultiFDRecvParams *p, uint32_t used, Error **errp) + p->id, flags, MULTIFD_FLAG_NOCOMP); + return -1; + } +- return qio_channel_readv_all(p->c, p->pages->iov, used, errp); ++ return qio_channel_readv_all(p->c, p->pages->iov, p->pages->num, errp); + } + + static MultiFDMethods multifd_nocomp_ops = { +@@ -1099,7 +1098,7 @@ static void *multifd_recv_thread(void *opaque) + qemu_mutex_unlock(&p->mutex); + + if (used) { +- ret = multifd_recv_state->ops->recv_pages(p, used, &local_err); ++ ret = multifd_recv_state->ops->recv_pages(p, &local_err); + if (ret != 0) { + break; + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 7968cc5c20..e57adc783b 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -167,7 +167,7 @@ typedef struct { + /* Cleanup for receiving side */ + void (*recv_cleanup)(MultiFDRecvParams *p); + /* Read all pages */ +- int (*recv_pages)(MultiFDRecvParams *p, uint32_t used, Error **errp); ++ int (*recv_pages)(MultiFDRecvParams *p, Error **errp); + } MultiFDMethods; + + void multifd_register_ops(int method, MultiFDMethods *ops); +-- +2.35.3 + diff --git a/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch b/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch new file mode 100644 index 0000000..83fe9af --- /dev/null +++ b/kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch @@ -0,0 +1,63 @@ +From 115507e5e8b97993b50ea7b39d6d4bb493973e46 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 5 Aug 2022 11:42:14 +0200 +Subject: [PATCH 9/9] pc-bios/s390-ccw: Fix booting with 
logical block size < + physical block size + +RH-Author: Thomas Huth +RH-MergeRequest: 207: pc-bios/s390-ccw: Fix booting with logical block size < physical block size +RH-Commit: [1/1] ab22832592e0a48277bf7aca1b941a1be79aeab6 +RH-Bugzilla: 2112296 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Claudio Imbrenda + +For accessing single blocks during boot, it's the logical block size that +matters. (Physical block sizes are rather interesting e.g. for creating +file systems with the correct alignment for speed reasons etc.). +So the s390-ccw bios has to use the logical block size for calculating +sector numbers during the boot phase, the "physical_block_exp" shift +value must not be taken into account. This change fixes the boot process +when the guest has been installed on a disk where the logical block size +differs from the physical one, e.g. if the guest has been installed +like this: + + qemu-system-s390x -nographic -accel kvm -m 2G \ + -drive if=none,id=d1,file=fedora.iso,format=raw,media=cdrom \ + -device virtio-scsi -device scsi-cd,drive=d1 \ + -drive if=none,id=d2,file=test.qcow2,format=qcow2 \ + -device virtio-blk,drive=d2,physical_block_size=4096,logical_block_size=512 + +Linux correctly uses the logical block size of 512 for the installation, +but the s390-ccw bios tries to boot from a disk with 4096 block size so +far, as long as this patch has not been applied yet (well, it used to work +by accident in the past due to the virtio_assume_scsi() hack that used to +enforce 512 byte sectors on all virtio-block disks, but that hack has been +well removed in commit 5447de2619050a0a4d to fix other scenarios). 
+ +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2112296 +Message-Id: <20220805094214.285223-1-thuth@redhat.com> +Reviewed-by: Cornelia Huck +Reviewed-by: Eric Farman +Signed-off-by: Thomas Huth +(cherry picked from commit 393296de19650e1400ca265914cfdeb313725363) +--- + pc-bios/s390-ccw/virtio-blkdev.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/pc-bios/s390-ccw/virtio-blkdev.c b/pc-bios/s390-ccw/virtio-blkdev.c +index 8271c47296..794f99b42c 100644 +--- a/pc-bios/s390-ccw/virtio-blkdev.c ++++ b/pc-bios/s390-ccw/virtio-blkdev.c +@@ -173,7 +173,7 @@ int virtio_get_block_size(void) + + switch (vdev->senseid.cu_model) { + case VIRTIO_ID_BLOCK: +- return vdev->config.blk.blk_size << vdev->config.blk.physical_block_exp; ++ return vdev->config.blk.blk_size; + case VIRTIO_ID_SCSI: + return vdev->scsi_block_size; + } +-- +2.31.1 + diff --git a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch index b212194..89d8a91 100644 --- a/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +++ b/kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch @@ -1,18 +1,18 @@ -From 2e38b4ec5c53b2b98539a70105d3046e1c452ab8 Mon Sep 17 00:00:00 2001 +From 0e7b71a3f0b3a2e1dba54f02efc15b02f337e031 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 13/17] pc-bios/s390-ccw: Split virtio-scsi code from +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 36/37] pc-bios/s390-ccw: Split virtio-scsi code from virtio_blk_setup_device() RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [8/10] f49c5fb77e05c9dc09ed9f037e37f6a461e4bba6 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [8/9] 
8e24806a91c91b2e3603da88e5a22d96a91e8686 +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit cf30b7c4a9b2c64518be8037c2e6670aacdb00b9 Author: Thomas Huth @@ -176,5 +176,5 @@ index 4b14c2c2f9..e6b6cd4815 100644 #endif /* VIRTIO_SCSI_H */ -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch index 231a8a0..fd34b3d 100644 --- a/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +++ b/kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch @@ -1,18 +1,18 @@ -From 64fa56e0520215e3909e442f09d8073c1870648a Mon Sep 17 00:00:00 2001 +From 8433b2ba40d0618c7086da87685e1c51b6da3b11 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 07/17] pc-bios/s390-ccw/bootmap: Improve the guessing logic in +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 30/37] pc-bios/s390-ccw/bootmap: Improve the guessing logic in zipl_load_vblk() RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [2/10] ca8f5e847617cf4ac2fd6c38edb2982f32fa3eba (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [2/9] db1d2e7929352bec0e1a5d4cf3fb385bbe02304b +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 422865f6672ee1482b98d18321b55c1ecfb06c82 Author: Thomas Huth @@ -98,5 +98,5 @@ index 56411ab3b6..994e59c0b0 100644 virtio_assume_eckd(); } -- -2.31.1 +2.35.3 diff --git 
a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch index 5e4b689..84bf0ce 100644 --- a/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +++ b/kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch @@ -1,18 +1,18 @@ -From 430e76fd964390db86c8486f76b916a1cf7f74c2 Mon Sep 17 00:00:00 2001 +From 8b05a4aa32e5ae6cdbc16a5350f6df35d2d79efc Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 12/17] pc-bios/s390-ccw/virtio: Beautify the code for reading +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 35/37] pc-bios/s390-ccw/virtio: Beautify the code for reading virtqueue configuration RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [7/10] b15c06b4c5431837672b6cb5d57d09da20718441 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [7/9] 52fb7fee7d7c46397f32e35bd5f92f82616dfb5c +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 070824885741f5d2a66626d3c4ecb2773c8e0552 Author: Thomas Huth @@ -52,5 +52,5 @@ index d8c2b52710..f37510f312 100644 vring_init(&vdev->vrings[i], &info); vdev->vrings[i].schid = vdev->schid; -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch index 04ab605..9e9d8e6 100644 --- a/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +++ b/kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch @@ -1,18 +1,18 @@ -From 7d4f2454f95bfc087ad3f2fe3bc4625dcea3568e Mon Sep 17 00:00:00 2001 +From 
511d05f31824b375057ba8dea3f0343ce6e1c1e8 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 06/17] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 29/37] pc-bios/s390-ccw/virtio: Introduce a macro for the DASD block size RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [1/10] 71033934e1e9988bcf71362e02665ceb7449009d (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [1/9] 1053101fd5fb591131c567ff98c7d92b63a9dfa9 +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 1f2c2ee48e87ea743f8e23cc7569dd26c4cf9623 Author: Thomas Huth @@ -59,5 +59,5 @@ index 19fceb6495..9e410bde6f 100644 static inline ulong virtio_sector_adjust(ulong sector) { -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch index 41ae538..53f125a 100644 --- a/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +++ b/kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch @@ -1,18 +1,18 @@ -From 20f8724d0837acbe642c8c7698a4b256f34c1209 Mon Sep 17 00:00:00 2001 +From a60940fb7ef026f3aa968e77389efa51ea648ddf Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 11/17] pc-bios/s390-ccw/virtio: Read device config after +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 34/37] pc-bios/s390-ccw/virtio: Read device config after feature negotiation RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do 
not have the typical DASD geometry -RH-Commit: [6/10] 54d21e430b2dfba9e0a0823d6bb8ec7e7f8ff2ff (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [6/9] 99ed8765d614207db19ded75d62c65171674d982 +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit aa5c69ce99411c4886bcd051f288afc02b6d968d Author: Thomas Huth @@ -63,5 +63,5 @@ index 4e85a2eb82..d8c2b52710 100644 VqInfo info = { .queue = (unsigned long long) ring_area + (i * VIRTIO_RING_SIZE), -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch index e976047..b25a352 100644 --- a/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch +++ b/kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch @@ -1,18 +1,18 @@ -From 303fb3ddcdbbd1373c5b1aa28e03f90507e217f3 Mon Sep 17 00:00:00 2001 +From 5cf01cccb7501c801fa9f21a021bc9e7d1fc56e3 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 10/17] pc-bios/s390-ccw/virtio: Set missing status bits while +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 33/37] pc-bios/s390-ccw/virtio: Set missing status bits while initializing RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [5/10] 4bc44d9adae055fb60b79d04a2f08535b4d38d2b (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [5/9] 6072245f49c229518246b4a0d1be360331305bfa +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand 
RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 175aa06a152ef6b58ba9b2e47a1296b024dea70c Author: Thomas Huth @@ -89,5 +89,5 @@ index 5d2c6e3381..4e85a2eb82 100644 bool virtio_is_supported(SubChannelId schid) -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch index 109b98e..ff8aab3 100644 --- a/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch @@ -1,18 +1,18 @@ -From d3335a98a7b6e084aadf4907968536a67cf8e64c Mon Sep 17 00:00:00 2001 +From 5b3548c50e35729d724403b83e26579d31621367 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 09/17] pc-bios/s390-ccw/virtio-blkdev: Remove +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 32/37] pc-bios/s390-ccw/virtio-blkdev: Remove virtio_assume_scsi() RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [4/10] bf27f75344f220a03475a2918ed49ec9cd5ba317 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [4/9] 5256c4e6f4d5c5aedf1bad3fee30dd3ad230a3dd +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 5447de2619050a0a4dd480b97f88a9b58da360d1 Author: Thomas Huth @@ -97,5 +97,5 @@ index 241730effe..600ba5052b 100644 void virtio_assume_iso9660(void); -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch index 8bc7a11..ade5ff2 100644 --- 
a/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch @@ -1,18 +1,18 @@ -From db58915fcaf3d24b64fe2c34cc15b5596b9a81bb Mon Sep 17 00:00:00 2001 +From 042e966a70789bd3ed450fa4f57016129a34672e Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 14/17] pc-bios/s390-ccw/virtio-blkdev: Request the right +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 37/37] pc-bios/s390-ccw/virtio-blkdev: Request the right feature bits RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [9/10] 9dcd8c2f659f366f9487ab6473d1f0d7778b40a7 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [9/9] f04835423d648b04f2187ef9890f2d1689e2b57e +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit 9125a314cca4a1838b09305a87d8efb98f80ab67 Author: Thomas Huth @@ -59,5 +59,5 @@ index c175b66a47..8271c47296 100644 virtio_setup_ccw(vdev); -- -2.31.1 +2.35.3 diff --git a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch index 818e515..1730dd3 100644 --- a/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +++ b/kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch @@ -1,18 +1,18 @@ -From f07e4629a7c58407f903810a038660c88c6a6315 Mon Sep 17 00:00:00 2001 +From f09f2f12133073d6ccab3b2bd95717d435adc442 Mon Sep 17 00:00:00 2001 From: Thomas Huth -Date: Fri, 8 Jul 2022 20:49:01 +0200 -Subject: [PATCH 08/17] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix +Date: Fri, 8 Jul 2022 12:29:50 +0200 +Subject: [PATCH 
31/37] pc-bios/s390-ccw/virtio-blkdev: Simplify/fix virtio_ipl_disk_is_valid() RH-Author: Thomas Huth -RH-MergeRequest: 106: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry -RH-Commit: [3/10] fb06830a3e50d9da3d84913b50bb227865cc44b3 (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2098077 +RH-MergeRequest: 198: pc-bios/s390-ccw: Fix boot from disks with 4k sectors that do not have the typical DASD geometry +RH-Commit: [3/9] ca0b836a417ce5bbd26e489551f573d6b2fc9e94 +RH-Bugzilla: 2098076 RH-Acked-by: Miroslav Rezanina RH-Acked-by: David Hildenbrand RH-Acked-by: Cornelia Huck -Bugzilla: http://bugzilla.redhat.com/2098077 +Bugzilla: http://bugzilla.redhat.com/2098076 commit bbf615f7b707f009ef8e757d170902ad33b90644 Author: Thomas Huth @@ -120,5 +120,5 @@ index 9e410bde6f..241730effe 100644 extern int virtio_get_block_size(void); extern uint8_t virtio_get_heads(void); -- -2.31.1 +2.35.3 diff --git a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch index 9010d3d..8ed0d2e 100644 --- a/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +++ b/kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch @@ -1,15 +1,15 @@ -From 5e385a0e49a520550a83299632be175857b63f19 Mon Sep 17 00:00:00 2001 +From 552e7c8ae2c6e281a72791aefa1729be86f96642 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Tue, 5 Apr 2022 15:46:52 +0200 -Subject: [PATCH 06/16] qcow2: Add errp to rebuild_refcount_structure() +Subject: [PATCH 5/6] qcow2: Add errp to rebuild_refcount_structure() RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [3/4] 937b89a7eab6ec6b18618d59bc1526976ad03290 (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [3/4] 9dddd1d21383c4cbd528e5a0d42b0c2a7d87c8f6 +RH-Bugzilla: 1519071 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake RH-Acked-by: Stefan Hajnoczi 
+RH-Acked-by: Eric Blake Instead of fprint()-ing error messages in rebuild_refcount_structure() and its rebuild_refcounts_write_refblocks() helper, pass them through an @@ -26,10 +26,10 @@ Signed-off-by: Hanna Reitz 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index c5669eaa51..ed0ecfaa89 100644 +index 555d8ba5ac..09f8ef4927 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c -@@ -2465,7 +2465,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, +@@ -2462,7 +2462,8 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, static int rebuild_refcounts_write_refblocks( BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters, int64_t first_cluster, int64_t end_cluster, @@ -39,7 +39,7 @@ index c5669eaa51..ed0ecfaa89 100644 ) { BDRVQcow2State *s = bs->opaque; -@@ -2516,8 +2517,8 @@ static int rebuild_refcounts_write_refblocks( +@@ -2513,8 +2514,8 @@ static int rebuild_refcounts_write_refblocks( nb_clusters, &first_free_cluster); if (refblock_offset < 0) { @@ -50,7 +50,7 @@ index c5669eaa51..ed0ecfaa89 100644 return refblock_offset; } -@@ -2539,6 +2540,7 @@ static int rebuild_refcounts_write_refblocks( +@@ -2536,6 +2537,7 @@ static int rebuild_refcounts_write_refblocks( on_disk_reftable_entries * REFTABLE_ENTRY_SIZE); if (!on_disk_reftable) { @@ -58,7 +58,7 @@ index c5669eaa51..ed0ecfaa89 100644 return -ENOMEM; } -@@ -2562,7 +2564,7 @@ static int rebuild_refcounts_write_refblocks( +@@ -2559,7 +2561,7 @@ static int rebuild_refcounts_write_refblocks( ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset, s->cluster_size, false); if (ret < 0) { @@ -67,7 +67,7 @@ index c5669eaa51..ed0ecfaa89 100644 return ret; } -@@ -2578,7 +2580,7 @@ static int rebuild_refcounts_write_refblocks( +@@ -2575,7 +2577,7 @@ static int rebuild_refcounts_write_refblocks( ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock, s->cluster_size); if (ret < 0) { @@ -76,7 +76,7 @@ index 
c5669eaa51..ed0ecfaa89 100644 return ret; } -@@ -2601,7 +2603,8 @@ static int rebuild_refcounts_write_refblocks( +@@ -2598,7 +2600,8 @@ static int rebuild_refcounts_write_refblocks( static int rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res, void **refcount_table, @@ -86,7 +86,7 @@ index c5669eaa51..ed0ecfaa89 100644 { BDRVQcow2State *s = bs->opaque; int64_t reftable_offset = -1; -@@ -2652,7 +2655,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2649,7 +2652,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters, 0, *nb_clusters, &on_disk_reftable, @@ -95,7 +95,7 @@ index c5669eaa51..ed0ecfaa89 100644 if (reftable_size_changed < 0) { res->check_errors++; ret = reftable_size_changed; -@@ -2676,8 +2679,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2673,8 +2676,8 @@ static int rebuild_refcount_structure(BlockDriverState *bs, refcount_table, nb_clusters, &first_free_cluster); if (reftable_offset < 0) { @@ -106,7 +106,7 @@ index c5669eaa51..ed0ecfaa89 100644 res->check_errors++; ret = reftable_offset; goto fail; -@@ -2695,7 +2698,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2692,7 +2695,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, reftable_start_cluster, reftable_end_cluster, &on_disk_reftable, @@ -115,7 +115,7 @@ index c5669eaa51..ed0ecfaa89 100644 if (reftable_size_changed < 0) { res->check_errors++; ret = reftable_size_changed; -@@ -2725,7 +2728,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2722,7 +2725,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length, false); if (ret < 0) { @@ -124,7 +124,7 @@ index c5669eaa51..ed0ecfaa89 100644 goto fail; } -@@ -2733,7 +2736,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2730,7 +2733,7 @@ static int 
rebuild_refcount_structure(BlockDriverState *bs, ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable, reftable_length); if (ret < 0) { @@ -133,7 +133,7 @@ index c5669eaa51..ed0ecfaa89 100644 goto fail; } -@@ -2746,7 +2749,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, +@@ -2743,7 +2746,7 @@ static int rebuild_refcount_structure(BlockDriverState *bs, &reftable_offset_and_clusters, sizeof(reftable_offset_and_clusters)); if (ret < 0) { @@ -142,7 +142,7 @@ index c5669eaa51..ed0ecfaa89 100644 goto fail; } -@@ -2814,11 +2817,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, +@@ -2811,11 +2814,13 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, if (rebuild && (fix & BDRV_FIX_ERRORS)) { BdrvCheckResult old_res = *res; int fresh_leaks = 0; @@ -158,5 +158,5 @@ index c5669eaa51..ed0ecfaa89 100644 } -- -2.31.1 +2.27.0 diff --git a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch index cdc92b8..efae75f 100644 --- a/kvm-qcow2-Improve-refcount-structure-rebuilding.patch +++ b/kvm-qcow2-Improve-refcount-structure-rebuilding.patch @@ -1,15 +1,15 @@ -From b453cf6be8429f4438d51eb24fcf49e7d9f14db6 Mon Sep 17 00:00:00 2001 +From be54c6206b0f0a19e0ffe6a058f4f97277027a17 Mon Sep 17 00:00:00 2001 From: Hanna Reitz Date: Tue, 5 Apr 2022 15:46:50 +0200 -Subject: [PATCH 04/16] qcow2: Improve refcount structure rebuilding +Subject: [PATCH 3/6] qcow2: Improve refcount structure rebuilding RH-Author: Hanna Reitz -RH-MergeRequest: 96: qcow2: Improve refcount structure rebuilding -RH-Commit: [1/4] a3606b7abcaebb4930b566e95b1090aead62dfae (hreitz/qemu-kvm-c-9-s) -RH-Bugzilla: 2072379 +RH-MergeRequest: 171: qcow2: Improve refcount structure rebuilding +RH-Commit: [1/4] 0bb78f7735a0730204670ae5ec2e040ad1d23942 +RH-Bugzilla: 1519071 RH-Acked-by: Miroslav Rezanina -RH-Acked-by: Eric Blake RH-Acked-by: Stefan Hajnoczi +RH-Acked-by: Eric Blake When rebuilding the 
refcount structures (when qemu-img check -r found errors with refcount = 0, but reference count > 0), the new refcount @@ -40,10 +40,10 @@ Signed-off-by: Hanna Reitz 1 file changed, 235 insertions(+), 97 deletions(-) diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c -index b91499410c..c5669eaa51 100644 +index 4614572252..555d8ba5ac 100644 --- a/block/qcow2-refcount.c +++ b/block/qcow2-refcount.c -@@ -2438,111 +2438,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, +@@ -2435,111 +2435,140 @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs, } /* @@ -260,7 +260,7 @@ index b91499410c..c5669eaa51 100644 on_disk_refblock = (void *)((char *) *refcount_table + refblock_index * s->cluster_size); -@@ -2550,23 +2579,99 @@ write_refblocks: +@@ -2547,23 +2576,99 @@ write_refblocks: s->cluster_size); if (ret < 0) { fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret)); @@ -371,7 +371,7 @@ index b91499410c..c5669eaa51 100644 reftable_offset = alloc_clusters_imrt(bs, reftable_clusters, refcount_table, nb_clusters, &first_free_cluster); -@@ -2578,24 +2683,55 @@ write_refblocks: +@@ -2575,24 +2680,55 @@ write_refblocks: goto fail; } @@ -434,7 +434,7 @@ index b91499410c..c5669eaa51 100644 if (ret < 0) { fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret)); goto fail; -@@ -2604,7 +2740,7 @@ write_refblocks: +@@ -2601,7 +2737,7 @@ write_refblocks: /* Enter new reftable into the image header */ reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset); reftable_offset_and_clusters.reftable_clusters = @@ -443,7 +443,7 @@ index b91499410c..c5669eaa51 100644 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset), &reftable_offset_and_clusters, -@@ -2614,12 +2750,14 @@ write_refblocks: +@@ -2611,12 +2747,14 @@ write_refblocks: goto fail; } @@ -461,5 +461,5 @@ index b91499410c..c5669eaa51 100644 return 0; -- -2.31.1 +2.27.0 diff --git a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch 
b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch index f027c45..2ceb4e4 100644 --- a/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +++ b/kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch @@ -1,18 +1,21 @@ -From 236f216309261bc924e49014267998fdc2ef7f46 Mon Sep 17 00:00:00 2001 +From 21b19213328826327eba18199b790425659af7d8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 29 Jul 2022 16:55:34 +0200 -Subject: [PATCH 28/32] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 +Subject: [PATCH 1/3] redhat: Update linux-headers/linux/kvm.h to v5.18-rc6 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [1/2] f306d7ff8efa64b14158388b95815ac556a25d8a (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Commit: [1/2] e514a00305cb0caab9d3acc0efb325853daa6d51 Upstream Status: RHEL-only -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 Based on upstream commit e4082063e47e9731dbeb1c26174c17f6038f577f ("linux-headers: Update to v5.18-rc6"), but this is focusing on @@ -20,6 +23,8 @@ the file linux-headers/linux/kvm.h only (since the other changes related to the VFIO renaming might break some stuff). 
Signed-off-by: Thomas Huth +(cherry picked from commit 71516db15469a02600932a5c1f0d4a9626a91193) +Signed-off-by: Cédric Le Goater --- linux-headers/linux/kvm.h | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) @@ -102,5 +107,5 @@ index d232feaae9..0d05d02ee4 100644 * struct kvm_stats_header - Header of per vm/vcpu binary statistics data. * @flags: Some extra information for header, always 0 for now. -- -2.31.1 +2.35.3 diff --git a/kvm-s390x-Add-KVM-PV-dump-interface.patch b/kvm-s390x-Add-KVM-PV-dump-interface.patch new file mode 100644 index 0000000..f42410d --- /dev/null +++ b/kvm-s390x-Add-KVM-PV-dump-interface.patch @@ -0,0 +1,124 @@ +From 95c229506a6e7261fce184488e880a94f9ba0789 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:21 +0000 +Subject: [PATCH 40/42] s390x: Add KVM PV dump interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [40/41] 5df512a63b2ed17991489565b70f89f4efc0b639 + +Let's add a few bits of code which hide the new KVM PV dump API from +us via new functions. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Janis Schoetterl-Glausch +Reviewed-by: Steffen Eiden +[ Marc-André: fix up for compilation issue ] +Signed-off-by: Marc-André Lureau +Message-Id: <20221017083822.43118-10-frankja@linux.ibm.com> +(cherry picked from commit 753ca06f4706cd6e57750a606afb08c5c5299643) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 51 +++++++++++++++++++++++++++++++++++++++++++ + include/hw/s390x/pv.h | 9 ++++++++ + 2 files changed, 60 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 4c012f2eeb..728ba24547 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -175,6 +175,57 @@ bool kvm_s390_pv_info_basic_valid(void) + return info_valid; + } + ++static int s390_pv_dump_cmd(uint64_t subcmd, uint64_t uaddr, uint64_t gaddr, ++ uint64_t len) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = subcmd, ++ .buff_addr = uaddr, ++ .buff_len = len, ++ .gaddr = gaddr, ++ }; ++ int ret; ++ ++ ret = s390_pv_cmd(KVM_PV_DUMP, (void *)&dmp); ++ if (ret) { ++ error_report("KVM DUMP command %ld failed", subcmd); ++ } ++ return ret; ++} ++ ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) ++{ ++ struct kvm_s390_pv_dmp dmp = { ++ .subcmd = KVM_PV_DUMP_CPU, ++ .buff_addr = (uint64_t)buff, ++ .gaddr = 0, ++ .buff_len = info_dump.dump_cpu_buffer_len, ++ }; ++ struct kvm_pv_cmd pv = { ++ .cmd = KVM_PV_DUMP, ++ .data = (uint64_t)&dmp, ++ }; ++ ++ return kvm_vcpu_ioctl(CPU(cpu), KVM_S390_PV_CPU_COMMAND, &pv); ++} ++ ++int kvm_s390_dump_init(void) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_INIT, 0, 0, 0); ++} ++ ++int kvm_s390_dump_mem_state(uint64_t gaddr, size_t len, void *dest) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_CONFIG_STOR_STATE, (uint64_t)dest, ++ gaddr, len); ++} ++ ++int kvm_s390_dump_completion_data(void *buff) ++{ ++ return s390_pv_dump_cmd(KVM_PV_DUMP_COMPLETE, (uint64_t)buff, 0, ++ info_dump.dump_config_finalize_len); ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + 
+diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index e5ea0eca16..9360aa1091 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -51,6 +51,10 @@ uint64_t kvm_s390_pv_dmp_get_size_cpu(void); + uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); + uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); + bool kvm_s390_pv_info_basic_valid(void); ++int kvm_s390_dump_init(void); ++int kvm_s390_dump_cpu(S390CPU *cpu, void *buff); ++int kvm_s390_dump_mem_state(uint64_t addr, size_t len, void *dest); ++int kvm_s390_dump_completion_data(void *buff); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } + static inline int s390_pv_query_info(void) { return 0; } +@@ -66,6 +70,11 @@ static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } + static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } + static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } ++static inline int kvm_s390_dump_init(void) { return 0; } ++static inline int kvm_s390_dump_cpu(S390CPU *cpu, void *buff) { return 0; } ++static inline int kvm_s390_dump_mem_state(uint64_t addr, size_t len, ++ void *dest) { return 0; } ++static inline int kvm_s390_dump_completion_data(void *buff) { return 0; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/kvm-s390x-Add-protected-dump-cap.patch b/kvm-s390x-Add-protected-dump-cap.patch new file mode 100644 index 0000000..94da295 --- /dev/null +++ b/kvm-s390x-Add-protected-dump-cap.patch @@ -0,0 +1,113 @@ +From 7634eed5aea61dc94f9a828c62ef3da9aeaa62ae Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:18 +0000 +Subject: [PATCH 37/42] s390x: Add protected dump cap +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater 
+RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [37/41] 52e1e7bf1a00ce3a220d3db2f733a65548bfec6d + +Add a protected dump capability for later feature checking. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Thomas Huth +Reviewed-by: Janis Schoetterl-Glausch +Message-Id: <20221017083822.43118-7-frankja@linux.ibm.com> +[ Marc-André - Add missing stubs when !kvm ] +Signed-off-by: Marc-André Lureau +(cherry picked from commit ad3b2e693daac6ed92db7361236028851d37c77c) +Signed-off-by: Cédric Le Goater +--- + target/s390x/kvm/kvm.c | 7 +++++++ + target/s390x/kvm/kvm_s390x.h | 1 + + target/s390x/kvm/meson.build | 2 ++ + target/s390x/kvm/stubs.c | 12 ++++++++++++ + 4 files changed, 22 insertions(+) + create mode 100644 target/s390x/kvm/stubs.c + +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index 30712487d4..d36b44f32a 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -159,6 +159,7 @@ static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; + static int cap_zpci_op; ++static int cap_protected_dump; + + static bool mem_op_storage_key_support; + +@@ -365,6 +366,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); + cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); ++ cap_protected_dump = kvm_check_extension(s, KVM_CAP_S390_PROTECTED_DUMP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2042,6 +2044,11 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, + return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); + } + ++int kvm_s390_get_protected_dump(void) ++{ ++ return 
cap_protected_dump; ++} ++ + int kvm_s390_get_ri(void) + { + return cap_ri; +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index aaae8570de..f9785564d0 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -26,6 +26,7 @@ int kvm_s390_set_cpu_state(S390CPU *cpu, uint8_t cpu_state); + void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); ++int kvm_s390_get_protected_dump(void); + int kvm_s390_get_ri(void); + int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); +diff --git a/target/s390x/kvm/meson.build b/target/s390x/kvm/meson.build +index d1356356b1..aef52b6686 100644 +--- a/target/s390x/kvm/meson.build ++++ b/target/s390x/kvm/meson.build +@@ -1,6 +1,8 @@ + + s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 'kvm.c' ++), if_false: files( ++ 'stubs.c' + )) + + # Newer kernels on s390 check for an S390_PGSTE program header and +diff --git a/target/s390x/kvm/stubs.c b/target/s390x/kvm/stubs.c +new file mode 100644 +index 0000000000..5fd63b9a7e +--- /dev/null ++++ b/target/s390x/kvm/stubs.c +@@ -0,0 +1,12 @@ ++/* ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm_s390x.h" ++ ++int kvm_s390_get_protected_dump(void) ++{ ++ return false; ++} +-- +2.37.3 + diff --git a/kvm-s390x-Introduce-PV-query-interface.patch b/kvm-s390x-Introduce-PV-query-interface.patch new file mode 100644 index 0000000..dfb0169 --- /dev/null +++ b/kvm-s390x-Introduce-PV-query-interface.patch @@ -0,0 +1,174 @@ +From 760236b3633a8f532631256a899cab969e772196 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:19 +0000 +Subject: [PATCH 38/42] s390x: Introduce PV query interface +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced 
Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [38/41] 3090615d81ec6b9e4c306f7fc3709e1935ff5a79 + +Introduce an interface over which we can get information about UV data. + +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Reviewed-by: Janis Schoetterl-Glausch +Acked-by: Thomas Huth +Message-Id: <20221017083822.43118-8-frankja@linux.ibm.com> +(cherry picked from commit 03d83ecfae46bf5e0074cb5808043b30df34064b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/pv.c | 61 ++++++++++++++++++++++++++++++++++++++ + hw/s390x/s390-virtio-ccw.c | 6 ++++ + include/hw/s390x/pv.h | 10 +++++++ + 3 files changed, 77 insertions(+) + +diff --git a/hw/s390x/pv.c b/hw/s390x/pv.c +index 401b63d6cb..4c012f2eeb 100644 +--- a/hw/s390x/pv.c ++++ b/hw/s390x/pv.c +@@ -20,6 +20,11 @@ + #include "exec/confidential-guest-support.h" + #include "hw/s390x/ipl.h" + #include "hw/s390x/pv.h" ++#include "target/s390x/kvm/kvm_s390x.h" ++ ++static bool info_valid; ++static struct kvm_s390_pv_info_vm info_vm; ++static struct kvm_s390_pv_info_dump info_dump; + + static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + { +@@ -56,6 +61,42 @@ static int __s390_pv_cmd(uint32_t cmd, const char *cmdname, void *data) + } \ + } + ++int s390_pv_query_info(void) ++{ ++ struct kvm_s390_pv_info info = { ++ .header.id = KVM_PV_INFO_VM, ++ .header.len_max = sizeof(info.header) + sizeof(info.vm), ++ }; ++ int rc; ++ ++ /* Info API's first user is dump so they are bundled */ ++ if (!kvm_s390_get_protected_dump()) { ++ return 0; ++ } ++ ++ rc = s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ memcpy(&info_vm, &info.vm, sizeof(info.vm)); ++ ++ info.header.id = KVM_PV_INFO_DUMP; ++ info.header.len_max = sizeof(info.header) + sizeof(info.dump); ++ rc = 
s390_pv_cmd(KVM_PV_INFO, &info); ++ if (rc) { ++ error_report("KVM PV INFO cmd %x failed: %s", ++ info.header.id, strerror(-rc)); ++ return rc; ++ } ++ ++ memcpy(&info_dump, &info.dump, sizeof(info.dump)); ++ info_valid = true; ++ ++ return rc; ++} ++ + int s390_pv_vm_enable(void) + { + return s390_pv_cmd(KVM_PV_ENABLE, NULL); +@@ -114,6 +155,26 @@ void s390_pv_inject_reset_error(CPUState *cs) + env->regs[r1 + 1] = DIAG_308_RC_INVAL_FOR_PV; + } + ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void) ++{ ++ return info_dump.dump_cpu_buffer_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) ++{ ++ return info_dump.dump_config_finalize_len; ++} ++ ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) ++{ ++ return info_dump.dump_config_mem_buffer_per_1m; ++} ++ ++bool kvm_s390_pv_info_basic_valid(void) ++{ ++ return info_valid; ++} ++ + #define TYPE_S390_PV_GUEST "s390-pv-guest" + OBJECT_DECLARE_SIMPLE_TYPE(S390PVGuest, S390_PV_GUEST) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bd80e72cf8..a9617ab79f 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -365,6 +365,12 @@ static int s390_machine_protect(S390CcwMachineState *ms) + + ms->pv = true; + ++ /* Will return 0 if API is not available since it's not vital */ ++ rc = s390_pv_query_info(); ++ if (rc) { ++ goto out_err; ++ } ++ + /* Set SE header and unpack */ + rc = s390_ipl_prepare_pv_header(); + if (rc) { +diff --git a/include/hw/s390x/pv.h b/include/hw/s390x/pv.h +index 1f1f545bfc..e5ea0eca16 100644 +--- a/include/hw/s390x/pv.h ++++ b/include/hw/s390x/pv.h +@@ -38,6 +38,7 @@ static inline bool s390_is_pv(void) + return ccw->pv; + } + ++int s390_pv_query_info(void); + int s390_pv_vm_enable(void); + void s390_pv_vm_disable(void); + int s390_pv_set_sec_parms(uint64_t origin, uint64_t length); +@@ -46,8 +47,13 @@ void s390_pv_prep_reset(void); + int s390_pv_verify(void); + void s390_pv_unshare(void); + void s390_pv_inject_reset_error(CPUState 
*cs); ++uint64_t kvm_s390_pv_dmp_get_size_cpu(void); ++uint64_t kvm_s390_pv_dmp_get_size_mem_state(void); ++uint64_t kvm_s390_pv_dmp_get_size_completion_data(void); ++bool kvm_s390_pv_info_basic_valid(void); + #else /* CONFIG_KVM */ + static inline bool s390_is_pv(void) { return false; } ++static inline int s390_pv_query_info(void) { return 0; } + static inline int s390_pv_vm_enable(void) { return 0; } + static inline void s390_pv_vm_disable(void) {} + static inline int s390_pv_set_sec_parms(uint64_t origin, uint64_t length) { return 0; } +@@ -56,6 +62,10 @@ static inline void s390_pv_prep_reset(void) {} + static inline int s390_pv_verify(void) { return 0; } + static inline void s390_pv_unshare(void) {} + static inline void s390_pv_inject_reset_error(CPUState *cs) {}; ++static inline uint64_t kvm_s390_pv_dmp_get_size_cpu(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_mem_state(void) { return 0; } ++static inline uint64_t kvm_s390_pv_dmp_get_size_completion_data(void) { return 0; } ++static inline bool kvm_s390_pv_info_basic_valid(void) { return false; } + #endif /* CONFIG_KVM */ + + int s390_pv_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); +-- +2.37.3 + diff --git a/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch b/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch new file mode 100644 index 0000000..f150979 --- /dev/null +++ b/kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch @@ -0,0 +1,209 @@ +From 429c4cc750affe82b89867668ff2515a8a66732e Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Fri, 18 Nov 2022 15:23:19 +0100 +Subject: [PATCH 2/3] s390x: Register TYPE_S390_CCW_MACHINE properties as class + properties +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 233: s390x: Document the "loadparm" machine property +RH-Bugzilla: 2128225 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Acked-by: 
Jon Maloy +RH-Commit: [2/2] 28a0086cb0e8be2535deafdd9115cadd7ff033f3 + +Currently, when running 'qemu-system-s390x -M s390-ccw-virtio,help' +the s390x-specific properties are not listed anymore. This happens +because since commit d8fb7d0969 ("vl: switch -M parsing to keyval") +the properties have to be defined at the class level and not at the +instance level anymore. Fix it on s390x now, too, by moving the +registration of the properties to the class level" + +Fixes: d8fb7d0969 ("vl: switch -M parsing to keyval") +Signed-off-by: Pierre Morel +Message-Id: <20221103170150.20789-2-pmorel@linux.ibm.com> +[thuth: Add patch description] +Signed-off-by: Thomas Huth +(cherry picked from commit 1fd396e32288bbf536483c74b68cb3ee86005a9f) + +Conflicts: + hw/s390x/s390-virtio-ccw.c + (dropped the "zpcii-disable" property code - it's not used in downstream) +Signed-off-by: Thomas Huth +--- + hw/s390x/s390-virtio-ccw.c | 117 +++++++++++++++++++++---------------- + 1 file changed, 67 insertions(+), 50 deletions(-) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index a9617ab79f..4a7cd21cac 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -42,6 +42,7 @@ + #include "sysemu/sysemu.h" + #include "hw/s390x/pv.h" + #include "migration/blocker.h" ++#include "qapi/visitor.h" + + static Error *pv_mig_blocker; + +@@ -588,38 +589,6 @@ static ram_addr_t s390_fixup_ram_size(ram_addr_t sz) + return newsz; + } + +-static void ccw_machine_class_init(ObjectClass *oc, void *data) +-{ +- MachineClass *mc = MACHINE_CLASS(oc); +- NMIClass *nc = NMI_CLASS(oc); +- HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); +- S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); +- +- s390mc->ri_allowed = true; +- s390mc->cpu_model_allowed = true; +- s390mc->css_migration_enabled = true; +- s390mc->hpage_1m_allowed = true; +- mc->init = ccw_init; +- mc->reset = s390_machine_reset; +- mc->block_default_type = IF_VIRTIO; +- mc->no_cdrom = 1; +- 
mc->no_floppy = 1; +- mc->no_parallel = 1; +- mc->no_sdcard = 1; +- mc->max_cpus = S390_MAX_CPUS; +- mc->has_hotpluggable_cpus = true; +- assert(!mc->get_hotplug_handler); +- mc->get_hotplug_handler = s390_get_hotplug_handler; +- mc->cpu_index_to_instance_props = s390_cpu_index_to_props; +- mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; +- /* it is overridden with 'host' cpu *in kvm_arch_init* */ +- mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); +- hc->plug = s390_machine_device_plug; +- hc->unplug_request = s390_machine_device_unplug_request; +- nc->nmi_monitor_handler = s390_nmi; +- mc->default_ram_id = "s390.ram"; +-} +- + static inline bool machine_get_aes_key_wrap(Object *obj, Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); +@@ -694,19 +663,29 @@ bool hpage_1m_allowed(void) + return get_machine_class()->hpage_1m_allowed; + } + +-static char *machine_get_loadparm(Object *obj, Error **errp) ++static void machine_get_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *str = g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); + +- /* make a NUL-terminated string */ +- return g_strndup((char *) ms->loadparm, sizeof(ms->loadparm)); ++ visit_type_str(v, name, &str, errp); ++ g_free(str); + } + +-static void machine_set_loadparm(Object *obj, const char *val, Error **errp) ++static void machine_set_loadparm(Object *obj, Visitor *v, ++ const char *name, void *opaque, ++ Error **errp) + { + S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ char *val; + int i; + ++ if (!visit_type_str(v, name, &val, errp)) { ++ return; ++ } ++ + for (i = 0; i < sizeof(ms->loadparm) && val[i]; i++) { + uint8_t c = qemu_toupper(val[i]); /* mimic HMC */ + +@@ -724,29 +703,67 @@ static void machine_set_loadparm(Object *obj, const char *val, Error **errp) + ms->loadparm[i] = ' '; /* pad right with spaces */ + } + } +-static inline void 
s390_machine_initfn(Object *obj) ++ ++static void ccw_machine_class_init(ObjectClass *oc, void *data) + { +- object_property_add_bool(obj, "aes-key-wrap", +- machine_get_aes_key_wrap, +- machine_set_aes_key_wrap); +- object_property_set_description(obj, "aes-key-wrap", ++ MachineClass *mc = MACHINE_CLASS(oc); ++ NMIClass *nc = NMI_CLASS(oc); ++ HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(oc); ++ S390CcwMachineClass *s390mc = S390_CCW_MACHINE_CLASS(mc); ++ ++ s390mc->ri_allowed = true; ++ s390mc->cpu_model_allowed = true; ++ s390mc->css_migration_enabled = true; ++ s390mc->hpage_1m_allowed = true; ++ mc->init = ccw_init; ++ mc->reset = s390_machine_reset; ++ mc->block_default_type = IF_VIRTIO; ++ mc->no_cdrom = 1; ++ mc->no_floppy = 1; ++ mc->no_parallel = 1; ++ mc->no_sdcard = 1; ++ mc->max_cpus = S390_MAX_CPUS; ++ mc->has_hotpluggable_cpus = true; ++ assert(!mc->get_hotplug_handler); ++ mc->get_hotplug_handler = s390_get_hotplug_handler; ++ mc->cpu_index_to_instance_props = s390_cpu_index_to_props; ++ mc->possible_cpu_arch_ids = s390_possible_cpu_arch_ids; ++ /* it is overridden with 'host' cpu *in kvm_arch_init* */ ++ mc->default_cpu_type = S390_CPU_TYPE_NAME("qemu"); ++ hc->plug = s390_machine_device_plug; ++ hc->unplug_request = s390_machine_device_unplug_request; ++ nc->nmi_monitor_handler = s390_nmi; ++ mc->default_ram_id = "s390.ram"; ++ ++ object_class_property_add_bool(oc, "aes-key-wrap", ++ machine_get_aes_key_wrap, ++ machine_set_aes_key_wrap); ++ object_class_property_set_description(oc, "aes-key-wrap", + "enable/disable AES key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "aes-key-wrap", true, NULL); + +- object_property_add_bool(obj, "dea-key-wrap", +- machine_get_dea_key_wrap, +- machine_set_dea_key_wrap); +- object_property_set_description(obj, "dea-key-wrap", ++ object_class_property_add_bool(oc, "dea-key-wrap", ++ machine_get_dea_key_wrap, ++ machine_set_dea_key_wrap); ++ object_class_property_set_description(oc, 
"dea-key-wrap", + "enable/disable DEA key wrapping using the CPACF wrapping key"); +- object_property_set_bool(obj, "dea-key-wrap", true, NULL); +- object_property_add_str(obj, "loadparm", +- machine_get_loadparm, machine_set_loadparm); +- object_property_set_description(obj, "loadparm", ++ ++ object_class_property_add(oc, "loadparm", "loadparm", ++ machine_get_loadparm, machine_set_loadparm, ++ NULL, NULL); ++ object_class_property_set_description(oc, "loadparm", + "Up to 8 chars in set of [A-Za-z0-9. ] (lower case chars converted" + " to upper case) to pass to machine loader, boot manager," + " and guest kernel"); + } + ++static inline void s390_machine_initfn(Object *obj) ++{ ++ S390CcwMachineState *ms = S390_CCW_MACHINE(obj); ++ ++ ms->aes_key_wrap = true; ++ ms->dea_key_wrap = true; ++} ++ + static const TypeInfo ccw_machine_info = { + .name = TYPE_S390_CCW_MACHINE, + .parent = TYPE_MACHINE, +-- +2.37.3 + diff --git a/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch b/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch new file mode 100644 index 0000000..d62a45a --- /dev/null +++ b/kvm-s390x-ipl-support-extended-kernel-command-line-size.patch @@ -0,0 +1,97 @@ +From ddfee9d393af322938e4df466cd01b8f9570a1c9 Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 5 Apr 2022 10:20:59 +0200 +Subject: [PATCH 1/6] s390x/ipl: support extended kernel command line size + +RH-Author: Thomas Huth +RH-MergeRequest: 144: s390x/ipl: support extended kernel command line size +RH-Commit: [1/1] be227e50af5dbe7802605f873db29ac5358aa196 +RH-Bugzilla: 2043830 +RH-Acked-by: Miroslav Rezanina +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand + +Bugzilla: http://bugzilla.redhat.com/2043830 + +commit b2173046a64beed76715f310f98538f159276af1 +Author: Marc Hartmayer +Date: Mon Nov 22 12:29:09 2021 +0100 + + s390x/ipl: support extended kernel command line size + + In the past s390 used a fixed command line length of 896 bytes. 
This has changed + with the Linux commit 5ecb2da660ab ("s390: support command lines longer than 896 + bytes"). There is now a parm area indicating the maximum command line size. This + parm area has always been initialized to zero, so with older kernels this field + would read zero and we must then assume that only 896 bytes are available. + + Signed-off-by: Marc Hartmayer + Reviewed-by: David Hildenbrand + Reviewed-by: Christian Borntraeger + Acked-by: Viktor Mihajlovski + Message-Id: <20211122112909.18138-1-mhartmay@linux.ibm.com> + [thuth: Cosmetic fixes, and use PRIu64 instead of %lu] + Signed-off-by: Thomas Huth + +Signed-off-by: Thomas Huth +--- + hw/s390x/ipl.c | 27 +++++++++++++++++++++++---- + 1 file changed, 23 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c +index 7ddca0127f..eb7fc4c4ae 100644 +--- a/hw/s390x/ipl.c ++++ b/hw/s390x/ipl.c +@@ -37,8 +37,9 @@ + + #define KERN_IMAGE_START 0x010000UL + #define LINUX_MAGIC_ADDR 0x010008UL ++#define KERN_PARM_AREA_SIZE_ADDR 0x010430UL + #define KERN_PARM_AREA 0x010480UL +-#define KERN_PARM_AREA_SIZE 0x000380UL ++#define LEGACY_KERN_PARM_AREA_SIZE 0x000380UL + #define INITRD_START 0x800000UL + #define INITRD_PARM_START 0x010408UL + #define PARMFILE_START 0x001000UL +@@ -110,6 +111,21 @@ static uint64_t bios_translate_addr(void *opaque, uint64_t srcaddr) + return srcaddr + dstaddr; + } + ++static uint64_t get_max_kernel_cmdline_size(void) ++{ ++ uint64_t *size_ptr = rom_ptr(KERN_PARM_AREA_SIZE_ADDR, sizeof(*size_ptr)); ++ ++ if (size_ptr) { ++ uint64_t size; ++ ++ size = be64_to_cpu(*size_ptr); ++ if (size) { ++ return size; ++ } ++ } ++ return LEGACY_KERN_PARM_AREA_SIZE; ++} ++ + static void s390_ipl_realize(DeviceState *dev, Error **errp) + { + MachineState *ms = MACHINE(qdev_get_machine()); +@@ -197,10 +213,13 @@ static void s390_ipl_realize(DeviceState *dev, Error **errp) + ipl->start_addr = KERN_IMAGE_START; + /* Overwrite parameters in the kernel image, which are "rom" */ + if 
(parm_area) { +- if (cmdline_size > KERN_PARM_AREA_SIZE) { ++ uint64_t max_cmdline_size = get_max_kernel_cmdline_size(); ++ ++ if (cmdline_size > max_cmdline_size) { + error_setg(errp, +- "kernel command line exceeds maximum size: %zu > %lu", +- cmdline_size, KERN_PARM_AREA_SIZE); ++ "kernel command line exceeds maximum size:" ++ " %zu > %" PRIu64, ++ cmdline_size, max_cmdline_size); + return; + } + +-- +2.27.0 + diff --git a/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch b/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch new file mode 100644 index 0000000..88716f5 --- /dev/null +++ b/kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch @@ -0,0 +1,178 @@ +From 8020177f1c40da2a9ca09fa20dc90eda65739671 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:31 -0400 +Subject: [PATCH 06/42] s390x/pci: add routine to get host function handle from + CLP info +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [6/41] 8ab652cf4095e61f5f55726d41111de227d452e7 + +In order to interface with the underlying host zPCI device, we need +to know its function handle. Add a routine to grab this from the +vfio CLP capabilities chain. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-3-mjrosato@linux.ibm.com> +[thuth: Replace free(info) with g_free(info)] +Signed-off-by: Thomas Huth +(cherry picked from commit 21fa15298d88db2050a713cdf79c10cb0e09146f) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 83 ++++++++++++++++++++++++++------ + include/hw/s390x/s390-pci-vfio.h | 5 ++ + 2 files changed, 72 insertions(+), 16 deletions(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 6f80a47e29..08bcc55e85 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -124,6 +124,27 @@ static void s390_pci_read_base(S390PCIBusDevice *pbdev, + pbdev->zpci_fn.pft = 0; + } + ++static bool get_host_fh(S390PCIBusDevice *pbdev, struct vfio_device_info *info, ++ uint32_t *fh) ++{ ++ struct vfio_info_cap_header *hdr; ++ struct vfio_device_info_cap_zpci_base *cap; ++ VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ ++ hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ ++ /* Can only get the host fh with version 2 or greater */ ++ if (hdr == NULL || hdr->version < 2) { ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_BASE); ++ return false; ++ } ++ cap = (void *) hdr; ++ ++ *fh = cap->fh; ++ return true; ++} ++ + static void s390_pci_read_group(S390PCIBusDevice *pbdev, + struct vfio_device_info *info) + { +@@ -217,25 +238,13 @@ static void s390_pci_read_pfip(S390PCIBusDevice *pbdev, + memcpy(pbdev->zpci_fn.pfip, cap->pfip, CLP_PFIP_NR_SEGMENTS); + } + +-/* +- * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for +- * capabilities that contain information about CLP features provided by the +- * underlying host. +- * On entry, defaults have already been placed into the guest CLP response +- * buffers. 
On exit, defaults will have been overwritten for any CLP features +- * found in the capability chain; defaults will remain for any CLP features not +- * found in the chain. +- */ +-void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++static struct vfio_device_info *get_device_info(S390PCIBusDevice *pbdev, ++ uint32_t argsz) + { +- g_autofree struct vfio_device_info *info = NULL; ++ struct vfio_device_info *info = g_malloc0(argsz); + VFIOPCIDevice *vfio_pci; +- uint32_t argsz; + int fd; + +- argsz = sizeof(*info); +- info = g_malloc0(argsz); +- + vfio_pci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); + fd = vfio_pci->vbasedev.fd; + +@@ -250,7 +259,8 @@ retry: + + if (ioctl(fd, VFIO_DEVICE_GET_INFO, info)) { + trace_s390_pci_clp_dev_info(vfio_pci->vbasedev.name); +- return; ++ g_free(info); ++ return NULL; + } + + if (info->argsz > argsz) { +@@ -259,6 +269,47 @@ retry: + goto retry; + } + ++ return info; ++} ++ ++/* ++ * Get the host function handle from the vfio CLP capabilities chain. Returns ++ * true if a fh value was placed into the provided buffer. Returns false ++ * if a fh could not be obtained (ioctl failed or capabilitiy version does ++ * not include the fh) ++ */ ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ assert(fh); ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return false; ++ } ++ ++ return get_host_fh(pbdev, info, fh); ++} ++ ++/* ++ * This function will issue the VFIO_DEVICE_GET_INFO ioctl and look for ++ * capabilities that contain information about CLP features provided by the ++ * underlying host. ++ * On entry, defaults have already been placed into the guest CLP response ++ * buffers. On exit, defaults will have been overwritten for any CLP features ++ * found in the capability chain; defaults will remain for any CLP features not ++ * found in the chain. 
++ */ ++void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) ++{ ++ g_autofree struct vfio_device_info *info = NULL; ++ ++ info = get_device_info(pbdev, sizeof(*info)); ++ if (!info) { ++ return; ++ } ++ + /* + * Find the CLP features provided and fill in the guest CLP responses. + * Always call s390_pci_read_base first as information from this could +diff --git a/include/hw/s390x/s390-pci-vfio.h b/include/hw/s390x/s390-pci-vfio.h +index ff708aef50..ae1b126ff7 100644 +--- a/include/hw/s390x/s390-pci-vfio.h ++++ b/include/hw/s390x/s390-pci-vfio.h +@@ -20,6 +20,7 @@ bool s390_pci_update_dma_avail(int fd, unsigned int *avail); + S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + S390PCIBusDevice *pbdev); + void s390_pci_end_dma_count(S390pciState *s, S390PCIDMACount *cnt); ++bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh); + void s390_pci_get_clp_info(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_update_dma_avail(int fd, unsigned int *avail) +@@ -33,6 +34,10 @@ static inline S390PCIDMACount *s390_pci_start_dma_count(S390pciState *s, + } + static inline void s390_pci_end_dma_count(S390pciState *s, + S390PCIDMACount *cnt) { } ++static inline bool s390_pci_get_host_fh(S390PCIBusDevice *pbdev, uint32_t *fh) ++{ ++ return false; ++} + static inline void s390_pci_get_clp_info(S390PCIBusDevice *pbdev) { } + #endif + +-- +2.37.3 + diff --git a/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch b/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch new file mode 100644 index 0000000..563f782 --- /dev/null +++ b/kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch @@ -0,0 +1,99 @@ +From de6319fe0ce09297beae5ff4636c03217abe6f26 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:06 -0500 +Subject: [PATCH 04/42] s390x/pci: add supported DT information to clp response +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le 
Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [4/41] 275668f6d38fbc1dfa2f1aa8f58b2c319de2657d + +The DTSM is a mask that specifies which I/O Address Translation designation +types are supported. Today QEMU only supports DT=1. + +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit ac6aa30ac47b2abaf142f76de46374da2a98f6e7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 1 + + hw/s390x/s390-pci-inst.c | 1 + + hw/s390x/s390-pci-vfio.c | 1 + + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-clp.h | 3 ++- + 5 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 1b51a72838..01b58ebc70 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -782,6 +782,7 @@ static void s390_pci_init_default_group(void) + resgrp->i = 128; + resgrp->maxstbl = 128; + resgrp->version = 0; ++ resgrp->dtsm = ZPCI_DTSM; + } + + static void set_pbdev_info(S390PCIBusDevice *pbdev) +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 07bab85ce5..6d400d4147 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -329,6 +329,7 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + stw_p(&resgrp->i, group->zpci_group.i); + stw_p(&resgrp->maxstbl, group->zpci_group.maxstbl); + resgrp->version = group->zpci_group.version; ++ resgrp->dtsm = group->zpci_group.dtsm; + stw_p(&resgrp->hdr.rsp, CLP_RC_OK); + break; + } +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 2a153fa8c9..6f80a47e29 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -160,6 +160,7 @@ static void 
s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->i = cap->noi; + resgrp->maxstbl = cap->maxstbl; + resgrp->version = cap->version; ++ resgrp->dtsm = ZPCI_DTSM; + } + } + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 2727e7bdef..da3cde2bb4 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -37,6 +37,7 @@ + #define ZPCI_MAX_UID 0xffff + #define UID_UNDEFINED 0 + #define UID_CHECKING_ENABLED 0x01 ++#define ZPCI_DTSM 0x40 + + OBJECT_DECLARE_SIMPLE_TYPE(S390pciState, S390_PCI_HOST_BRIDGE) + OBJECT_DECLARE_SIMPLE_TYPE(S390PCIBus, S390_PCI_BUS) +diff --git a/include/hw/s390x/s390-pci-clp.h b/include/hw/s390x/s390-pci-clp.h +index 96b8e3f133..cc8c8662b8 100644 +--- a/include/hw/s390x/s390-pci-clp.h ++++ b/include/hw/s390x/s390-pci-clp.h +@@ -163,7 +163,8 @@ typedef struct ClpRspQueryPciGrp { + uint8_t fr; + uint16_t maxstbl; + uint16_t mui; +- uint64_t reserved3; ++ uint8_t dtsm; ++ uint8_t reserved3[7]; + uint64_t dasm; /* dma address space mask */ + uint64_t msia; /* MSI address */ + uint64_t reserved4; +-- +2.37.3 + diff --git a/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch b/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch new file mode 100644 index 0000000..4403658 --- /dev/null +++ b/kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch @@ -0,0 +1,60 @@ +From 5bd57d8ac3a4e75337eae81a3623b4dc2b417e2f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:33 -0400 +Subject: [PATCH 08/42] s390x/pci: don't fence interpreted devices without + MSI-X +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [8/41] 
52bad4368e9494c43133338b386dc0cc159aeedc + +Lack of MSI-X support is not an issue for interpreted passthrough +devices, so let's let these in. This will allow, for example, ISM +devices to be passed through -- but only when interpretation is +available and being used. + +Signed-off-by: Matthew Rosato +Reviewed-by: Thomas Huth +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-5-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 15d0e7942d3b31ff71d8e0e8cec3a8203214f19b) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 18bfae0465..07c7c155e3 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -881,6 +881,10 @@ static int s390_pci_msix_init(S390PCIBusDevice *pbdev) + + static void s390_pci_msix_free(S390PCIBusDevice *pbdev) + { ++ if (pbdev->msix.entries == 0) { ++ return; ++ } ++ + memory_region_del_subregion(&pbdev->iommu->mr, &pbdev->msix_notify_mr); + object_unparent(OBJECT(&pbdev->msix_notify_mr)); + } +@@ -1093,7 +1097,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + pbdev->interp = false; + } + +- if (s390_pci_msix_init(pbdev)) { ++ if (s390_pci_msix_init(pbdev) && !pbdev->interp) { + error_setg(errp, "MSI-X support is mandatory " + "in the S390 architecture"); + return; +-- +2.37.3 + diff --git a/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch b/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch new file mode 100644 index 0000000..c97b587 --- /dev/null +++ b/kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch @@ -0,0 +1,77 @@ +From 67ebb71d56e95adf185ab4971939e31c4c899863 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:04 -0500 +Subject: [PATCH 02/42] s390x/pci: don't use hard-coded dma range in reg_ioat +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [2/41] c7897321f9848ef8f115130832774bbcd6724f03 + +Instead use the values from clp info, they will either be the hard-coded +values or what came from the host driver via vfio. + +Fixes: 9670ee752727 ("s390x/pci: use a PCI Function structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-3-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit df7ce0a94d9283f0656b4bc0f21566973ff649a3) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 1c8ad91175..11b7f6bfa1 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -916,9 +916,10 @@ int pci_dereg_irqs(S390PCIBusDevice *pbdev) + return 0; + } + +-static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, ++static int reg_ioat(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib, + uintptr_t ra) + { ++ S390PCIIOMMU *iommu = pbdev->iommu; + uint64_t pba = ldq_p(&fib.pba); + uint64_t pal = ldq_p(&fib.pal); + uint64_t g_iota = ldq_p(&fib.iota); +@@ -927,7 +928,7 @@ static int reg_ioat(CPUS390XState *env, S390PCIIOMMU *iommu, ZpciFib fib, + + pba &= ~0xfff; + pal |= 0xfff; +- if (pba > pal || pba < ZPCI_SDMA_ADDR || pal > ZPCI_EDMA_ADDR) { ++ if (pba > pal || pba < pbdev->zpci_fn.sdma || pal > pbdev->zpci_fn.edma) { + s390_program_interrupt(env, PGM_OPERAND, ra); + return -EINVAL; + } +@@ -1125,7 +1126,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } else if (pbdev->iommu->enabled) { + cc = ZPCI_PCI_LS_ERR; 
+ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); +- } else if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ } else if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +@@ -1150,7 +1151,7 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { + pci_dereg_ioat(pbdev->iommu); +- if (reg_ioat(env, pbdev->iommu, fib, ra)) { ++ if (reg_ioat(env, pbdev, fib, ra)) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_INSUF_RES); + } +-- +2.37.3 + diff --git a/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch b/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch new file mode 100644 index 0000000..c36c575 --- /dev/null +++ b/kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch @@ -0,0 +1,265 @@ +From 362fae654bbae03741003e565fb95d73d8c0025f Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:34 -0400 +Subject: [PATCH 09/42] s390x/pci: enable adapter event notification for + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [9/41] 771975c436c7cb608e0e9e40edd732ac310beb69 + +Use the associated kvm ioctl operation to enable adapter event notification +and forwarding for devices when requested. This feature will be set up +with or without firmware assist based upon the 'forwarding_assist' setting. 
+ +Signed-off-by: Matthew Rosato +Message-Id: <20220902172737.170349-6-mjrosato@linux.ibm.com> +[thuth: Rename "forwarding_assist" property to "forwarding-assist"] +Signed-off-by: Thomas Huth +(cherry picked from commit d0bc7091c2013ad2fa164100cf7b17962370e8ab) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 20 ++++++++++++++--- + hw/s390x/s390-pci-inst.c | 40 +++++++++++++++++++++++++++++++-- + hw/s390x/s390-pci-kvm.c | 30 +++++++++++++++++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 14 ++++++++++++ + 5 files changed, 100 insertions(+), 5 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 07c7c155e3..cd152ce711 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -190,7 +190,10 @@ void s390_pci_sclp_deconfigure(SCCB *sccb) + rc = SCLP_RC_NO_ACTION_REQUIRED; + break; + default: +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1082,6 +1085,7 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + } else { + DPRINTF("zPCI interpretation facilities missing.\n"); + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); +@@ -1090,11 +1094,13 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + if (!pbdev->interp) { + /* Do vfio passthrough but intercept for I/O */ + pbdev->fh |= FH_SHM_VFIO; ++ pbdev->forwarding_assist = false; + } + } else { + pbdev->fh |= FH_SHM_EMUL; + /* Always intercept emulated devices */ + pbdev->interp = false; ++ pbdev->forwarding_assist = false; + } + + if (s390_pci_msix_init(pbdev) && !pbdev->interp) { +@@ -1244,7 +1250,10 @@ static void s390_pcihost_reset(DeviceState 
*dev) + /* Process all pending unplug requests */ + QTAILQ_FOREACH_SAFE(pbdev, &s->zpci_devs, link, next) { + if (pbdev->unplug_requested) { +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1382,7 +1391,10 @@ static void s390_pci_device_reset(DeviceState *dev) + break; + } + +- if (pbdev->summary_ind) { ++ if (pbdev->interp && (pbdev->fh & FH_MASK_ENABLE)) { ++ /* Interpreted devices were using interrupt forwarding */ ++ s390_pci_kvm_aif_disable(pbdev); ++ } else if (pbdev->summary_ind) { + pci_dereg_irqs(pbdev); + } + if (pbdev->iommu->enabled) { +@@ -1428,6 +1440,8 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), + DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), ++ DEFINE_PROP_BOOL("forwarding-assist", S390PCIBusDevice, forwarding_assist, ++ true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 651ec38635..20a9bcc7af 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1066,6 +1066,32 @@ static void fmb_update(void *opaque) + timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + ++static int mpcifc_reg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ ++ rc = s390_pci_kvm_aif_enable(pbdev, fib, pbdev->forwarding_assist); ++ if (rc) { ++ DPRINTF("Failed to enable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ ++static int mpcifc_dereg_int_interp(S390PCIBusDevice *pbdev, ZpciFib *fib) ++{ ++ int rc; ++ ++ rc = s390_pci_kvm_aif_disable(pbdev); ++ if (rc) { ++ DPRINTF("Failed to disable interrupt forwarding\n"); ++ return rc; ++ } ++ ++ return 0; ++} ++ + int 
mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + uintptr_t ra) + { +@@ -1120,7 +1146,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + + switch (oc) { + case ZPCI_MOD_FC_REG_INT: +- if (pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_reg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else if (reg_irqs(env, pbdev, fib)) { +@@ -1129,7 +1160,12 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + break; + case ZPCI_MOD_FC_DEREG_INT: +- if (!pbdev->summary_ind) { ++ if (pbdev->interp) { ++ if (mpcifc_dereg_int_interp(pbdev, &fib)) { ++ cc = ZPCI_PCI_LS_ERR; ++ s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); ++ } ++ } else if (!pbdev->summary_ind) { + cc = ZPCI_PCI_LS_ERR; + s390_set_status_code(env, r1, ZPCI_MOD_ST_SEQUENCE); + } else { +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +index 0f16104a74..9134fe185f 100644 +--- a/hw/s390x/s390-pci-kvm.c ++++ b/hw/s390x/s390-pci-kvm.c +@@ -11,12 +11,42 @@ + + #include "qemu/osdep.h" + ++#include ++ + #include "kvm/kvm_s390x.h" + #include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-inst.h" + #include "cpu_models.h" + + bool s390_pci_kvm_interp_allowed(void) + { + return kvm_s390_get_zpci_op() && !s390_is_pv(); + } ++ ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_REG_AEN, ++ .u.reg_aen.ibv = fib->aibv, ++ .u.reg_aen.sb = fib->aisb, ++ .u.reg_aen.noi = FIB_DATA_NOI(fib->data), ++ .u.reg_aen.isc = FIB_DATA_ISC(fib->data), ++ .u.reg_aen.sbo = FIB_DATA_AISBO(fib->data), ++ .u.reg_aen.flags = (assist) ? 
0 : KVM_S390_ZPCIOP_REGAEN_HOST ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} ++ ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ struct kvm_s390_zpci_op args = { ++ .fh = pbdev->fh, ++ .op = KVM_S390_ZPCIOP_DEREG_AEN ++ }; ++ ++ return kvm_vm_ioctl(kvm_state, KVM_S390_ZPCI_OP, &args); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index a9843dfe97..5b09f0cf2f 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -351,6 +351,7 @@ struct S390PCIBusDevice { + bool pci_unplug_request_processed; + bool unplug_requested; + bool interp; ++ bool forwarding_assist; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +index 80a2e7d0ca..933814a402 100644 +--- a/include/hw/s390x/s390-pci-kvm.h ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -12,13 +12,27 @@ + #ifndef HW_S390_PCI_KVM_H + #define HW_S390_PCI_KVM_H + ++#include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-inst.h" ++ + #ifdef CONFIG_KVM + bool s390_pci_kvm_interp_allowed(void); ++int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, bool assist); ++int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev); + #else + static inline bool s390_pci_kvm_interp_allowed(void) + { + return false; + } ++static inline int s390_pci_kvm_aif_enable(S390PCIBusDevice *pbdev, ZpciFib *fib, ++ bool assist) ++{ ++ return -EINVAL; ++} ++static inline int s390_pci_kvm_aif_disable(S390PCIBusDevice *pbdev) ++{ ++ return -EINVAL; ++} + #endif + + #endif +-- +2.37.3 + diff --git a/kvm-s390x-pci-enable-for-load-store-interpretation.patch b/kvm-s390x-pci-enable-for-load-store-interpretation.patch new file mode 100644 index 0000000..56f228b --- /dev/null +++ b/kvm-s390x-pci-enable-for-load-store-interpretation.patch @@ -0,0 +1,319 @@ +From 62fbb66d18f598d0896164383aab465e093fb0c1 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 
Sep 2022 13:27:32 -0400 +Subject: [PATCH 07/42] s390x/pci: enable for load/store interpretation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [7/41] 3a96e901e295bb9e0c530638c45b5da5d60c00bd + +If the ZPCI_OP ioctl reports that it is available and usable, then the +underlying KVM host will enable load/store interpretation for any guest +device without a SHM bit in the guest function handle. For a device that +will be using interpretation support, ensure the guest function handle +matches the host function handle; this value is re-checked every time the +guest issues a SET PCI FN to enable the guest device as it is the only +opportunity to reflect function handle changes. + +By default, unless interpret=off is specified, interpretation support will +always be assumed and exploited if the necessary ioctl and features are +available on the host kernel. When these are unavailable, we will silently +revert to the interception model; this allows existing guest configurations +to work unmodified on hosts with and without zPCI interpretation support, +allowing QEMU to choose the best support model available. 
+ +Signed-off-by: Matthew Rosato +Acked-by: Thomas Huth +Message-Id: <20220902172737.170349-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit dd1d5fd9684beeb0c14c39f497ef2aa9ac683aa7) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/meson.build | 1 + + hw/s390x/s390-pci-bus.c | 66 ++++++++++++++++++++++++++++++++- + hw/s390x/s390-pci-inst.c | 16 ++++++++ + hw/s390x/s390-pci-kvm.c | 22 +++++++++++ + include/hw/s390x/s390-pci-bus.h | 1 + + include/hw/s390x/s390-pci-kvm.h | 24 ++++++++++++ + target/s390x/kvm/kvm.c | 7 ++++ + target/s390x/kvm/kvm_s390x.h | 1 + + 8 files changed, 137 insertions(+), 1 deletion(-) + create mode 100644 hw/s390x/s390-pci-kvm.c + create mode 100644 include/hw/s390x/s390-pci-kvm.h + +diff --git a/hw/s390x/meson.build b/hw/s390x/meson.build +index 28484256ec..6e6e47fcda 100644 +--- a/hw/s390x/meson.build ++++ b/hw/s390x/meson.build +@@ -23,6 +23,7 @@ s390x_ss.add(when: 'CONFIG_KVM', if_true: files( + 's390-skeys-kvm.c', + 's390-stattrib-kvm.c', + 'pv.c', ++ 's390-pci-kvm.c', + )) + s390x_ss.add(when: 'CONFIG_TCG', if_true: files( + 'tod-tcg.c', +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index 01b58ebc70..18bfae0465 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -16,6 +16,7 @@ + #include "qapi/visitor.h" + #include "hw/s390x/s390-pci-bus.h" + #include "hw/s390x/s390-pci-inst.h" ++#include "hw/s390x/s390-pci-kvm.h" + #include "hw/s390x/s390-pci-vfio.h" + #include "hw/pci/pci_bus.h" + #include "hw/qdev-properties.h" +@@ -971,12 +972,51 @@ static void s390_pci_update_subordinate(PCIDevice *dev, uint32_t nr) + } + } + ++static int s390_pci_interp_plug(S390pciState *s, S390PCIBusDevice *pbdev) ++{ ++ uint32_t idx, fh; ++ ++ if (!s390_pci_get_host_fh(pbdev, &fh)) { ++ return -EPERM; ++ } ++ ++ /* ++ * The host device is already in an enabled state, but we always present ++ * the initial device state to the guest as disabled (ZPCI_FS_DISABLED). 
++ * Therefore, mask off the enable bit from the passthrough handle until ++ * the guest issues a CLP SET PCI FN later to enable the device. ++ */ ++ pbdev->fh = fh & ~FH_MASK_ENABLE; ++ ++ /* Next, see if the idx is already in-use */ ++ idx = pbdev->fh & FH_MASK_INDEX; ++ if (pbdev->idx != idx) { ++ if (s390_pci_find_dev_by_idx(s, idx)) { ++ return -EINVAL; ++ } ++ /* ++ * Update the idx entry with the passed through idx ++ * If the relinquished idx is lower than next_idx, use it ++ * to replace next_idx ++ */ ++ g_hash_table_remove(s->zpci_table, &pbdev->idx); ++ if (idx < s->next_idx) { ++ s->next_idx = idx; ++ } ++ pbdev->idx = idx; ++ g_hash_table_insert(s->zpci_table, &pbdev->idx, pbdev); ++ } ++ ++ return 0; ++} ++ + static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + Error **errp) + { + S390pciState *s = S390_PCI_HOST_BRIDGE(hotplug_dev); + PCIDevice *pdev = NULL; + S390PCIBusDevice *pbdev = NULL; ++ int rc; + + if (object_dynamic_cast(OBJECT(dev), TYPE_PCI_BRIDGE)) { + PCIBridge *pb = PCI_BRIDGE(dev); +@@ -1022,12 +1062,35 @@ static void s390_pcihost_plug(HotplugHandler *hotplug_dev, DeviceState *dev, + set_pbdev_info(pbdev); + + if (object_dynamic_cast(OBJECT(dev), "vfio-pci")) { +- pbdev->fh |= FH_SHM_VFIO; ++ /* ++ * By default, interpretation is always requested; if the available ++ * facilities indicate it is not available, fallback to the ++ * interception model. 
++ */ ++ if (pbdev->interp) { ++ if (s390_pci_kvm_interp_allowed()) { ++ rc = s390_pci_interp_plug(s, pbdev); ++ if (rc) { ++ error_setg(errp, "Plug failed for zPCI device in " ++ "interpretation mode: %d", rc); ++ return; ++ } ++ } else { ++ DPRINTF("zPCI interpretation facilities missing.\n"); ++ pbdev->interp = false; ++ } ++ } + pbdev->iommu->dma_limit = s390_pci_start_dma_count(s, pbdev); + /* Fill in CLP information passed via the vfio region */ + s390_pci_get_clp_info(pbdev); ++ if (!pbdev->interp) { ++ /* Do vfio passthrough but intercept for I/O */ ++ pbdev->fh |= FH_SHM_VFIO; ++ } + } else { + pbdev->fh |= FH_SHM_EMUL; ++ /* Always intercept emulated devices */ ++ pbdev->interp = false; + } + + if (s390_pci_msix_init(pbdev)) { +@@ -1360,6 +1423,7 @@ static Property s390_pci_device_properties[] = { + DEFINE_PROP_UINT16("uid", S390PCIBusDevice, uid, UID_UNDEFINED), + DEFINE_PROP_S390_PCI_FID("fid", S390PCIBusDevice, fid), + DEFINE_PROP_STRING("target", S390PCIBusDevice, target), ++ DEFINE_PROP_BOOL("interpret", S390PCIBusDevice, interp, true), + DEFINE_PROP_END_OF_LIST(), + }; + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 6d400d4147..651ec38635 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -18,6 +18,8 @@ + #include "sysemu/hw_accel.h" + #include "hw/s390x/s390-pci-inst.h" + #include "hw/s390x/s390-pci-bus.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "hw/s390x/s390-pci-vfio.h" + #include "hw/s390x/tod.h" + + #ifndef DEBUG_S390PCI_INST +@@ -246,6 +248,20 @@ int clp_service_call(S390CPU *cpu, uint8_t r2, uintptr_t ra) + goto out; + } + ++ /* ++ * Take this opportunity to make sure we still have an accurate ++ * host fh. It's possible part of the handle changed while the ++ * device was disabled to the guest (e.g. 
vfio hot reset for ++ * ISM during plug) ++ */ ++ if (pbdev->interp) { ++ /* Take this opportunity to make sure we are sync'd with host */ ++ if (!s390_pci_get_host_fh(pbdev, &pbdev->fh) || ++ !(pbdev->fh & FH_MASK_ENABLE)) { ++ stw_p(&ressetpci->hdr.rsp, CLP_RC_SETPCIFN_FH); ++ goto out; ++ } ++ } + pbdev->fh |= FH_MASK_ENABLE; + pbdev->state = ZPCI_FS_ENABLED; + stl_p(&ressetpci->fh, pbdev->fh); +diff --git a/hw/s390x/s390-pci-kvm.c b/hw/s390x/s390-pci-kvm.c +new file mode 100644 +index 0000000000..0f16104a74 +--- /dev/null ++++ b/hw/s390x/s390-pci-kvm.c +@@ -0,0 +1,22 @@ ++/* ++ * s390 zPCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. ++ */ ++ ++#include "qemu/osdep.h" ++ ++#include "kvm/kvm_s390x.h" ++#include "hw/s390x/pv.h" ++#include "hw/s390x/s390-pci-kvm.h" ++#include "cpu_models.h" ++ ++bool s390_pci_kvm_interp_allowed(void) ++{ ++ return kvm_s390_get_zpci_op() && !s390_is_pv(); ++} +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index da3cde2bb4..a9843dfe97 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -350,6 +350,7 @@ struct S390PCIBusDevice { + IndAddr *indicator; + bool pci_unplug_request_processed; + bool unplug_requested; ++ bool interp; + QTAILQ_ENTRY(S390PCIBusDevice) link; + }; + +diff --git a/include/hw/s390x/s390-pci-kvm.h b/include/hw/s390x/s390-pci-kvm.h +new file mode 100644 +index 0000000000..80a2e7d0ca +--- /dev/null ++++ b/include/hw/s390x/s390-pci-kvm.h +@@ -0,0 +1,24 @@ ++/* ++ * s390 PCI KVM interfaces ++ * ++ * Copyright 2022 IBM Corp. ++ * Author(s): Matthew Rosato ++ * ++ * This work is licensed under the terms of the GNU GPL, version 2 or (at ++ * your option) any later version. See the COPYING file in the top-level ++ * directory. 
++ */ ++ ++#ifndef HW_S390_PCI_KVM_H ++#define HW_S390_PCI_KVM_H ++ ++#ifdef CONFIG_KVM ++bool s390_pci_kvm_interp_allowed(void); ++#else ++static inline bool s390_pci_kvm_interp_allowed(void) ++{ ++ return false; ++} ++#endif ++ ++#endif +diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c +index ba04997da1..30712487d4 100644 +--- a/target/s390x/kvm/kvm.c ++++ b/target/s390x/kvm/kvm.c +@@ -158,6 +158,7 @@ static int cap_ri; + static int cap_hpage_1m; + static int cap_vcpu_resets; + static int cap_protected; ++static int cap_zpci_op; + + static bool mem_op_storage_key_support; + +@@ -363,6 +364,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s) + cap_s390_irq = kvm_check_extension(s, KVM_CAP_S390_INJECT_IRQ); + cap_vcpu_resets = kvm_check_extension(s, KVM_CAP_S390_VCPU_RESETS); + cap_protected = kvm_check_extension(s, KVM_CAP_S390_PROTECTED); ++ cap_zpci_op = kvm_check_extension(s, KVM_CAP_S390_ZPCI_OP); + + kvm_vm_enable_cap(s, KVM_CAP_S390_USER_SIGP, 0); + kvm_vm_enable_cap(s, KVM_CAP_S390_VECTOR_REGISTERS, 0); +@@ -2579,3 +2581,8 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return true; + } ++ ++int kvm_s390_get_zpci_op(void) ++{ ++ return cap_zpci_op; ++} +diff --git a/target/s390x/kvm/kvm_s390x.h b/target/s390x/kvm/kvm_s390x.h +index 05a5e1e6f4..aaae8570de 100644 +--- a/target/s390x/kvm/kvm_s390x.h ++++ b/target/s390x/kvm/kvm_s390x.h +@@ -27,6 +27,7 @@ void kvm_s390_vcpu_interrupt_pre_save(S390CPU *cpu); + int kvm_s390_vcpu_interrupt_post_load(S390CPU *cpu); + int kvm_s390_get_hpage_1m(void); + int kvm_s390_get_ri(void); ++int kvm_s390_get_zpci_op(void); + int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_get_clock_ext(uint8_t *tod_high, uint64_t *tod_clock); + int kvm_s390_set_clock(uint8_t tod_high, uint64_t tod_clock); +-- +2.37.3 + diff --git a/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch b/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch new file mode 100644 index 
0000000..2778225 --- /dev/null +++ b/kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch @@ -0,0 +1,192 @@ +From b98a5bc4c21284dd0a8a1c86b91af81fcb75f060 Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:35 -0400 +Subject: [PATCH 10/42] s390x/pci: let intercept devices have separate PCI + groups +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [10/41] 1545bdcd2e21386afa9869f0414e96eecb62647d + +Let's use the reserved pool of simulated PCI groups to allow intercept +devices to have separate groups from interpreted devices as some group +values may be different. If we run out of simulated PCI groups, subsequent +intercept devices just get the default group. +Furthermore, if we encounter any PCI groups from hostdevs that are marked +as simulated, let's just assign them to the default group to avoid +conflicts between host simulated groups and our own simulated groups. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-7-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 30dcf4f7fd23bef7d72a2454c60881710fd4c785) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-bus.c | 19 ++++++++++++++-- + hw/s390x/s390-pci-vfio.c | 40 ++++++++++++++++++++++++++++++--- + include/hw/s390x/s390-pci-bus.h | 6 ++++- + 3 files changed, 59 insertions(+), 6 deletions(-) + +diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c +index cd152ce711..d8b1e44a02 100644 +--- a/hw/s390x/s390-pci-bus.c ++++ b/hw/s390x/s390-pci-bus.c +@@ -748,13 +748,14 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn) + object_unref(OBJECT(iommu)); + } + +-S390PCIGroup *s390_group_create(int id) ++S390PCIGroup *s390_group_create(int id, int host_id) + { + S390PCIGroup *group; + S390pciState *s = s390_get_phb(); + + group = g_new0(S390PCIGroup, 1); + group->id = id; ++ group->host_id = host_id; + QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link); + return group; + } +@@ -772,12 +773,25 @@ S390PCIGroup *s390_group_find(int id) + return NULL; + } + ++S390PCIGroup *s390_group_find_host_sim(int host_id) ++{ ++ S390PCIGroup *group; ++ S390pciState *s = s390_get_phb(); ++ ++ QTAILQ_FOREACH(group, &s->zpci_groups, link) { ++ if (group->id >= ZPCI_SIM_GRP_START && group->host_id == host_id) { ++ return group; ++ } ++ } ++ return NULL; ++} ++ + static void s390_pci_init_default_group(void) + { + S390PCIGroup *group; + ClpRspQueryPciGrp *resgrp; + +- group = s390_group_create(ZPCI_DEFAULT_FN_GRP); ++ group = s390_group_create(ZPCI_DEFAULT_FN_GRP, ZPCI_DEFAULT_FN_GRP); + resgrp = &group->zpci_group; + resgrp->fr = 1; + resgrp->dasm = 0; +@@ -825,6 +839,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp) + NULL, g_free); + s->zpci_table = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, NULL); + s->bus_no = 0; ++ s->next_sim_grp = ZPCI_SIM_GRP_START; + 
QTAILQ_INIT(&s->pending_sei); + QTAILQ_INIT(&s->zpci_devs); + QTAILQ_INIT(&s->zpci_dma_limit); +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 08bcc55e85..338f436e87 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -150,13 +150,18 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + { + struct vfio_info_cap_header *hdr; + struct vfio_device_info_cap_zpci_group *cap; ++ S390pciState *s = s390_get_phb(); + ClpRspQueryPciGrp *resgrp; + VFIOPCIDevice *vpci = container_of(pbdev->pdev, VFIOPCIDevice, pdev); ++ uint8_t start_gid = pbdev->zpci_fn.pfgid; + + hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + +- /* If capability not provided, just use the default group */ +- if (hdr == NULL) { ++ /* ++ * If capability not provided or the underlying hostdev is simulated, just ++ * use the default group. ++ */ ++ if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) { + trace_s390_pci_clp_cap(vpci->vbasedev.name, + VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); + pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; +@@ -165,11 +170,40 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + } + cap = (void *) hdr; + ++ /* ++ * For an intercept device, let's use an existing simulated group if one ++ * one was already created for other intercept devices in this group. ++ * If not, create a new simulated group if any are still available. ++ * If all else fails, just fall back on the default group. 
++ */ ++ if (!pbdev->interp) { ++ pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid); ++ if (pbdev->pci_group) { ++ /* Use existing simulated group */ ++ pbdev->zpci_fn.pfgid = pbdev->pci_group->id; ++ return; ++ } else { ++ if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) { ++ /* All out of simulated groups, use default */ ++ trace_s390_pci_clp_cap(vpci->vbasedev.name, ++ VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); ++ pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP; ++ pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP); ++ return; ++ } else { ++ /* We can assign a new simulated group */ ++ pbdev->zpci_fn.pfgid = s->next_sim_grp; ++ s->next_sim_grp++; ++ /* Fall through to create the new sim group using CLP info */ ++ } ++ } ++ } ++ + /* See if the PCI group is already defined, create if not */ + pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid); + + if (!pbdev->pci_group) { +- pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid); ++ pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid, start_gid); + + resgrp = &pbdev->pci_group->zpci_group; + if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) { +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index 5b09f0cf2f..0605fcea24 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -315,13 +315,16 @@ typedef struct ZpciFmb { + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + + #define ZPCI_DEFAULT_FN_GRP 0xFF ++#define ZPCI_SIM_GRP_START 0xF0 + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; ++ int host_id; + QTAILQ_ENTRY(S390PCIGroup) link; + } S390PCIGroup; +-S390PCIGroup *s390_group_create(int id); ++S390PCIGroup *s390_group_create(int id, int host_id); + S390PCIGroup *s390_group_find(int id); ++S390PCIGroup *s390_group_find_host_sim(int host_id); + + struct S390PCIBusDevice { + DeviceState qdev; +@@ -370,6 +373,7 @@ struct S390pciState { + QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs; + 
QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit; + QTAILQ_HEAD(, S390PCIGroup) zpci_groups; ++ uint8_t next_sim_grp; + }; + + S390pciState *s390_get_phb(void); +-- +2.37.3 + diff --git a/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch b/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch new file mode 100644 index 0000000..bbd9612 --- /dev/null +++ b/kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch @@ -0,0 +1,52 @@ +From 65f90bfccf7500978879c15104a79de58173a06b Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 2 Sep 2022 13:27:36 -0400 +Subject: [PATCH 11/42] s390x/pci: reflect proper maxstbl for groups of + interpreted devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [11/41] 9ac2f5dedef3d743ef621525eef222a3e09d63b3 + +The maximum supported store block length might be different depending +on whether the instruction is interpretively executed (firmware-reported +maximum) or handled via userspace intercept (host kernel API maximum). +Choose the best available value during group creation. 
+ +Signed-off-by: Matthew Rosato +Reviewed-by: Pierre Morel +Message-Id: <20220902172737.170349-8-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit 9ee8f7e46a7d42ede69a4780200129bf1acb0d01) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-vfio.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c +index 338f436e87..2aefa508a0 100644 +--- a/hw/s390x/s390-pci-vfio.c ++++ b/hw/s390x/s390-pci-vfio.c +@@ -213,7 +213,11 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev, + resgrp->msia = cap->msi_addr; + resgrp->mui = cap->mui; + resgrp->i = cap->noi; +- resgrp->maxstbl = cap->maxstbl; ++ if (pbdev->interp && hdr->version >= 2) { ++ resgrp->maxstbl = cap->imaxstbl; ++ } else { ++ resgrp->maxstbl = cap->maxstbl; ++ } + resgrp->version = cap->version; + resgrp->dtsm = ZPCI_DTSM; + } +-- +2.37.3 + diff --git a/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch b/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch new file mode 100644 index 0000000..399f115 --- /dev/null +++ b/kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch @@ -0,0 +1,49 @@ +From 55294fc4a955491f1fd947e4d98bd6df832c88ba Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:03 -0500 +Subject: [PATCH 01/42] s390x/pci: use a reserved ID for the default PCI group +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [1/41] ad3ed38dec95acf0da04d7669fe772d798d039fc + +The current default PCI group being used can technically collide with a +real group ID passed from a hostdev. 
Let's instead use a group ID that +comes from a special pool (0xF0-0xFF) that is architected to be reserved +for simulated devices. + +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-2-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit b2892a2b9d45d25b909108ca633d19f9d8d673f5) +Signed-off-by: Cédric Le Goater +--- + include/hw/s390x/s390-pci-bus.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h +index aa891c178d..2727e7bdef 100644 +--- a/include/hw/s390x/s390-pci-bus.h ++++ b/include/hw/s390x/s390-pci-bus.h +@@ -313,7 +313,7 @@ typedef struct ZpciFmb { + } ZpciFmb; + QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb"); + +-#define ZPCI_DEFAULT_FN_GRP 0x20 ++#define ZPCI_DEFAULT_FN_GRP 0xFF + typedef struct S390PCIGroup { + ClpRspQueryPciGrp zpci_group; + int id; +-- +2.37.3 + diff --git a/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch b/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch new file mode 100644 index 0000000..2fda07a --- /dev/null +++ b/kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch @@ -0,0 +1,59 @@ +From c251a7a16e776072b9c7bbc4e61cfa4f52599b0a Mon Sep 17 00:00:00 2001 +From: Matthew Rosato +Date: Fri, 3 Dec 2021 09:27:05 -0500 +Subject: [PATCH 03/42] s390x/pci: use the passthrough measurement update + interval +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [3/41] bc31ea731fe64e51522f1202e65528311397b919 + +We may have gotten a measurement update 
interval from the underlying host +via vfio -- Use it to set the interval via which we update the function +measurement block. + +Fixes: 28dc86a072 ("s390x/pci: use a PCI Group structure") +Signed-off-by: Matthew Rosato +Reviewed-by: Eric Farman +Reviewed-by: Pierre Morel +Message-Id: <20211203142706.427279-4-mjrosato@linux.ibm.com> +Signed-off-by: Thomas Huth +(cherry picked from commit cb6d6a3e6aa1226b67fd218953dcb3866c3a6845) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-pci-inst.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c +index 11b7f6bfa1..07bab85ce5 100644 +--- a/hw/s390x/s390-pci-inst.c ++++ b/hw/s390x/s390-pci-inst.c +@@ -1046,7 +1046,7 @@ static void fmb_update(void *opaque) + sizeof(pbdev->fmb.last_update))) { + return; + } +- timer_mod(pbdev->fmb_timer, t + DEFAULT_MUI); ++ timer_mod(pbdev->fmb_timer, t + pbdev->pci_group->zpci_group.mui); + } + + int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, +@@ -1204,7 +1204,8 @@ int mpcifc_service_call(S390CPU *cpu, uint8_t r1, uint64_t fiba, uint8_t ar, + } + pbdev->fmb_addr = fmb_addr; + timer_mod(pbdev->fmb_timer, +- qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + DEFAULT_MUI); ++ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + ++ pbdev->pci_group->zpci_group.mui); + break; + } + default: +-- +2.37.3 + diff --git a/kvm-s390x-pv-Add-dump-support.patch b/kvm-s390x-pv-Add-dump-support.patch new file mode 100644 index 0000000..dec84fd --- /dev/null +++ b/kvm-s390x-pv-Add-dump-support.patch @@ -0,0 +1,445 @@ +From 86aeb4fd7ff9395afba574e422d83f990ce1f047 Mon Sep 17 00:00:00 2001 +From: Janosch Frank +Date: Mon, 17 Oct 2022 08:38:22 +0000 +Subject: [PATCH 41/42] s390x: pv: Add dump support +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump 
+RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [41/41] 2731c2329276e76013e3b3df21e9743bc74edd2b + +Sometimes dumping a guest from the outside is the only way to get the +data that is needed. This can be the case if a dumping mechanism like +KDUMP hasn't been configured or data needs to be fetched at a specific +point. Dumping a protected guest from the outside without help from +fw/hw doesn't yield sufficient data to be useful. Hence we now +introduce PV dump support. + +The PV dump support works by integrating the firmware into the dump +process. New Ultravisor calls are used to initiate the dump process, +dump cpu data, dump memory state and lastly complete the dump process. +The UV calls are exposed by KVM via the new KVM_PV_DUMP command and +its subcommands. The guest's data is fully encrypted and can only be +decrypted by the entity that owns the customer communication key for +the dumped guest. Also dumping needs to be allowed via a flag in the +SE header. + +On the QEMU side of things we store the PV dump data in the newly +introduced architecture ELF sections (storage state and completion +data) and the cpu notes (for cpu dump data). + +Users can use the zgetdump tool to convert the encrypted QEMU dump to an +unencrypted one. 
+ +Signed-off-by: Janosch Frank +Reviewed-by: Steffen Eiden +Message-Id: <20221017083822.43118-11-frankja@linux.ibm.com> +(cherry picked from commit 113d8f4e95cf0450bea421263de6ec016c779ad0) +Signed-off-by: Cédric Le Goater +--- + dump/dump.c | 12 +- + include/sysemu/dump.h | 5 + + target/s390x/arch_dump.c | 262 +++++++++++++++++++++++++++++++++++---- + 3 files changed, 246 insertions(+), 33 deletions(-) + +diff --git a/dump/dump.c b/dump/dump.c +index 4aa8fb64d2..5dee060b73 100644 +--- a/dump/dump.c ++++ b/dump/dump.c +@@ -709,9 +709,9 @@ static void dump_begin(DumpState *s, Error **errp) + write_elf_notes(s, errp); + } + +-static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + int64_t size, left, right; + +@@ -729,9 +729,9 @@ static int64_t dump_filtered_memblock_size(GuestPhysBlock *block, + return size; + } + +-static int64_t dump_filtered_memblock_start(GuestPhysBlock *block, +- int64_t filter_area_start, +- int64_t filter_area_length) ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, ++ int64_t filter_area_start, ++ int64_t filter_area_length) + { + if (filter_area_length) { + /* return -1 if the block is not within filter area */ +diff --git a/include/sysemu/dump.h b/include/sysemu/dump.h +index 38ccac7190..4ffed0b659 100644 +--- a/include/sysemu/dump.h ++++ b/include/sysemu/dump.h +@@ -215,4 +215,9 @@ typedef struct DumpState { + uint16_t cpu_to_dump16(DumpState *s, uint16_t val); + uint32_t cpu_to_dump32(DumpState *s, uint32_t val); + uint64_t cpu_to_dump64(DumpState *s, uint64_t val); ++ ++int64_t dump_filtered_memblock_size(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); ++int64_t dump_filtered_memblock_start(GuestPhysBlock *block, int64_t filter_area_start, ++ int64_t filter_area_length); + #endif +diff --git 
a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index f60a14920d..a2329141e8 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -12,11 +12,13 @@ + */ + + #include "qemu/osdep.h" ++#include "qemu/units.h" + #include "cpu.h" + #include "s390x-internal.h" + #include "elf.h" + #include "sysemu/dump.h" +- ++#include "hw/s390x/pv.h" ++#include "kvm/kvm_s390x.h" + + struct S390xUserRegsStruct { + uint64_t psw[2]; +@@ -76,9 +78,16 @@ typedef struct noteStruct { + uint64_t todcmp; + uint32_t todpreg; + uint64_t ctrs[16]; ++ uint8_t dynamic[1]; /* ++ * Would be a flexible array member, if ++ * that was legal inside a union. Real ++ * size comes from PV info interface. ++ */ + } contents; + } QEMU_PACKED Note; + ++static bool pv_dump_initialized; ++ + static void s390x_write_elf64_prstatus(Note *note, S390CPU *cpu, int id) + { + int i; +@@ -177,28 +186,39 @@ static void s390x_write_elf64_prefix(Note *note, S390CPU *cpu, int id) + note->contents.prefix = cpu_to_be32((uint32_t)(cpu->env.psa)); + } + ++static void s390x_write_elf64_pv(Note *note, S390CPU *cpu, int id) ++{ ++ note->hdr.n_type = cpu_to_be32(NT_S390_PV_CPU_DATA); ++ if (!pv_dump_initialized) { ++ return; ++ } ++ kvm_s390_dump_cpu(cpu, &note->contents.dynamic); ++} + + typedef struct NoteFuncDescStruct { + int contents_size; ++ uint64_t (*note_size_func)(void); /* NULL for non-dynamic sized contents */ + void (*note_contents_func)(Note *note, S390CPU *cpu, int id); ++ bool pvonly; + } NoteFuncDesc; + + static const NoteFuncDesc note_core[] = { +- {sizeof_field(Note, contents.prstatus), s390x_write_elf64_prstatus}, +- {sizeof_field(Note, contents.fpregset), s390x_write_elf64_fpregset}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prstatus), NULL, s390x_write_elf64_prstatus, false}, ++ {sizeof_field(Note, contents.fpregset), NULL, s390x_write_elf64_fpregset, false}, ++ { 0, NULL, NULL, false} + }; + + static const NoteFuncDesc note_linux[] = { +- {sizeof_field(Note, contents.prefix), 
s390x_write_elf64_prefix}, +- {sizeof_field(Note, contents.ctrs), s390x_write_elf64_ctrs}, +- {sizeof_field(Note, contents.timer), s390x_write_elf64_timer}, +- {sizeof_field(Note, contents.todcmp), s390x_write_elf64_todcmp}, +- {sizeof_field(Note, contents.todpreg), s390x_write_elf64_todpreg}, +- {sizeof_field(Note, contents.vregslo), s390x_write_elf64_vregslo}, +- {sizeof_field(Note, contents.vregshi), s390x_write_elf64_vregshi}, +- {sizeof_field(Note, contents.gscb), s390x_write_elf64_gscb}, +- { 0, NULL} ++ {sizeof_field(Note, contents.prefix), NULL, s390x_write_elf64_prefix, false}, ++ {sizeof_field(Note, contents.ctrs), NULL, s390x_write_elf64_ctrs, false}, ++ {sizeof_field(Note, contents.timer), NULL, s390x_write_elf64_timer, false}, ++ {sizeof_field(Note, contents.todcmp), NULL, s390x_write_elf64_todcmp, false}, ++ {sizeof_field(Note, contents.todpreg), NULL, s390x_write_elf64_todpreg, false}, ++ {sizeof_field(Note, contents.vregslo), NULL, s390x_write_elf64_vregslo, false}, ++ {sizeof_field(Note, contents.vregshi), NULL, s390x_write_elf64_vregshi, false}, ++ {sizeof_field(Note, contents.gscb), NULL, s390x_write_elf64_gscb, false}, ++ {0, kvm_s390_pv_dmp_get_size_cpu, s390x_write_elf64_pv, true}, ++ { 0, NULL, NULL, false} + }; + + static int s390x_write_elf64_notes(const char *note_name, +@@ -207,22 +227,41 @@ static int s390x_write_elf64_notes(const char *note_name, + DumpState *s, + const NoteFuncDesc *funcs) + { +- Note note; ++ Note note, *notep; + const NoteFuncDesc *nf; +- int note_size; ++ int note_size, content_size; + int ret = -1; + + assert(strlen(note_name) < sizeof(note.name)); + + for (nf = funcs; nf->note_contents_func; nf++) { +- memset(&note, 0, sizeof(note)); +- note.hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); +- note.hdr.n_descsz = cpu_to_be32(nf->contents_size); +- g_strlcpy(note.name, note_name, sizeof(note.name)); +- (*nf->note_contents_func)(&note, cpu, id); ++ notep = &note; ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ ++ 
content_size = nf->note_size_func ? nf->note_size_func() : nf->contents_size; ++ note_size = sizeof(note) - sizeof(notep->contents) + content_size; ++ ++ /* Notes with dynamic sizes need to allocate a note */ ++ if (nf->note_size_func) { ++ notep = g_malloc(note_size); ++ } ++ ++ memset(notep, 0, sizeof(note)); + +- note_size = sizeof(note) - sizeof(note.contents) + nf->contents_size; +- ret = f(&note, note_size, s); ++ /* Setup note header data */ ++ notep->hdr.n_descsz = cpu_to_be32(content_size); ++ notep->hdr.n_namesz = cpu_to_be32(strlen(note_name) + 1); ++ g_strlcpy(notep->name, note_name, sizeof(notep->name)); ++ ++ /* Get contents and write them out */ ++ (*nf->note_contents_func)(notep, cpu, id); ++ ret = f(notep, note_size, s); ++ ++ if (nf->note_size_func) { ++ g_free(notep); ++ } + + if (ret < 0) { + return -1; +@@ -247,13 +286,179 @@ int s390_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, + return s390x_write_elf64_notes("LINUX", f, cpu, cpuid, s, note_linux); + } + ++/* PV dump section size functions */ ++static uint64_t get_mem_state_size_from_len(uint64_t len) ++{ ++ return (len / (MiB)) * kvm_s390_pv_dmp_get_size_mem_state(); ++} ++ ++static uint64_t get_size_mem_state(DumpState *s) ++{ ++ return get_mem_state_size_from_len(s->total_size); ++} ++ ++static uint64_t get_size_completion_data(DumpState *s) ++{ ++ return kvm_s390_pv_dmp_get_size_completion_data(); ++} ++ ++/* PV dump section data functions*/ ++static int get_data_completion(DumpState *s, uint8_t *buff) ++{ ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ rc = kvm_s390_dump_completion_data(buff); ++ if (!rc) { ++ pv_dump_initialized = false; ++ } ++ return rc; ++} ++ ++static int get_mem_state(DumpState *s, uint8_t *buff) ++{ ++ int64_t memblock_size, memblock_start; ++ GuestPhysBlock *block; ++ uint64_t off; ++ int rc; ++ ++ QTAILQ_FOREACH(block, &s->guest_phys_blocks.head, next) { ++ memblock_start = dump_filtered_memblock_start(block, s->filter_area_begin, 
++ s->filter_area_length); ++ if (memblock_start == -1) { ++ continue; ++ } ++ ++ memblock_size = dump_filtered_memblock_size(block, s->filter_area_begin, ++ s->filter_area_length); ++ ++ off = get_mem_state_size_from_len(block->target_start); ++ ++ rc = kvm_s390_dump_mem_state(block->target_start, ++ get_mem_state_size_from_len(memblock_size), ++ buff + off); ++ if (rc) { ++ return rc; ++ } ++ } ++ ++ return 0; ++} ++ ++static struct sections { ++ uint64_t (*sections_size_func)(DumpState *s); ++ int (*sections_contents_func)(DumpState *s, uint8_t *buff); ++ char sctn_str[12]; ++} sections[] = { ++ { get_size_mem_state, get_mem_state, "pv_mem_meta"}, ++ { get_size_completion_data, get_data_completion, "pv_compl"}, ++ {NULL , NULL, ""} ++}; ++ ++static uint64_t arch_sections_write_hdr(DumpState *s, uint8_t *buff) ++{ ++ Elf64_Shdr *shdr = (void *)buff; ++ struct sections *sctn = sections; ++ uint64_t off = s->section_offset; ++ ++ if (!pv_dump_initialized) { ++ return 0; ++ } ++ ++ for (; sctn->sections_size_func; off += shdr->sh_size, sctn++, shdr++) { ++ memset(shdr, 0, sizeof(*shdr)); ++ shdr->sh_type = SHT_PROGBITS; ++ shdr->sh_offset = off; ++ shdr->sh_size = sctn->sections_size_func(s); ++ shdr->sh_name = s->string_table_buf->len; ++ g_array_append_vals(s->string_table_buf, sctn->sctn_str, sizeof(sctn->sctn_str)); ++ } ++ ++ return (uintptr_t)shdr - (uintptr_t)buff; ++} ++ ++ ++/* Add arch specific number of sections and their respective sizes */ ++static void arch_sections_add(DumpState *s) ++{ ++ struct sections *sctn = sections; ++ ++ /* ++ * We only do a PV dump if we are running a PV guest, KVM supports ++ * the dump API and we got valid dump length information. ++ */ ++ if (!s390_is_pv() || !kvm_s390_get_protected_dump() || ++ !kvm_s390_pv_info_basic_valid()) { ++ return; ++ } ++ ++ /* ++ * Start the UV dump process by doing the initialize dump call via ++ * KVM as the proxy. 
++ */ ++ if (!kvm_s390_dump_init()) { ++ pv_dump_initialized = true; ++ } else { ++ /* ++ * Dump init failed, maybe the guest owner disabled dumping. ++ * We'll continue the non-PV dump process since this is no ++ * reason to crash qemu. ++ */ ++ return; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ s->shdr_num += 1; ++ s->elf_section_data_size += sctn->sections_size_func(s); ++ } ++} ++ ++/* ++ * After the PV dump has been initialized, the CPU data has been ++ * fetched and memory has been dumped, we need to grab the tweak data ++ * and the completion data. ++ */ ++static int arch_sections_write(DumpState *s, uint8_t *buff) ++{ ++ struct sections *sctn = sections; ++ int rc; ++ ++ if (!pv_dump_initialized) { ++ return -EINVAL; ++ } ++ ++ for (; sctn->sections_size_func; sctn++) { ++ rc = sctn->sections_contents_func(s, buff); ++ buff += sctn->sections_size_func(s); ++ if (rc) { ++ return rc; ++ } ++ } ++ return 0; ++} ++ + int cpu_get_dump_info(ArchDumpInfo *info, + const struct GuestPhysBlockList *guest_phys_blocks) + { + info->d_machine = EM_S390; + info->d_endian = ELFDATA2MSB; + info->d_class = ELFCLASS64; +- ++ /* ++ * This is evaluated for each dump so we can freely switch ++ * between PV and non-PV. 
++ */ ++ if (s390_is_pv() && kvm_s390_get_protected_dump() && ++ kvm_s390_pv_info_basic_valid()) { ++ info->arch_sections_add_fn = *arch_sections_add; ++ info->arch_sections_write_hdr_fn = *arch_sections_write_hdr; ++ info->arch_sections_write_fn = *arch_sections_write; ++ } else { ++ info->arch_sections_add_fn = NULL; ++ info->arch_sections_write_hdr_fn = NULL; ++ info->arch_sections_write_fn = NULL; ++ } + return 0; + } + +@@ -261,7 +466,7 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + { + int name_size = 8; /* "LINUX" or "CORE" + pad */ + size_t elf_note_size = 0; +- int note_head_size; ++ int note_head_size, content_size; + const NoteFuncDesc *nf; + + assert(class == ELFCLASS64); +@@ -270,12 +475,15 @@ ssize_t cpu_get_note_size(int class, int machine, int nr_cpus) + note_head_size = sizeof(Elf64_Nhdr); + + for (nf = note_core; nf->note_contents_func; nf++) { +- elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ elf_note_size = elf_note_size + note_head_size + name_size + nf->contents_size; + } + for (nf = note_linux; nf->note_contents_func; nf++) { ++ if (nf->pvonly && !s390_is_pv()) { ++ continue; ++ } ++ content_size = nf->contents_size ? 
nf->contents_size : nf->note_size_func(); + elf_note_size = elf_note_size + note_head_size + name_size + +- nf->contents_size; ++ content_size; + } + + return (elf_note_size) * nr_cpus; +-- +2.37.3 + diff --git a/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch b/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch new file mode 100644 index 0000000..71dc7dc --- /dev/null +++ b/kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch @@ -0,0 +1,55 @@ +From c7b14d3af7c73a3bf0c00911b85f202840d9b414 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= +Date: Mon, 7 Nov 2022 17:13:49 +0100 +Subject: [PATCH 12/42] s390x/s390-virtio-ccw: Switch off zPCI enhancements on + older machines +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 226: s390: Enhanced Interpretation for PCI Functions and Secure Execution guest dump +RH-Bugzilla: 1664378 2043909 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Jon Maloy +RH-Commit: [12/41] 61e32bab6d68ee9abd6a0751944af82e002b05b4 + +zPCI enhancement features (interpretation and forward assist) were +recently introduced to improve performance on PCI passthrough devices. +To maintain the same behaviour on older Z machines, deactivate the +features with the associated properties. 
+ +Signed-off-by: Cédric Le Goater +Message-Id: <20221107161349.1032730-3-clg@kaod.org> +Reviewed-by: Matthew Rosato +Signed-off-by: Thomas Huth +(cherry picked from commit d3d1a406127f7da482eafbdc871c120c2770bb91) +[ clg: applied zPCI restrictions to rhel8.5.0 machine and below ] +Signed-off-by: Cédric Le Goater +--- + hw/s390x/s390-virtio-ccw.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c +index bec270598b..bd80e72cf8 100644 +--- a/hw/s390x/s390-virtio-ccw.c ++++ b/hw/s390x/s390-virtio-ccw.c +@@ -1130,8 +1130,14 @@ static void ccw_machine_rhel850_instance_options(MachineState *machine) + + static void ccw_machine_rhel850_class_options(MachineClass *mc) + { ++ static GlobalProperty compat[] = { ++ { TYPE_S390_PCI_DEVICE, "interpret", "off", }, ++ { TYPE_S390_PCI_DEVICE, "forwarding-assist", "off", }, ++ }; ++ + ccw_machine_rhel860_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_rhel_8_5, hw_compat_rhel_8_5_len); ++ compat_props_add(mc->compat_props, compat, G_N_ELEMENTS(compat)); + mc->smp_props.prefer_sockets = true; + } + DEFINE_CCW_MACHINE(rhel850, "rhel8.5.0", false); +-- +2.37.3 + diff --git a/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch b/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch new file mode 100644 index 0000000..b07c902 --- /dev/null +++ b/kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch @@ -0,0 +1,70 @@ +From 90fcfe4b07afc5299c5ee69fa663ca46b597fd4b Mon Sep 17 00:00:00 2001 +From: Nico Boehr +Date: Wed, 12 Oct 2022 14:32:29 +0200 +Subject: [PATCH] s390x/tod-kvm: don't save/restore the TOD in PV guests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Cédric Le Goater +RH-MergeRequest: 244: s390x/tod-kvm: don't save/restore the TOD in PV guests +RH-Bugzilla: 2155448 +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cornelia Huck +RH-Acked-by: Thomas Huth 
+RH-Commit: [1/1] 3cb3154dd7c1549c54cf8c0483b5f23b235f6db3 + +Under PV, the guest's TOD clock is under control of the ultravisor and the +hypervisor cannot change it. + +With upcoming kernel changes[1], the Linux kernel will reject QEMU's +request to adjust the guest's clock in this case, so don't attempt to set +the clock. + +This avoids the following warning message on save/restore of a PV guest: + +warning: Unable to set KVM guest TOD clock: Operation not supported + +[1] https://lore.kernel.org/all/20221011160712.928239-2-nrb@linux.ibm.com/ + +Fixes: c3347ed0d2ee ("s390x: protvirt: Support unpack facility") +Signed-off-by: Nico Boehr +Message-Id: <20221012123229.1196007-1-nrb@linux.ibm.com> +[thuth: Add curly braces] +Signed-off-by: Thomas Huth +(cherry picked from commit 38621181ae3cbec62e3490fbc14f6ac01642d07a) +Signed-off-by: Cédric Le Goater +--- + hw/s390x/tod-kvm.c | 9 +++++++++ + 1 file changed, 9 insertions(+) + +diff --git a/hw/s390x/tod-kvm.c b/hw/s390x/tod-kvm.c +index ec855811ae..c804c979b5 100644 +--- a/hw/s390x/tod-kvm.c ++++ b/hw/s390x/tod-kvm.c +@@ -13,6 +13,7 @@ + #include "qemu/module.h" + #include "sysemu/runstate.h" + #include "hw/s390x/tod.h" ++#include "hw/s390x/pv.h" + #include "kvm/kvm_s390x.h" + + static void kvm_s390_get_tod_raw(S390TOD *tod, Error **errp) +@@ -84,6 +85,14 @@ static void kvm_s390_tod_vm_state_change(void *opaque, bool running, + S390TODState *td = opaque; + Error *local_err = NULL; + ++ /* ++ * Under PV, the clock is under ultravisor control, hence we cannot restore ++ * it on resume. ++ */ ++ if (s390_is_pv()) { ++ return; ++ } ++ + if (running && td->stopped) { + /* Set the old TOD when running the VM - start the TOD clock. 
*/ + kvm_s390_set_tod_raw(&td->base, &local_err); +-- +2.37.3 + diff --git a/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch b/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch new file mode 100644 index 0000000..0fc1a71 --- /dev/null +++ b/kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch @@ -0,0 +1,97 @@ +From a9a4dfdd6312e192e9134d46edfac4c1b1bfa63d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Mon, 22 Aug 2022 14:53:20 +0200 +Subject: [PATCH] scsi-generic: Fix emulated block limits VPD page + +RH-Author: Kevin Wolf +RH-MergeRequest: 212: scsi-generic: Fix emulated block limits VPD page +RH-Commit: [1/1] d3ba6b2e03039043716ddc6b7d4a424d92249081 +RH-Bugzilla: 2120279 +RH-Acked-by: Emanuele Giuseppe Esposito +RH-Acked-by: Hanna Reitz +RH-Acked-by: Paolo Bonzini +RH-Acked-by: Stefan Hajnoczi + +Commits 01ef8185b80 and 24b36e9813e updated the way that the maximum +transfer length is calculated for patching block limits VPD page in an +INQUIRY response. + +The same updates also need to be made for the case where the host device +does not support the block limits VPD page at all and we emulate the +whole page. + +Without this fix, on host block devices a maximum transfer length of +(INT_MAX - sector_size) bytes is advertised to the guest, resulting in +I/O errors when a request that exceeds the host limits is made by the +guest. (Prior to commit 24b36e9813e, this code path would use the +max_transfer value from the host instead of INT_MAX, but still miss the +fix from 01ef8185b80 where max_transfer is also capped to max_iov +host pages, so it would be less wrong, but still wrong.) 
+ +Cc: qemu-stable@nongnu.org +Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2096251 +Fixes: 01ef8185b809af9d287e1a03a3f9d8ea8231118a +Fixes: 24b36e9813ec15da7db62e3b3621730710c5f020 +Signed-off-by: Kevin Wolf +Message-Id: <20220822125320.48257-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Signed-off-by: Kevin Wolf +(cherry picked from commit 51e15194b0a091e5c40aab2eb234a1d36c5c58ee) + +Resolved conflict: qemu_real_host_page_size() is a getter function in +current upstream, but still just a public global variable downstream. + +Signed-off-by: Kevin Wolf +--- + hw/scsi/scsi-generic.c | 21 ++++++++++++++------- + 1 file changed, 14 insertions(+), 7 deletions(-) + +diff --git a/hw/scsi/scsi-generic.c b/hw/scsi/scsi-generic.c +index 0306ccc7b1..3742899839 100644 +--- a/hw/scsi/scsi-generic.c ++++ b/hw/scsi/scsi-generic.c +@@ -147,6 +147,18 @@ static int execute_command(BlockBackend *blk, + return 0; + } + ++static uint64_t calculate_max_transfer(SCSIDevice *s) ++{ ++ uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); ++ uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); ++ ++ assert(max_transfer); ++ max_transfer = MIN_NON_ZERO(max_transfer, ++ max_iov * qemu_real_host_page_size); ++ ++ return max_transfer / s->blocksize; ++} ++ + static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + { + uint8_t page, page_idx; +@@ -179,12 +191,7 @@ static int scsi_handle_inquiry_reply(SCSIGenericReq *r, SCSIDevice *s, int len) + (r->req.cmd.buf[1] & 0x01)) { + page = r->req.cmd.buf[2]; + if (page == 0xb0) { +- uint64_t max_transfer = blk_get_max_hw_transfer(s->conf.blk); +- uint32_t max_iov = blk_get_max_hw_iov(s->conf.blk); +- +- assert(max_transfer); +- max_transfer = MIN_NON_ZERO(max_transfer, max_iov * qemu_real_host_page_size) +- / s->blocksize; ++ uint64_t max_transfer = calculate_max_transfer(s); + stl_be_p(&r->buf[8], max_transfer); + /* Also take care of the opt xfer len. 
*/ + stl_be_p(&r->buf[12], +@@ -230,7 +237,7 @@ static int scsi_generic_emulate_block_limits(SCSIGenericReq *r, SCSIDevice *s) + uint8_t buf[64]; + + SCSIBlockLimits bl = { +- .max_io_sectors = blk_get_max_transfer(s->conf.blk) / s->blocksize ++ .max_io_sectors = calculate_max_transfer(s), + }; + + memset(r->buf, 0, r->buflen); +-- +2.35.3 + diff --git a/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch b/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch new file mode 100644 index 0000000..94ff608 --- /dev/null +++ b/kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch @@ -0,0 +1,65 @@ +From 3014c7c11b6e64433fe9f3c463bd91e318ac96b6 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 9 Nov 2022 18:41:18 -0500 +Subject: [PATCH 2/2] target/arm/kvm: Retry KVM_CREATE_VM call if it fails + EINTR + +RH-Author: Jon Maloy +RH-MergeRequest: 228: qemu-kvm: backport some aarch64 fixes +RH-Bugzilla: 2132609 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Eric Auger +RH-Acked-by: Gavin Shan +RH-Commit: [2/2] 8494bbfb3fcd8693f56312f984d2964d1ca275c2 (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2132609 +Upstream: Merged + +commit bbde13cd14ad4eec18529ce0bf5876058464e124 +Author: Peter Maydell +Date: Fri Sep 30 12:38:24 2022 +0100 + + target/arm/kvm: Retry KVM_CREATE_VM call if it fails EINTR + + Occasionally the KVM_CREATE_VM ioctl can return EINTR, even though + there is no pending signal to be taken. In commit 94ccff13382055 + we added a retry-on-EINTR loop to the KVM_CREATE_VM call in the + generic KVM code. Adopt the same approach for the use of the + ioctl in the Arm-specific KVM code (where we use it to create a + scratch VM for probing for various things). 
+ + For more information, see the mailing list thread: + https://lore.kernel.org/qemu-devel/8735e0s1zw.wl-maz@kernel.org/ + + Reported-by: Vitaly Chikunov + Signed-off-by: Peter Maydell + Reviewed-by: Vitaly Chikunov + Reviewed-by: Eric Auger + Acked-by: Marc Zyngier + Message-id: 20220930113824.1933293-1-peter.maydell@linaro.org + +(cherry picked from commit bbde13cd14ad4eec18529ce0bf5876058464e124) +Signed-off-by: Jon Maloy +--- + target/arm/kvm.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/target/arm/kvm.c b/target/arm/kvm.c +index bbf1ce7ba3..1ae4e51055 100644 +--- a/target/arm/kvm.c ++++ b/target/arm/kvm.c +@@ -80,7 +80,9 @@ bool kvm_arm_create_scratch_host_vcpu(const uint32_t *cpus_to_try, + if (max_vm_pa_size < 0) { + max_vm_pa_size = 0; + } +- vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ do { ++ vmfd = ioctl(kvmfd, KVM_CREATE_VM, max_vm_pa_size); ++ } while (vmfd == -1 && errno == EINTR); + if (vmfd < 0) { + goto err; + } +-- +2.37.3 + diff --git a/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch b/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch new file mode 100644 index 0000000..04db85f --- /dev/null +++ b/kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch @@ -0,0 +1,73 @@ +From 688c9f386635544dbc468171a32fbc84f0c9224e Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Fri, 18 Mar 2022 16:23:47 +0100 +Subject: [PATCH 12/24] target/i386: kvm: do not access uninitialized variable + on older kernels + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [12/13] 776fac1e7d1aa16ec5f4d99ddad3039eab8212af +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +KVM support for AMX includes a new system attribute, KVM_X86_XCOMP_GUEST_SUPP. +Commit 19db68ca68 ("x86: Grant AMX permission for guest", 2022-03-15) however +did not fully consider the behavior on older kernels. First, it warns +too aggressively. 
Second, it invokes the KVM_GET_DEVICE_ATTR ioctl +unconditionally and then uses the "bitmask" variable, which remains +uninitialized if the ioctl fails. Third, kvm_ioctl returns -errno rather +than -1 on errors. + +While at it, explain why the ioctl is needed and KVM_GET_SUPPORTED_CPUID +is not enough. + +Signed-off-by: Paolo Bonzini +(cherry picked from commit 3ec5ad40081b14af28496198b4d08dbe13386790) +Signed-off-by: Paul Lai +--- + target/i386/kvm/kvm.c | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index b1128b0e07..bd439e56ad 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -409,6 +409,12 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + } + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { ++ /* ++ * The value returned by KVM_GET_SUPPORTED_CPUID does not include ++ * features that still have to be enabled with the arch_prctl ++ * system call. QEMU needs the full value, which is retrieved ++ * with KVM_GET_DEVICE_ATTR. ++ */ + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, +@@ -417,13 +423,16 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { +- warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ return ret; + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); +- if (rc == -1 && (errno == ENXIO || errno == EINVAL)) { +- warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " +- "error: %d", rc); ++ if (rc < 0) { ++ if (rc != -ENXIO) { ++ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ return ret; + } + ret = (reg == R_EAX) ? 
bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { +-- +2.35.3 + diff --git a/kvm-target-i386-properly-reset-TSC-on-reset.patch b/kvm-target-i386-properly-reset-TSC-on-reset.patch new file mode 100644 index 0000000..47ce2af --- /dev/null +++ b/kvm-target-i386-properly-reset-TSC-on-reset.patch @@ -0,0 +1,83 @@ +From 416de21d11540a927cceb533bf54ce28ffa15ad6 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 24 Mar 2022 09:21:41 +0100 +Subject: [PATCH 2/3] target/i386: properly reset TSC on reset + +RH-Author: Paolo Bonzini +RH-MergeRequest: 172: target/i386: properly reset TSC on reset +RH-Commit: [1/1] 7008bc5d02ad0a2d8b78259459d22d8f0986c989 +RH-Bugzilla: 2070417 +RH-Acked-by: Marcelo Tosatti +RH-Acked-by: Igor Mammedov +RH-Acked-by: Vitaly Kuznetsov + +Some versions of Windows hang on reboot if their TSC value is greater +than 2^54. The calibration of the Hyper-V reference time overflows +and fails; as a result the processors' clock sources are out of sync. + +The issue is that the TSC _should_ be reset to 0 on CPU reset and +QEMU tries to do that. However, KVM special cases writing 0 to the +TSC and thinks that QEMU is trying to hot-plug a CPU, which is +correct the first time through but not later. Thwart this valiant +effort and reset the TSC to 1 instead, but only if the CPU has been +run once. + +For this to work, env->tsc has to be moved to the part of CPUArchState +that is not zeroed at the beginning of x86_cpu_reset. 
+ +Reported-by: Vadim Rozenfeld +Supersedes: <20220324082346.72180-1-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 5286c3662294119dc2dd1e9296757337211451f6) +--- + target/i386/cpu.c | 13 +++++++++++++ + target/i386/cpu.h | 2 +- + 2 files changed, 14 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 6e25d13339..dd6935b1dd 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5871,6 +5871,19 @@ static void x86_cpu_reset(DeviceState *dev) + env->xstate_bv = 0; + + env->pat = 0x0007040600070406ULL; ++ ++ if (kvm_enabled()) { ++ /* ++ * KVM handles TSC = 0 specially and thinks we are hot-plugging ++ * a new CPU, use 1 instead to force a reset. ++ */ ++ if (env->tsc != 0) { ++ env->tsc = 1; ++ } ++ } else { ++ env->tsc = 0; ++ } ++ + env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; + if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { + env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 04f2b790c9..c6a6c871f1 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1510,7 +1510,6 @@ typedef struct CPUX86State { + target_ulong kernelgsbase; + #endif + +- uint64_t tsc; + uint64_t tsc_adjust; + uint64_t tsc_deadline; + uint64_t tsc_aux; +@@ -1660,6 +1659,7 @@ typedef struct CPUX86State { + int64_t tsc_khz; + int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; ++ uint64_t tsc; + #if defined(CONFIG_KVM) || defined(CONFIG_HVF) + void *xsave_buf; + uint32_t xsave_buf_len; +-- +2.35.1 + diff --git a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch index 61752c7..cfc1e4a 100644 --- a/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +++ b/kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch @@ -1,15 +1,18 @@ -From 27c1d979a994f5afc59c3520af58d15aa5aae723 Mon Sep 17 00:00:00 2001 +From 
8d1a60069cddcc69ef1a6f50f2b55343de348b57 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Fri, 6 May 2022 17:39:56 +0200 -Subject: [PATCH 29/32] target/s390x: kvm: Honor storage keys during emulation +Subject: [PATCH 2/3] target/s390x: kvm: Honor storage keys during emulation +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit -RH-Author: Thomas Huth -RH-MergeRequest: 109: Honor storage keys during emulation of I/O instructions -RH-Commit: [2/2] 346dee1e13bfe1c074e4c6a4417091711d852f9c (thuth/qemu-kvm-cs9) -RH-Bugzilla: 2111994 -RH-Acked-by: Cornelia Huck +RH-Author: Cédric Le Goater +RH-MergeRequest: 220: s390x: Fix skey test in kvm_unit_test +RH-Bugzilla: 2124757 +RH-Acked-by: Thomas Huth RH-Acked-by: David Hildenbrand -RH-Acked-by: Claudio Imbrenda +RH-Acked-by: Cornelia Huck +RH-Commit: [2/2] 980dbb4eba8d2f1da7cf4113230d0a6483cffc4f Storage key controlled protection is currently not honored when emulating instructions. @@ -40,19 +43,19 @@ keys: * MPCIFC * STPCIFC +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2124757 + Signed-off-by: Janis Schoetterl-Glausch Message-Id: <20220506153956.2217601-3-scgl@linux.ibm.com> Signed-off-by: Thomas Huth - (cherry picked from commit 54354861d21b69ec0781f43e67b8d4f6edad7e3f) -Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2111994 -Signed-off-by: Thomas Huth +Signed-off-by: Cédric Le Goater --- target/s390x/kvm/kvm.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/target/s390x/kvm/kvm.c b/target/s390x/kvm/kvm.c -index 74f089d87f..1f1d1a33b8 100644 +index c52434985b..ba04997da1 100644 --- a/target/s390x/kvm/kvm.c +++ b/target/s390x/kvm/kvm.c @@ -152,12 +152,15 @@ const KVMCapabilityInfo kvm_arch_required_capabilities[] = { @@ -99,5 +102,5 @@ index 74f089d87f..1f1d1a33b8 100644 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_S390_MEM_OP, &mem_op); if (ret < 0) { -- -2.31.1 +2.35.3 diff --git a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch 
b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch index 7b9a8f3..4c04458 100644 --- a/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +++ b/kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch @@ -1,18 +1,19 @@ -From cea7b15c613a11ea15a1458d6990be7044df6643 Mon Sep 17 00:00:00 2001 +From 24af433728429578e586d179e27451b7d4a46cba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Thu, 18 Nov 2021 12:57:33 +0100 -Subject: [PATCH 17/17] tests/qtest/fdc-test: Add a regression test for +Subject: [PATCH 3/3] tests/qtest/fdc-test: Add a regression test for CVE-2021-3507 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jon Maloy -RH-MergeRequest: 107: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) -RH-Commit: [2/2] 067c052df790959c28c1fcc16547676d36523bd9 (mrezanin/centos-src-qemu-kvm) -RH-Bugzilla: 1951522 +RH-MergeRequest: 194: hw/block/fdc: Prevent end-of-track overrun (CVE-2021-3507) +RH-Commit: [2/2] 31ec71276b521b06d4142fffa88a3fa4d1494d92 (jmaloy/qemu-kvm) +RH-Bugzilla: 1951521 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: Thomas Huth RH-Acked-by: Hanna Reitz -RH-Acked-by: Miroslav Rezanina Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339 @@ -76,7 +77,7 @@ Signed-off-by: Jon Maloy 1 file changed, 21 insertions(+) diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c -index b0d40012e6..1d4f852128 100644 +index 8f6eee84a4..6f5850354f 100644 --- a/tests/qtest/fdc-test.c +++ b/tests/qtest/fdc-test.c @@ -583,6 +583,26 @@ static void test_cve_2021_20196(void) @@ -115,5 +116,5 @@ index b0d40012e6..1d4f852128 100644 ret = g_test_run(); -- -2.31.1 +2.35.3 diff --git a/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch b/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch new file mode 100644 index 0000000..813d0ca --- /dev/null +++ b/kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch @@ -0,0 +1,105 @@ +From 87a318f0b8758f940a316831a77b6ebebca42b19 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Wed, 4 May 2022 10:35:17 -0400 +Subject: [PATCH 3/3] ui/cursor: fix integer overflow in cursor_alloc + (CVE-2021-4206) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 180: ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) +RH-Commit: [1/1] 7ad711347bc6248dc5aefa45401ca74448dee5e5 (jmaloy/qemu-kvm) +RH-Bugzilla: 2040734 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Gerd Hoffmann + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2040734 +Upstream: Merged +CVE: CVE-2021-4206 + +commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa +Author: Mauro Matteo Cascella +Date: Thu Apr 7 10:17:12 2022 +0200 + + ui/cursor: fix integer overflow in cursor_alloc (CVE-2021-4206) + + Prevent potential integer overflow by limiting 'width' and 'height' to + 512x512. Also change 'datasize' type to size_t. 
Refer to security + advisory https://starlabs.sg/advisories/22-4206/ for more information. + + Fixes: CVE-2021-4206 + Signed-off-by: Mauro Matteo Cascella + Reviewed-by: Marc-André Lureau + Message-Id: <20220407081712.345609-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit fa892e9abb728e76afcf27323ab29c57fb0fe7aa) +Signed-off-by: Jon Maloy +--- + hw/display/qxl-render.c | 7 +++++++ + hw/display/vmware_vga.c | 2 ++ + ui/cursor.c | 8 +++++++- + 3 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/hw/display/qxl-render.c b/hw/display/qxl-render.c +index 237ed293ba..ca217004bf 100644 +--- a/hw/display/qxl-render.c ++++ b/hw/display/qxl-render.c +@@ -247,6 +247,13 @@ static QEMUCursor *qxl_cursor(PCIQXLDevice *qxl, QXLCursor *cursor, + size_t size; + + c = cursor_alloc(cursor->header.width, cursor->header.height); ++ ++ if (!c) { ++ qxl_set_guest_bug(qxl, "%s: cursor %ux%u alloc error", __func__, ++ cursor->header.width, cursor->header.height); ++ goto fail; ++ } ++ + c->hot_x = cursor->header.hot_spot_x; + c->hot_y = cursor->header.hot_spot_y; + switch (cursor->header.type) { +diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c +index e2969a6c81..2b81d6122f 100644 +--- a/hw/display/vmware_vga.c ++++ b/hw/display/vmware_vga.c +@@ -509,6 +509,8 @@ static inline void vmsvga_cursor_define(struct vmsvga_state_s *s, + int i, pixels; + + qc = cursor_alloc(c->width, c->height); ++ assert(qc != NULL); ++ + qc->hot_x = c->hot_x; + qc->hot_y = c->hot_y; + switch (c->bpp) { +diff --git a/ui/cursor.c b/ui/cursor.c +index 1d62ddd4d0..835f0802f9 100644 +--- a/ui/cursor.c ++++ b/ui/cursor.c +@@ -46,6 +46,8 @@ static QEMUCursor *cursor_parse_xpm(const char *xpm[]) + + /* parse pixel data */ + c = cursor_alloc(width, height); ++ assert(c != NULL); ++ + for (pixel = 0, y = 0; y < height; y++, line++) { + for (x = 0; x < height; x++, pixel++) { + idx = xpm[line][x]; +@@ -91,7 +93,11 @@ QEMUCursor *cursor_builtin_left_ptr(void) + 
QEMUCursor *cursor_alloc(int width, int height) + { + QEMUCursor *c; +- int datasize = width * height * sizeof(uint32_t); ++ size_t datasize = width * height * sizeof(uint32_t); ++ ++ if (width > 512 || height > 512) { ++ return NULL; ++ } + + c = g_malloc0(sizeof(QEMUCursor) + datasize); + c->width = width; +-- +2.35.1 + diff --git a/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch b/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch new file mode 100644 index 0000000..9ade171 --- /dev/null +++ b/kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch @@ -0,0 +1,80 @@ +From d3602e5afa1e90c5e33625fc528db7f96195bada Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Mon, 7 Nov 2022 19:59:46 -0500 +Subject: [PATCH 42/42] ui/vnc-clipboard: fix integer underflow in + vnc_client_cut_text_ext +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 227: ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext +RH-Bugzilla: 2129760 +RH-Acked-by: Mauro Matteo Cascella +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Thomas Huth +RH-Acked-by: Gerd Hoffmann +RH-Commit: [1/1] ac19a6c0777e308061bcb6d1de5cc9beaa105a3a (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2129760 +CVE: CVE-2022-3165 +Upstream: Merged + +commit d307040b18bfcb1393b910f1bae753d5c12a4dc7 +Author: Mauro Matteo Cascella +Date: Sun Sep 25 22:45:11 2022 +0200 + + ui/vnc-clipboard: fix integer underflow in vnc_client_cut_text_ext + + Extended ClientCutText messages start with a 4-byte header. If len < 4, + an integer underflow occurs in vnc_client_cut_text_ext. The result is + used to decompress data in a while loop in inflate_buffer, leading to + CPU consumption and denial of service. Prevent this by checking dlen in + protocol_client_msg. 
+ + Fixes: CVE-2022-3165 + Fixes: 0bf41cab93e5 ("ui/vnc: clipboard support") + Reported-by: TangPeng + Signed-off-by: Mauro Matteo Cascella + Message-Id: <20220925204511.1103214-1-mcascell@redhat.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit d307040b18bfcb1393b910f1bae753d5c12a4dc7) +Signed-off-by: Jon Maloy +--- + ui/vnc.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index af02522e84..a14b6861be 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -2442,8 +2442,8 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + if (len == 1) { + return 8; + } ++ uint32_t dlen = abs(read_s32(data, 4)); + if (len == 8) { +- uint32_t dlen = abs(read_s32(data, 4)); + if (dlen > (1 << 20)) { + error_report("vnc: client_cut_text msg payload has %u bytes" + " which exceeds our limit of 1MB.", dlen); +@@ -2456,8 +2456,13 @@ static int protocol_client_msg(VncState *vs, uint8_t *data, size_t len) + } + + if (read_s32(data, 4) < 0) { +- vnc_client_cut_text_ext(vs, abs(read_s32(data, 4)), +- read_u32(data, 8), data + 12); ++ if (dlen < 4) { ++ error_report("vnc: malformed payload (header less than 4 bytes)" ++ " in extended clipboard pseudo-encoding."); ++ vnc_client_error(vs); ++ break; ++ } ++ vnc_client_cut_text_ext(vs, dlen, read_u32(data, 8), data + 12); + break; + } + vnc_client_cut_text(vs, read_u32(data, 4), data + 8); +-- +2.37.3 + diff --git a/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch b/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch new file mode 100644 index 0000000..ae80c9d --- /dev/null +++ b/kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch @@ -0,0 +1,90 @@ +From ffdf44cb8b4c743e7ab0ce46d62687d7178f9a49 Mon Sep 17 00:00:00 2001 +From: Jon Maloy +Date: Fri, 18 Nov 2022 20:26:54 -0500 +Subject: [PATCH 3/3] ui/vnc.c: Fixed a deadlock bug. 
+MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Jon Maloy +RH-MergeRequest: 234: ui/vnc.c: Fixed a deadlock bug. +RH-Bugzilla: 2141896 +RH-Acked-by: Gerd Hoffmann +RH-Acked-by: Marc-André Lureau +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/1] d3d1d28d7b621a8ae8a593a5bd5303fa7951c17c (jmaloy/qemu-kvm) + +BZ: https://bugzilla.redhat.com/show_bug.cgi?id=2141896 +Upstream: Merged + +commit 1dbbe6f172810026c51dc84ed927a3cc23017949 +Author: Rao Lei +Date: Wed Jan 5 10:08:08 2022 +0800 + + ui/vnc.c: Fixed a deadlock bug. + + The GDB stack is as follows: + (gdb) bt + 0 __lll_lock_wait (futex=futex@entry=0x56211df20360, private=0) at lowlevellock.c:52 + 1 0x00007f263caf20a3 in __GI___pthread_mutex_lock (mutex=0x56211df20360) at ../nptl/pthread_mutex_lock.c:80 + 2 0x000056211a757364 in qemu_mutex_lock_impl (mutex=0x56211df20360, file=0x56211a804857 "../ui/vnc-jobs.h", line=60) + at ../util/qemu-thread-posix.c:80 + 3 0x000056211a0ef8c7 in vnc_lock_output (vs=0x56211df14200) at ../ui/vnc-jobs.h:60 + 4 0x000056211a0efcb7 in vnc_clipboard_send (vs=0x56211df14200, count=1, dwords=0x7ffdf1701338) at ../ui/vnc-clipboard.c:138 + 5 0x000056211a0f0129 in vnc_clipboard_notify (notifier=0x56211df244c8, data=0x56211dd1bbf0) at ../ui/vnc-clipboard.c:209 + 6 0x000056211a75dde8 in notifier_list_notify (list=0x56211afa17d0 , data=0x56211dd1bbf0) at ../util/notify.c:39 + 7 0x000056211a0bf0e6 in qemu_clipboard_update (info=0x56211dd1bbf0) at ../ui/clipboard.c:50 + 8 0x000056211a0bf05d in qemu_clipboard_peer_release (peer=0x56211df244c0, selection=QEMU_CLIPBOARD_SELECTION_CLIPBOARD) + at ../ui/clipboard.c:41 + 9 0x000056211a0bef9b in qemu_clipboard_peer_unregister (peer=0x56211df244c0) at ../ui/clipboard.c:19 + 10 0x000056211a0d45f3 in vnc_disconnect_finish (vs=0x56211df14200) at ../ui/vnc.c:1358 + 11 0x000056211a0d4c9d in vnc_client_read (vs=0x56211df14200) at ../ui/vnc.c:1611 + 12 0x000056211a0d4df8 in vnc_client_io
(ioc=0x56211ce70690, condition=G_IO_IN, opaque=0x56211df14200) at ../ui/vnc.c:1649 + 13 0x000056211a5b976c in qio_channel_fd_source_dispatch + (source=0x56211ce50a00, callback=0x56211a0d4d71 , user_data=0x56211df14200) at ../io/channel-watch.c:84 + 14 0x00007f263ccede8e in g_main_context_dispatch () at /lib/x86_64-linux-gnu/libglib-2.0.so.0 + 15 0x000056211a77d4a1 in glib_pollfds_poll () at ../util/main-loop.c:232 + 16 0x000056211a77d51f in os_host_main_loop_wait (timeout=958545) at ../util/main-loop.c:255 + 17 0x000056211a77d630 in main_loop_wait (nonblocking=0) at ../util/main-loop.c:531 + 18 0x000056211a45bc8e in qemu_main_loop () at ../softmmu/runstate.c:726 + 19 0x000056211a0b45fa in main (argc=69, argv=0x7ffdf1701778, envp=0x7ffdf17019a8) at ../softmmu/main.c:50 + + From the call trace, we can see it is a deadlock bug. + vnc_disconnect_finish will acquire the output_mutex. + But, the output_mutex will be acquired again in vnc_clipboard_send. + Repeated locking will cause deadlock. 
So, I move + qemu_clipboard_peer_unregister() behind vnc_unlock_output(); + Fixes: 0bf41cab93e ("ui/vnc: clipboard support") + Signed-off-by: Lei Rao + Reviewed-by: Marc-André Lureau + Message-Id: <20220105020808.597325-1-lei.rao@intel.com> + Signed-off-by: Gerd Hoffmann + +(cherry picked from commit 1dbbe6f172810026c51dc84ed927a3cc23017949) +Signed-off-by: Jon Maloy +--- + ui/vnc.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/ui/vnc.c b/ui/vnc.c +index a14b6861be..76372ca1de 100644 +--- a/ui/vnc.c ++++ b/ui/vnc.c +@@ -1354,12 +1354,12 @@ void vnc_disconnect_finish(VncState *vs) + /* last client gone */ + vnc_update_server_surface(vs->vd); + } ++ vnc_unlock_output(vs); ++ + if (vs->cbpeer.update.notify) { + qemu_clipboard_peer_unregister(&vs->cbpeer); + } + +- vnc_unlock_output(vs); +- + qemu_mutex_destroy(&vs->output_mutex); + if (vs->bh != NULL) { + qemu_bh_delete(vs->bh); +-- +2.37.3 + diff --git a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch index 70e8f59..c3dbcf9 100644 --- a/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +++ b/kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch @@ -1,18 +1,18 @@ -From edb2bd99355f300b512c040e91f5870ea14a5d7e Mon Sep 17 00:00:00 2001 +From 9a62319b973ec33f9ccbeeae7f2f3b4b31db0c26 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:15 -0700 -Subject: [PATCH 11/16] vhost-net: fix improper cleanup in vhost_net_start +Subject: [PATCH 17/24] vhost-net: fix improper cleanup in vhost_net_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [4/7] e88e482dd4b344f0cc887a358268beaed4d62917 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [4/7] bebe7990a12e901fbb84e5e4b7a62744d75c9d9e +RH-Bugzilla: 
2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier vhost_net_start() missed a corresponding stop_one() upon error from vhost_set_vring_enable(). While at it, make the error handling for @@ -52,5 +52,5 @@ index 30379d2ca4..d6d7c51f62 100644 } e = k->set_guest_notifiers(qbus->parent, total_notifiers, false); -- -2.31.1 +2.35.3 diff --git a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch index 747bf5f..ef700fd 100644 --- a/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +++ b/kvm-vhost-vdpa-backend-feature-should-set-only-once.patch @@ -1,18 +1,18 @@ -From 46c5a35aa56cf0dd55376638dbf7d46e85f497e1 Mon Sep 17 00:00:00 2001 +From 01270bb66a4f7897a4fd06ba248eeeb41dc47571 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:16 -0700 -Subject: [PATCH 12/16] vhost-vdpa: backend feature should set only once +Subject: [PATCH 18/24] vhost-vdpa: backend feature should set only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [5/7] 7531bb8da0c99b29997e8bfc6d1e811daf3cdd38 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [5/7] 0ab13542cf25c129dc403db95c7db12cdb012744 +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier The vhost_vdpa_one_time_request() branch in vhost_vdpa_set_backend_cap() incorrectly sends down @@ -41,10 +41,10 @@ Signed-off-by: Jason Wang 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 8adf7c0b92..6e3dbd9e89 100644 +index 78da48a333..a9be24776a 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c -@@ -665,7 +665,7 @@ static int 
vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +@@ -525,7 +525,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) features &= f; @@ -54,5 +54,5 @@ index 8adf7c0b92..6e3dbd9e89 100644 if (r) { return -EFAULT; -- -2.31.1 +2.35.3 diff --git a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch index 2466557..bbc1c85 100644 --- a/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +++ b/kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch @@ -1,19 +1,19 @@ -From 58acdab17ec00ab76105ab92a51c5ba4dec3df5a Mon Sep 17 00:00:00 2001 +From c8cb46fa93a3ccad6f3e183045b270f28eed7b12 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:17 -0700 -Subject: [PATCH 13/16] vhost-vdpa: change name and polarity for +Subject: [PATCH 19/24] vhost-vdpa: change name and polarity for vhost_vdpa_one_time_request() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [6/7] 7029778f463a136ff412c63b86b6953390e47bf8 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [6/7] 727ab0bb813f073e8cd2f7e68a9acda60c2cb33d +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier The name vhost_vdpa_one_time_request() was confusing. No matter whatever it returns, its typical occurrence had @@ -27,6 +27,9 @@ This call is applicable to request which performs operation only once, before queues are set up, and usually at the beginning of the caller function. Document the requirement for it in place. +Conflicts: hw/virtio/vhost-vdpa.c since we don't have shadow virtqueue +support. + Signed-off-by: Si-Wei Liu Message-Id: <1651890498-24478-7-git-send-email-si-wei.liu@oracle.com> Reviewed-by: Michael S. 
Tsirkin @@ -40,10 +43,10 @@ Signed-off-by: Jason Wang 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c -index 6e3dbd9e89..33dcaa135e 100644 +index a9be24776a..38bbcb3c18 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c -@@ -366,11 +366,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) +@@ -319,11 +319,18 @@ static void vhost_vdpa_get_iova_range(struct vhost_vdpa *v) v->iova_range.last); } @@ -63,8 +66,8 @@ index 6e3dbd9e89..33dcaa135e 100644 + return v->index == 0; } - static int vhost_vdpa_get_dev_features(struct vhost_dev *dev, -@@ -451,7 +458,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) + static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) +@@ -351,7 +358,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) vhost_vdpa_get_iova_range(v); @@ -73,7 +76,7 @@ index 6e3dbd9e89..33dcaa135e 100644 return 0; } -@@ -594,7 +601,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) +@@ -468,7 +475,7 @@ static int vhost_vdpa_memslots_limit(struct vhost_dev *dev) static int vhost_vdpa_set_mem_table(struct vhost_dev *dev, struct vhost_memory *mem) { @@ -82,8 +85,8 @@ index 6e3dbd9e89..33dcaa135e 100644 return 0; } -@@ -623,7 +630,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, - struct vhost_vdpa *v = dev->opaque; +@@ -496,7 +503,7 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, + { int ret; - if (vhost_vdpa_one_time_request(dev)) { @@ -91,7 +94,7 @@ index 6e3dbd9e89..33dcaa135e 100644 return 0; } -@@ -665,7 +672,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) +@@ -525,7 +532,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) features &= f; @@ -100,16 +103,16 @@ index 6e3dbd9e89..33dcaa135e 100644 r = vhost_vdpa_call(dev, VHOST_SET_BACKEND_FEATURES, &features); if (r) { return -EFAULT; -@@ -1118,7 +1125,7 @@ static int 
vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, +@@ -670,7 +677,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) + static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, struct vhost_log *log) { - struct vhost_vdpa *v = dev->opaque; -- if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) { -+ if (v->shadow_vqs_enabled || !vhost_vdpa_first_dev(dev)) { +- if (vhost_vdpa_one_time_request(dev)) { ++ if (!vhost_vdpa_first_dev(dev)) { return 0; } -@@ -1240,7 +1247,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, +@@ -739,7 +746,7 @@ static int vhost_vdpa_get_features(struct vhost_dev *dev, static int vhost_vdpa_set_owner(struct vhost_dev *dev) { @@ -119,5 +122,5 @@ index 6e3dbd9e89..33dcaa135e 100644 } -- -2.31.1 +2.35.3 diff --git a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch index 7716cbf..68c7d5f 100644 --- a/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +++ b/kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch @@ -1,18 +1,18 @@ -From 3142102adb98f46518c0ac1773b0c48710c6bed6 Mon Sep 17 00:00:00 2001 +From c10ef6f79d4a4c8ccc5901b25234501c621e4e04 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:14 -0700 -Subject: [PATCH 10/16] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa +Subject: [PATCH 16/24] vhost-vdpa: fix improper cleanup in net_init_vhost_vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [3/7] c83ff6c97d34cfae3c3447edde934b42a9ace75f (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [3/7] b3b658dcb4695defe1fdb199570fb984291e8e21 +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent 
Vivier ... such that no memory leaks on dangling net clients in case of error. @@ -29,7 +29,7 @@ Signed-off-by: Jason Wang 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c -index 1e9fe47c03..df1e69ee72 100644 +index 25dd6dd975..814f704687 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -306,7 +306,9 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, @@ -42,7 +42,7 @@ index 1e9fe47c03..df1e69ee72 100644 + } } qemu_close(vdpa_device_fd); - + g_free(ncs); -- -2.31.1 +2.35.3 diff --git a/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch b/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch new file mode 100644 index 0000000..9af491f --- /dev/null +++ b/kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch @@ -0,0 +1,102 @@ +From 56e2aef97e750ffdc572dcecbfc31314728d37a9 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Mar 2022 12:29:39 +0100 +Subject: [PATCH 2/2] virtio: fix feature negotiation for ACCESS_PLATFORM +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [2/2] 264d3bdbbde985f16ed6f5a1786547c25fb8cc04 + +Unlike most virtio features ACCESS_PLATFORM is considered mandatory by +QEMU, i.e. the driver must accept it if offered by the device. The +virtio specification says that the driver SHOULD accept the +ACCESS_PLATFORM feature if offered, and that the device MAY fail to +operate if ACCESS_PLATFORM was offered but not negotiated. + +While a SHOULD ain't exactly a MUST, we are certainly allowed to fail +the device when the driver fences ACCESS_PLATFORM. 
With commit +2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") we already made the +decision to do so whenever the get_dma_as() callback is implemented (by +the bus), which in practice means for the entirety of virtio-pci. + +That means, if the device needs to translate I/O addresses, then +ACCESS_PLATFORM is mandatory. The aforementioned commit tells us in the +commit message that this is for security reasons. More precisely if we +were to allow a less than trusted driver (e.g. a user-space driver, or +a nested guest) to make the device bypass the IOMMU by not negotiating +ACCESS_PLATFORM, then the guest kernel would have no ability to +control/police (by programming the IOMMU) what pieces of guest memory +the driver may manipulate using the device. Which would break security +assumptions within the guest. + +If ACCESS_PLATFORM is offered not because we want the device to utilize +an IOMMU and do address translation, but because the device does not +have access to the entire guest RAM, and needs the driver to grant +access to the bits it needs access to (e.g. confidential guest support), +we still require the guest to have the corresponding logic and to accept +ACCESS_PLATFORM. If the driver does not accept ACCESS_PLATFORM, then +things are bound to go wrong, and we may see failures much less graceful +than failing the device because the driver didn't negotiate +ACCESS_PLATFORM. + +So let us make ACCESS_PLATFORM mandatory for the driver regardless +of whether the get_dma_as() callback is implemented or not. + +Signed-off-by: Halil Pasic +Fixes: 2943b53f68 ("virtio: force VIRTIO_F_IOMMU_PLATFORM") + +Message-Id: <20220307112939.2780117-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. 
Tsirkin +Reviewed-by: Cornelia Huck +(cherry picked from commit 06134e2bc35dc21543d4cbcf31f858c03d383442) +--- + hw/virtio/virtio-bus.c | 22 ++++++++++++++-------- + 1 file changed, 14 insertions(+), 8 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index 0f69d1c742..d7ec023adf 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -78,17 +78,23 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +- if (klass->get_dma_as != NULL && has_iommu) { ++ vdev->dma_as = &address_space_memory; ++ if (has_iommu) { ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ /* ++ * Present IOMMU_PLATFORM to the driver iff iommu_plattform=on and ++ * device operational. If the driver does not accept IOMMU_PLATFORM ++ * we fail the device. ++ */ + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); +- vdev->dma_as = klass->get_dma_as(qbus->parent); +- if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { +- error_setg(errp, ++ if (klass->get_dma_as) { ++ vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, + "iommu_platform=true is not supported by the device"); +- return; ++ return; ++ } + } +- } else { +- vdev->dma_as = &address_space_memory; + } + } + +-- +2.37.3 + diff --git a/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch b/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch new file mode 100644 index 0000000..b5632e1 --- /dev/null +++ b/kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch @@ -0,0 +1,115 @@ +From c731ffdf9faee74e9522dff06e61cda817902088 Mon Sep 17 00:00:00 2001 +From: Halil Pasic +Date: Mon, 7 Feb 2022 12:28:57 +0100 +Subject: [PATCH 1/2] virtio: fix the condition for iommu_platform not + supported +MIME-Version: 1.0 +Content-Type: text/plain; 
charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 224: virtiofs on s390 secure execution +RH-Bugzilla: 2116302 +RH-Acked-by: Thomas Huth +RH-Acked-by: Cornelia Huck +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/2] d7edc7e3905a04644c9ff44b0d36122c72068e08 + +The commit 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") claims to fail the device hotplug when iommu_platform +is requested, but not supported by the (vhost) device. At first +glance the condition for detecting that situation looks perfect, but +because of a certain peculiarity of virtio_platform it ain't. + +In fact the aforementioned commit introduces a regression. It breaks +virtio-fs support for Secure Execution, and most likely also for AMD SEV +or any other confidential guest scenario that relies on encrypted guest +memory. The same also applies to any other vhost device that does not +support _F_ACCESS_PLATFORM. + +The peculiarity is that iommu_platform and _F_ACCESS_PLATFORM collates +"device can not access all of the guest RAM" and "iova != gpa, thus +device needs to translate iova". + +Confidential guest technologies currently rely on the device/hypervisor +offering _F_ACCESS_PLATFORM, so that, after the feature has been +negotiated, the guest grants access to the portions of memory the +device needs to see. So for confidential guests, generally, +_F_ACCESS_PLATFORM is about the restricted access to memory, but not +about the addresses used being something else than guest physical +addresses. + +This is the very reason for which commit f7ef7e6e3b ("vhost: correctly +turn on VIRTIO_F_IOMMU_PLATFORM") fences _F_ACCESS_PLATFORM from the +vhost device that does not need it, because on the vhost interface it +only means "I/O address translation is needed". 
+ +This patch takes inspiration from f7ef7e6e3b ("vhost: correctly turn on +VIRTIO_F_IOMMU_PLATFORM"), and uses the same condition for detecting the +situation when _F_ACCESS_PLATFORM is requested, but no I/O translation +by the device, and thus no device capability is needed. In this +situation claiming that the device does not support iommu_plattform=on +is counter-productive. So let us stop doing that! + +Signed-off-by: Halil Pasic +Reported-by: Jakob Naucke +Fixes: 04ceb61a40 ("virtio: Fail if iommu_platform is requested, but +unsupported") +Acked-by: Cornelia Huck +Reviewed-by: Daniel Henrique Barboza +Tested-by: Daniel Henrique Barboza +Cc: Kevin Wolf +Cc: qemu-stable@nongnu.org + +Message-Id: <20220207112857.607829-1-pasic@linux.ibm.com> +Reviewed-by: Michael S. Tsirkin +Signed-off-by: Michael S. Tsirkin +Acked-by: Jason Wang +(cherry picked from commit e65902a913bf31ba79a83a3bd3621108b85cf645) +--- + hw/virtio/virtio-bus.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c +index d23db98c56..0f69d1c742 100644 +--- a/hw/virtio/virtio-bus.c ++++ b/hw/virtio/virtio-bus.c +@@ -48,6 +48,7 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + VirtioBusClass *klass = VIRTIO_BUS_GET_CLASS(bus); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + bool has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); ++ bool vdev_has_iommu; + Error *local_err = NULL; + + DPRINTF("%s: plug device.\n", qbus->name); +@@ -69,11 +70,6 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + return; + } + +- if (has_iommu && !virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) { +- error_setg(errp, "iommu_platform=true is not supported by the device"); +- return; +- } +- + if (klass->device_plugged != NULL) { + klass->device_plugged(qbus->parent, &local_err); + } +@@ -82,9 +78,15 @@ void virtio_bus_device_plugged(VirtIODevice *vdev, Error **errp) + 
return; + } + ++ vdev_has_iommu = virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); + if (klass->get_dma_as != NULL && has_iommu) { + virtio_add_feature(&vdev->host_features, VIRTIO_F_IOMMU_PLATFORM); + vdev->dma_as = klass->get_dma_as(qbus->parent); ++ if (!vdev_has_iommu && vdev->dma_as != &address_space_memory) { ++ error_setg(errp, ++ "iommu_platform=true is not supported by the device"); ++ return; ++ } + } else { + vdev->dma_as = &address_space_memory; + } +-- +2.37.3 + diff --git a/kvm-virtio-gpu-do-not-byteswap-padding.patch b/kvm-virtio-gpu-do-not-byteswap-padding.patch new file mode 100644 index 0000000..dc723bd --- /dev/null +++ b/kvm-virtio-gpu-do-not-byteswap-padding.patch @@ -0,0 +1,48 @@ +From e118a451dc1ed68f1371a5d8e042120542be6d31 Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Thu, 11 Nov 2021 12:06:00 +0100 +Subject: [PATCH 01/24] virtio-gpu: do not byteswap padding +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [1/13] 12714f53820b7632e7fc0a8a3bf8eb4a64f41750 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +In Linux 5.16, the padding of struct virtio_gpu_ctrl_hdr has become a +single-byte field followed by a uint8_t[3] array of padding bytes, +and virtio_gpu_ctrl_hdr_bswap does not compile anymore. + +Signed-off-by: Paolo Bonzini +Acked-by: Cornelia Huck +Reviewed-by: Alex Bennée +Reviewed-by: Michael S. 
Tsirkin +Reviewed-by: Philippe Mathieu-Daudé +Message-Id: <20211111110604.207376-2-pbonzini@redhat.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit a4663f1a5506626175fc64c86e52135587c36872) +Signed-off-by: Paul Lai +--- + include/hw/virtio/virtio-gpu-bswap.h | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/include/hw/virtio/virtio-gpu-bswap.h b/include/hw/virtio/virtio-gpu-bswap.h +index e2bee8f595..5faac0d8d5 100644 +--- a/include/hw/virtio/virtio-gpu-bswap.h ++++ b/include/hw/virtio/virtio-gpu-bswap.h +@@ -24,7 +24,6 @@ virtio_gpu_ctrl_hdr_bswap(struct virtio_gpu_ctrl_hdr *hdr) + le32_to_cpus(&hdr->flags); + le64_to_cpus(&hdr->fence_id); + le32_to_cpus(&hdr->ctx_id); +- le32_to_cpus(&hdr->padding); + } + + static inline void +-- +2.35.3 + diff --git a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch index 9da7ea7..f23f38c 100644 --- a/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +++ b/kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch @@ -1,19 +1,19 @@ -From 316b73277de233c7a9b6917077c00d7012060944 Mon Sep 17 00:00:00 2001 +From 39cdd781c885b0695f8830a33420caa9e9b0bd50 Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:13 -0700 -Subject: [PATCH 09/16] virtio-net: align ctrl_vq index for non-mq guest for +Subject: [PATCH 15/24] virtio-net: align ctrl_vq index for non-mq guest for vhost_vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [2/7] 7f764bbb579c7b473ad67fc25b46e698d277e781 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [2/7] 2647cf59f3dd1e3d8af2d12c01e06ae26fbc1dc2 +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier With MQ enabled vdpa 
device and non-MQ supporting guest e.g. booting vdpa with mq=on over OVMF of single vqp, below assert @@ -79,7 +79,7 @@ Signed-off-by: Jason Wang 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index ffb3475201..f0bb29c741 100644 +index ec045c3f41..f118379bb4 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -14,6 +14,7 @@ @@ -90,7 +90,7 @@ index ffb3475201..f0bb29c741 100644 #include "qemu/main-loop.h" #include "qemu/module.h" #include "hw/virtio/virtio.h" -@@ -3171,8 +3172,22 @@ static NetClientInfo net_virtio_info = { +@@ -3163,8 +3164,22 @@ static NetClientInfo net_virtio_info = { static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) { VirtIONet *n = VIRTIO_NET(vdev); @@ -114,7 +114,7 @@ index ffb3475201..f0bb29c741 100644 return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx); } -@@ -3180,8 +3195,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, +@@ -3172,8 +3187,22 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VirtIONet *n = VIRTIO_NET(vdev); @@ -139,5 +139,5 @@ index ffb3475201..f0bb29c741 100644 vdev, idx, mask); } -- -2.31.1 +2.35.3 diff --git a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch index 3930cc2..25c1aa9 100644 --- a/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +++ b/kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch @@ -1,19 +1,19 @@ -From 521a1953bc11ab6823dcbbee773bcf86e926a9e7 Mon Sep 17 00:00:00 2001 +From c9b51d54530c526f14ca0f3b9fc0bfa0b60d45ee Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:18 -0700 -Subject: [PATCH 14/16] virtio-net: don't handle mq request in userspace +Subject: [PATCH 20/24] virtio-net: don't handle mq request in userspace handler for vhost-vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [7/7] 9781cab45448ae16a00fbf10cf7995df6b984a0a (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [7/7] 0e6684d12e42752deae8f5ebc56456fed174e0ed +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier virtio_queue_host_notifier_read() tends to read pending event left behind on ioeventfd in the vhost_net_stop() path, and @@ -74,10 +74,10 @@ Signed-off-by: Jason Wang 1 file changed, 13 insertions(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index f0bb29c741..099e65036d 100644 +index f118379bb4..7e172ef829 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c -@@ -1381,6 +1381,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, +@@ -1373,6 +1373,7 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, { VirtIODevice *vdev = VIRTIO_DEVICE(n); uint16_t queue_pairs; @@ -85,7 +85,7 @@ index f0bb29c741..099e65036d 100644 virtio_net_disable_rss(n); if (cmd == VIRTIO_NET_CTRL_MQ_HASH_CONFIG) { -@@ -1412,6 +1413,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, +@@ -1404,6 +1405,18 @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, return VIRTIO_NET_ERR; } @@ -105,5 +105,5 @@ index f0bb29c741..099e65036d 100644 /* stop the backend before changing the number of queue_pairs to avoid handling a * disabled queue */ -- -2.31.1 +2.35.3 diff --git a/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch b/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch new file mode 100644 index 0000000..4855e59 --- /dev/null +++ b/kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch @@ -0,0 +1,60 @@ +From 10b3a7b56dc9b4c88e503c36c1b13d80bcb7b066 Mon Sep 17 00:00:00 2001 +From: Jason Wang +Date: Tue, 8 Mar 2022 10:42:51 +0800 +Subject: [PATCH 2/6] virtio-net: 
fix map leaking on error during receive + +RH-Author: Jon Maloy +RH-MergeRequest: 154: virtio-net: fix map leaking on error during receive +RH-Commit: [1/1] 7178b0cd5ce7c89fe476f2e199c9212c8b89327a (jmaloy/qemu-kvm) +RH-Bugzilla: 2063206 +RH-Acked-by: Jason Wang +RH-Acked-by: Kevin Wolf +RH-Acked-by: Laurent Vivier + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2063206 +Upstream: Merged +CVE: CVE-2022-26353 + +commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37 +Author: Jason Wang +Date: Tue Mar 8 10:42:51 2022 +0800 + + virtio-net: fix map leaking on error during receive + + Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + tries to fix the use after free of the sg by caching the virtqueue + elements in an array and unmapping them at once after receiving the + packets, but it forgot to unmap the cached elements on error which + will lead to leaking of mapping and other unexpected results. + + Fix this by detaching the cached elements on error. This addresses + CVE-2022-26353. + + Reported-by: Victor Tom + Cc: qemu-stable@nongnu.org + Fixes: CVE-2022-26353 + Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg") + Reviewed-by: Michael S.
Tsirkin + Signed-off-by: Jason Wang + +(cherry picked from commit abe300d9d894f7138e1af7c8e9c88c04bfe98b37) +Signed-off-by: Jon Maloy +--- + hw/net/virtio-net.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c +index f2014d5ea0..e1f4748831 100644 +--- a/hw/net/virtio-net.c ++++ b/hw/net/virtio-net.c +@@ -1862,6 +1862,7 @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf, + + err: + for (j = 0; j < i; j++) { ++ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]); + g_free(elems[j]); + } + +-- +2.27.0 + diff --git a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch index f6072d2..2e46cff 100644 --- a/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +++ b/kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch @@ -1,19 +1,19 @@ -From 9e737aba614e94da4458f02d4ff97e95ffffd19f Mon Sep 17 00:00:00 2001 +From bc307149fe4e3fe2a3e0ac52534383c955051e7e Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Fri, 6 May 2022 19:28:12 -0700 -Subject: [PATCH 08/16] virtio-net: setup vhost_dev and notifiers for cvq only +Subject: [PATCH 14/24] virtio-net: setup vhost_dev and notifiers for cvq only when feature is negotiated MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit RH-Author: Jason Wang -RH-MergeRequest: 98: Multiqueue fixes for vhost-vDPA -RH-Commit: [1/7] a5c5a2862b2e4d15ef7c09da3e4234fdef37cc66 (jasowang/qemu-kvm-cs) -RH-Bugzilla: 2070804 +RH-MergeRequest: 187: Multiqueue fixes for vhost-vDPA +RH-Commit: [1/7] 38bcfaa661f437b3dfa6b6f152dffd60073dc054 +RH-Bugzilla: 2069946 RH-Acked-by: Eugenio Pérez -RH-Acked-by: Laurent Vivier RH-Acked-by: Cindy Lu +RH-Acked-by: Laurent Vivier When the control virtqueue feature is absent or not negotiated, vhost_net_start() still tries to set up vhost_dev and install @@ -34,10 +34,10 @@ Signed-off-by: Jason Wang 1 file changed, 2 
insertions(+), 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c -index 1067e72b39..ffb3475201 100644 +index e1f4748831..ec045c3f41 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c -@@ -245,7 +245,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) +@@ -244,7 +244,8 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status) VirtIODevice *vdev = VIRTIO_DEVICE(n); NetClientState *nc = qemu_get_queue(n->nic); int queue_pairs = n->multiqueue ? n->max_queue_pairs : 1; @@ -48,5 +48,5 @@ index 1067e72b39..ffb3475201 100644 if (!get_vhost_net(nc->peer)) { return; -- -2.31.1 +2.35.3 diff --git a/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch b/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch new file mode 100644 index 0000000..7ee71ae --- /dev/null +++ b/kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch @@ -0,0 +1,63 @@ +From 1da951c4c3b4e403a6c1668a54e6264381c0003d Mon Sep 17 00:00:00 2001 +From: Vivek Goyal +Date: Tue, 8 Feb 2022 15:48:04 -0500 +Subject: [PATCH 1/3] virtiofsd: Fix breakage due to fuse_init_in size change + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 193: virtiofsd: Fix breakage due to fuse_init_in size change +RH-Commit: [1/1] 5809db034f9361fb462181d71e7cdde1324f8e54 +RH-Bugzilla: 2097209 +RH-Acked-by: German Maglione +RH-Acked-by: Laszlo Ersek +RH-Acked-by: Vivek Goyal +RH-Acked-by: Stefan Hajnoczi + +Kernel version 5.17 has increased the size of "struct fuse_init_in" struct. +Previously this struct was 16 bytes and now it has been extended to +64 bytes in size. + +Once qemu headers are updated to latest, it will expect to receive 64 byte +size struct (for protocol version major 7 and minor > 6). But if guest is +booting older kernel (older than 5.17), then it still sends older +fuse_init_in of size 16 bytes. And do_init() fails. It is expecting +64 byte struct. And this results in mount of virtiofs failing. 
+ +Fix this by parsing 16 bytes only for now. Separate patches will be +posted which will parse rest of the bytes and enable new functionality. +Right now we don't support any of the new functionality, so we don't +lose anything by not parsing bytes beyond 16. + +Reviewed-by: Dr. David Alan Gilbert +Signed-off-by: Vivek Goyal +Message-Id: <20220208204813.682906-2-vgoyal@redhat.com> +Signed-off-by: Dr. David Alan Gilbert +(cherry picked from commit a086d54c6ffa38f7e71f182b63a25315304a3392) +--- + tools/virtiofsd/fuse_lowlevel.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/tools/virtiofsd/fuse_lowlevel.c b/tools/virtiofsd/fuse_lowlevel.c +index e4679c73ab..5d431a7038 100644 +--- a/tools/virtiofsd/fuse_lowlevel.c ++++ b/tools/virtiofsd/fuse_lowlevel.c +@@ -1880,6 +1880,8 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + struct fuse_mbuf_iter *iter) + { + size_t compat_size = offsetof(struct fuse_init_in, max_readahead); ++ size_t compat2_size = offsetof(struct fuse_init_in, flags) + ++ sizeof(uint32_t); + struct fuse_init_in *arg; + struct fuse_init_out outarg; + struct fuse_session *se = req->se; +@@ -1897,7 +1899,7 @@ static void do_init(fuse_req_t req, fuse_ino_t nodeid, + + /* ...and now consume the new fields. 
*/ + if (arg->major == 7 && arg->minor >= 6) { +- if (!fuse_mbuf_iter_advance(iter, sizeof(*arg) - compat_size)) { ++ if (!fuse_mbuf_iter_advance(iter, compat2_size - compat_size)) { + fuse_reply_err(req, EINVAL); + return; + } +-- +2.35.3 + diff --git a/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch b/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch new file mode 100644 index 0000000..e6ffec1 --- /dev/null +++ b/kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch @@ -0,0 +1,65 @@ +From ebf6be5ba316ffda354af5eb1f1241ad6543b3cd Mon Sep 17 00:00:00 2001 +From: Yusuke Okada +Date: Thu, 18 Aug 2022 14:46:19 -0400 +Subject: [PATCH 3/3] virtiofsd: use g_date_time_get_microsecond to get + subsecond +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Dr. David Alan Gilbert +RH-MergeRequest: 222: virtiofsd: use g_date_time_get_microsecond to get subsecond +RH-Bugzilla: 2018885 +RH-Acked-by: Vivek Goyal +RH-Acked-by: Daniel P. Berrangé +RH-Acked-by: Sergio Lopez +RH-Commit: [1/1] da8795576acc7029044a801ef42676d66471a577 + +The "%f" specifier in g_date_time_format() is only available in glib +2.65.2 or later. If combined with older glib, the function returns null +and the timestamp displayed as "(null)". + +For backward compatibility, g_date_time_get_microsecond should be used +to retrieve subsecond. + +In this patch the g_date_time_format() leaves subsecond field as "%06d" +and let next snprintf to format with g_date_time_get_microsecond. + +Signed-off-by: Yusuke Okada +Reviewed-by: Dr. 
David Alan Gilbert +Message-id: 20220818184618.2205172-1-yokada.996@gmail.com +Signed-off-by: Stefan Hajnoczi +(cherry picked from commit f16d15c9276bd8f501f861c39cbd4adc812d0c1d) +--- + tools/virtiofsd/passthrough_ll.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c +index b3d0674f6d..523d8fbe1e 100644 +--- a/tools/virtiofsd/passthrough_ll.c ++++ b/tools/virtiofsd/passthrough_ll.c +@@ -3791,6 +3791,7 @@ static void setup_nofile_rlimit(unsigned long rlimit_nofile) + static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + { + g_autofree char *localfmt = NULL; ++ char buf[64]; + + if (current_log_level < level) { + return; +@@ -3803,9 +3804,11 @@ static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) + fmt); + } else { + g_autoptr(GDateTime) now = g_date_time_new_now_utc(); +- g_autofree char *nowstr = g_date_time_format(now, "%Y-%m-%d %H:%M:%S.%f%z"); ++ g_autofree char *nowstr = g_date_time_format(now, ++ "%Y-%m-%d %H:%M:%S.%%06d%z"); ++ snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now)); + localfmt = g_strdup_printf("[%s] [ID: %08ld] %s", +- nowstr, syscall(__NR_gettid), fmt); ++ buf, syscall(__NR_gettid), fmt); + } + fmt = localfmt; + } +-- +2.35.3 + diff --git a/kvm-x86-Add-AMX-CPUIDs-enumeration.patch b/kvm-x86-Add-AMX-CPUIDs-enumeration.patch new file mode 100644 index 0000000..d61e4cf --- /dev/null +++ b/kvm-x86-Add-AMX-CPUIDs-enumeration.patch @@ -0,0 +1,135 @@ +From d0826a8c2c3c389eeeed1014d7e316f39f083971 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:31 -0800 +Subject: [PATCH 09/24] x86: Add AMX CPUIDs enumeration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [9/13] fab147992ad927c9538529f018f06e2f48546c5b +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Add AMX primary feature bits XFD 
and AMX_TILE to +enumerate the CPU's AMX capability. Meanwhile, add +AMX TILE and TMUL CPUID leaf and subleaves which +exist when AMX TILE is present to provide the maximum +capability of TILE and TMUL. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-6-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit f21a48171cf3fa39532fc8553fd82e81b88b6474) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 55 ++++++++++++++++++++++++++++++++++++++++--- + target/i386/kvm/kvm.c | 4 +++- + 2 files changed, 55 insertions(+), 4 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index cd27c0eb81..09e08f7f38 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -574,6 +574,18 @@ static CPUCacheInfo legacy_l3_cache = { + #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ + #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ + ++/* CPUID Leaf 0x1D constants: */ ++#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 ++#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 ++#define INTEL_AMX_BYTES_PER_TILE 0x400 ++#define INTEL_AMX_BYTES_PER_ROW 0x40 ++#define INTEL_AMX_TILE_MAX_NAMES 0x8 ++#define INTEL_AMX_TILE_MAX_ROWS 0x10 ++ ++/* CPUID Leaf 0x1E constants: */ ++#define INTEL_AMX_TMUL_MAX_K 0x10 ++#define INTEL_AMX_TMUL_MAX_N 0x40 ++ + void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) + { +@@ -843,8 +855,8 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, NULL, +- NULL, NULL, NULL, "avx512-fp16", +- NULL, NULL, "spec-ctrl", "stibp", ++ NULL, NULL, "amx-bf16", "avx512-fp16", ++ "amx-tile", "amx-int8", "spec-ctrl", "stibp", + NULL, "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { +@@ -909,7 +921,7 @@ FeatureWordInfo feature_word_info[FEATURE_WORDS] = { + .type = CPUID_FEATURE_WORD, + 
.feat_names = { + "xsaveopt", "xsavec", "xgetbv1", "xsaves", +- NULL, NULL, NULL, NULL, ++ "xfd", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, +@@ -5593,6 +5605,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + } + break; + } ++ case 0x1D: { ++ /* AMX TILE */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *eax = INTEL_AMX_TILE_MAX_SUBLEAF; ++ } else if (count == 1) { ++ *eax = INTEL_AMX_TOTAL_TILE_BYTES | ++ (INTEL_AMX_BYTES_PER_TILE << 16); ++ *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); ++ *ecx = INTEL_AMX_TILE_MAX_ROWS; ++ } ++ break; ++ } ++ case 0x1E: { ++ /* AMX TMUL */ ++ *eax = 0; ++ *ebx = 0; ++ *ecx = 0; ++ *edx = 0; ++ if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { ++ break; ++ } ++ ++ if (count == 0) { ++ /* Highest numbered palette subleaf */ ++ *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); ++ } ++ break; ++ } + case 0x40000000: + /* + * CPUID code in kvm_arch_init_vcpu() ignores stuff +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index b5d98c4361..a64a79d870 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -1779,7 +1779,9 @@ int kvm_arch_init_vcpu(CPUState *cs) + c = &cpuid_data.entries[cpuid_i++]; + } + break; +- case 0x14: { ++ case 0x14: ++ case 0x1d: ++ case 0x1e: { + uint32_t times; + + c->function = i; +-- +2.35.3 + diff --git a/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch b/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch new file mode 100644 index 0000000..064b124 --- /dev/null +++ b/kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch @@ -0,0 +1,112 @@ +From 3ba6092159b6e3b25505af2a49c0f6ac99043db9 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:28 -0800 +Subject: [PATCH 06/24] x86: Add 
AMX XTILECFG and XTILEDATA components + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [6/13] 95229f87b4494631d57232f374a174f7bc95843a +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +The AMX TILECFG register and the TMMx tile data registers are +saved/restored via XSAVE, respectively in state component 17 +(64 bytes) and state component 18 (8192 bytes). + +Add AMX feature bits to x86_ext_save_areas array to set +up AMX components. Add structs that define the layout of +AMX XSAVE areas and use QEMU_BUILD_BUG_ON to validate the +structs sizes. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-3-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 1f16764f7d4515bfd5e4ae0aae814fa280a7d0c8) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 8 ++++++++ + target/i386/cpu.h | 18 +++++++++++++++++- + 2 files changed, 25 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index f44fad3a2a..0453c27c9d 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -1401,6 +1401,14 @@ ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { + [XSTATE_PKRU_BIT] = + { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, + .size = sizeof(XSavePKRU) }, ++ [XSTATE_XTILE_CFG_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILECFG), ++ }, ++ [XSTATE_XTILE_DATA_BIT] = { ++ .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, ++ .size = sizeof(XSaveXTILEDATA) ++ }, + }; + + static uint32_t xsave_area_size(uint64_t mask) +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 5d9702a991..e1dd8b9555 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -537,6 +537,8 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_BIT 6 + #define XSTATE_Hi16_ZMM_BIT 7 + #define XSTATE_PKRU_BIT 9 ++#define XSTATE_XTILE_CFG_BIT 17 ++#define 
XSTATE_XTILE_DATA_BIT 18 + + #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) + #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) +@@ -845,6 +847,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) + /* AVX512_FP16 instruction */ + #define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) ++/* AMX tile (two-dimensional register) */ ++#define CPUID_7_0_EDX_AMX_TILE (1U << 24) + /* Speculation Control */ + #define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) + /* Single Thread Indirect Branch Predictors */ +@@ -1348,6 +1352,16 @@ typedef struct XSavePKRU { + uint32_t padding; + } XSavePKRU; + ++/* Ext. save area 17: AMX XTILECFG state */ ++typedef struct XSaveXTILECFG { ++ uint8_t xtilecfg[64]; ++} XSaveXTILECFG; ++ ++/* Ext. save area 18: AMX XTILEDATA state */ ++typedef struct XSaveXTILEDATA { ++ uint8_t xtiledata[8][1024]; ++} XSaveXTILEDATA; ++ + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); +@@ -1355,6 +1369,8 @@ QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); + QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); + QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); + QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); ++QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); + + typedef struct ExtSaveArea { + uint32_t feature, bits; +@@ -1362,7 +1378,7 @@ typedef struct ExtSaveArea { + uint32_t ecx; + } ExtSaveArea; + +-#define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) ++#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) + + extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; + +-- +2.35.3 + diff --git a/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch b/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch new file mode 100644 index 0000000..5c0fd0a --- /dev/null +++ b/kvm-x86-Add-XFD-faulting-bit-for-state-components.patch @@ -0,0 +1,62 @@ +From 
098d6a965ada02f5897b73f0489413a050a176bb Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:30 -0800 +Subject: [PATCH 08/24] x86: Add XFD faulting bit for state components + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [8/13] 0b1b46c5d075655ab94bc79e042b187c5dc55551 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Intel introduces XFD faulting mechanism for extended +XSAVE features to dynamically enable the features in +runtime. If CPUID (EAX=0Dh, ECX=n, n>1).ECX[2] is set +as 1, it indicates support for XFD faulting of this +state component. + +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-5-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 0f17f6b30f3b051f0f96ccc98c9f7f395713699f) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 3 ++- + target/i386/cpu.h | 2 ++ + 2 files changed, 4 insertions(+), 1 deletion(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index c19b51ea32..cd27c0eb81 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5503,7 +5503,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; +- *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; ++ *ecx = esa->ecx & ++ (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 58676390e6..f2bdef9c26 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -555,8 +555,10 @@ typedef enum X86Seg { + #define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 ++#define ESA_FEATURE_XFD_BIT 2 + + #define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) ++#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + + + /* CPUID feature words */ +-- +2.35.3 + diff --git 
a/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch b/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch new file mode 100644 index 0000000..2db4c60 --- /dev/null +++ b/kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch @@ -0,0 +1,88 @@ +From 6eae12166341c236da023e5117b64b842ae72083 Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:27 -0800 +Subject: [PATCH 05/24] x86: Fix the 64-byte boundary enumeration for extended + state + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [5/13] 64fc93e3b0ad0fc56da9d71b33d9eefd3cbba1d7 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +The extended state subleaves (EAX=0Dh, ECX=n, n>1).ECX[1] +indicate whether the extended state component locates +on the next 64-byte boundary following the preceding state +component when the compacted format of an XSAVE area is +used. + +Right now, they are all zero because no supported component +needed the bit to be set, but the upcoming AMX feature will +use it. Fix the subleaves value according to KVM's supported +cpuid. 
+ +Signed-off-by: Jing Liu +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-2-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 131266b7565bd437127bd231563572696bb27235) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 1 + + target/i386/cpu.h | 6 ++++++ + target/i386/kvm/kvm-cpu.c | 1 + + 3 files changed, 8 insertions(+) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index dd6935b1dd..f44fad3a2a 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -5495,6 +5495,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + *eax = esa->size; + *ebx = esa->offset; ++ *ecx = esa->ecx & ESA_FEATURE_ALIGN64_MASK; + } + } + break; +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index c6a6c871f1..5d9702a991 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -548,6 +548,11 @@ typedef enum X86Seg { + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) + ++#define ESA_FEATURE_ALIGN64_BIT 1 ++ ++#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) ++ ++ + /* CPUID feature words */ + typedef enum FeatureWord { + FEAT_1_EDX, /* CPUID[1].EDX */ +@@ -1354,6 +1359,7 @@ QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); + typedef struct ExtSaveArea { + uint32_t feature, bits; + uint32_t offset, size; ++ uint32_t ecx; + } ExtSaveArea; + + #define XSAVE_STATE_AREA_COUNT (XSTATE_PKRU_BIT + 1) +diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c +index 7b004065ae..86ef7b2712 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -104,6 +104,7 @@ static void kvm_cpu_xsave_init(void) + if (sz != 0) { + assert(esa->size == sz); + esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); ++ esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); + } + } + } +-- +2.35.3 + diff --git a/kvm-x86-Grant-AMX-permission-for-guest.patch 
b/kvm-x86-Grant-AMX-permission-for-guest.patch new file mode 100644 index 0000000..c2ab95d --- /dev/null +++ b/kvm-x86-Grant-AMX-permission-for-guest.patch @@ -0,0 +1,215 @@ +From 50840e01d05a466a1dfbc219e49233834e5d7ed0 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Wed, 16 Feb 2022 22:04:29 -0800 +Subject: [PATCH 07/24] x86: Grant AMX permission for guest + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [7/13] 437578191f61139ca710cc7045ab38eb0d05eae2 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +Kernel allocates 4K xstate buffer by default. For XSAVE features +which require large state component (e.g. AMX), Linux kernel +dynamically expands the xstate buffer only after the process has +acquired the necessary permissions. Those are called dynamically- +enabled XSAVE features (or dynamic xfeatures). + +There are separate permissions for native tasks and guests. + +Qemu should request the guest permissions for dynamic xfeatures +which will be exposed to the guest. This only needs to be done +once before the first vcpu is created. + +KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to +get host side supported_xcr0 and Qemu can decide if it can request +dynamically enabled XSAVE features permission. 
+https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/ + +Suggested-by: Paolo Bonzini +Signed-off-by: Yang Zhong +Signed-off-by: Jing Liu +Message-Id: <20220217060434.52460-4-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit 19db68ca68a78fa033a21d419036b6e416554564) +Signed-off-by: Paul Lai +--- + target/i386/cpu.c | 7 +++++ + target/i386/cpu.h | 4 +++ + target/i386/kvm/kvm-cpu.c | 12 ++++---- + target/i386/kvm/kvm.c | 57 ++++++++++++++++++++++++++++++++++++++ + target/i386/kvm/kvm_i386.h | 1 + + 5 files changed, 75 insertions(+), 6 deletions(-) + +diff --git a/target/i386/cpu.c b/target/i386/cpu.c +index 0453c27c9d..c19b51ea32 100644 +--- a/target/i386/cpu.c ++++ b/target/i386/cpu.c +@@ -6027,6 +6027,7 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + CPUX86State *env = &cpu->env; + int i; + uint64_t mask; ++ static bool request_perm; + + if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_COMP_LO] = 0; +@@ -6042,6 +6043,12 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) + } + } + ++ /* Only request permission for first vcpu */ ++ if (kvm_enabled() && !request_perm) { ++ kvm_request_xsave_components(cpu, mask); ++ request_perm = true; ++ } ++ + env->features[FEAT_XSAVE_COMP_LO] = mask; + env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + } +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index e1dd8b9555..58676390e6 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -549,6 +549,10 @@ typedef enum X86Seg { + #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) + #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) + #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) ++#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) ++#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) ++ ++#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + + #define ESA_FEATURE_ALIGN64_BIT 1 + +diff --git a/target/i386/kvm/kvm-cpu.c 
b/target/i386/kvm/kvm-cpu.c +index 86ef7b2712..bdc967c484 100644 +--- a/target/i386/kvm/kvm-cpu.c ++++ b/target/i386/kvm/kvm-cpu.c +@@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu) + static void kvm_cpu_xsave_init(void) + { + static bool first = true; +- KVMState *s = kvm_state; ++ uint32_t eax, ebx, ecx, edx; + int i; + + if (!first) { +@@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void) + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (esa->size) { +- int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX); +- if (sz != 0) { +- assert(esa->size == sz); +- esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX); +- esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX); ++ host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); ++ if (eax != 0) { ++ assert(esa->size == eax); ++ esa->offset = ebx; ++ esa->ecx = ecx; + } + } + } +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a668f521ac..b5d98c4361 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -17,6 +17,7 @@ + #include "qapi/error.h" + #include + #include ++#include + + #include + #include "standard-headers/asm-x86/kvm_para.h" +@@ -347,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + struct kvm_cpuid2 *cpuid; + uint32_t ret = 0; + uint32_t cpuid_1_edx; ++ uint64_t bitmask; + + cpuid = get_supported_cpuid(s); + +@@ -404,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + if (!has_msr_arch_capabs) { + ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; + } ++ } else if (function == 0xd && index == 0 && ++ (reg == R_EAX || reg == R_EDX)) { ++ struct kvm_device_attr attr = { ++ .group = 0, ++ .attr = KVM_X86_XCOMP_GUEST_SUPP, ++ .addr = (unsigned long) &bitmask ++ }; ++ ++ bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); ++ if (!sys_attr) { ++ warn_report("cannot get sys attribute capabilities %d", sys_attr); ++ } ++ ++ int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); ++ if (rc == 
-1 && (errno == ENXIO || errno == EINVAL)) { ++ warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " ++ "error: %d", rc); ++ } ++ ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by +@@ -5054,3 +5075,39 @@ bool kvm_arch_cpu_check_are_resettable(void) + { + return !sev_es_enabled(); + } ++ ++#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 ++ ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) ++{ ++ KVMState *s = kvm_state; ++ uint64_t supported; ++ ++ mask &= XSTATE_DYNAMIC_MASK; ++ if (!mask) { ++ return; ++ } ++ /* ++ * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. ++ * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned ++ * about them already because they are not supported features. ++ */ ++ supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); ++ supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; ++ mask &= supported; ++ ++ while (mask) { ++ int bit = ctz64(mask); ++ int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); ++ if (rc) { ++ /* ++ * Older kernel version (<5.17) do not support ++ * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return ++ * any dynamic feature from kvm_arch_get_supported_cpuid. 
++ */ ++ warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " ++ "for feature bit %d", bit); ++ } ++ mask &= ~BIT_ULL(bit); ++ } ++} +diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h +index a978509d50..4124912c20 100644 +--- a/target/i386/kvm/kvm_i386.h ++++ b/target/i386/kvm/kvm_i386.h +@@ -52,5 +52,6 @@ bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); + + bool kvm_enable_sgx_provisioning(KVMState *s); ++void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + + #endif +-- +2.35.3 + diff --git a/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch b/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch new file mode 100644 index 0000000..e4846b3 --- /dev/null +++ b/kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch @@ -0,0 +1,178 @@ +From 90a276ed72deab84f3fdd4b57e9ccfc6514934fb Mon Sep 17 00:00:00 2001 +From: Zeng Guang +Date: Wed, 16 Feb 2022 22:04:33 -0800 +Subject: [PATCH 11/24] x86: Support XFD and AMX xsave data migration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [11/13] 4ff6e5544ffdac4e6d2f568f7f63b937502ca6c5 +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +XFD(eXtended Feature Disable) allows to enable a +feature on xsave state while preventing specific +user threads from using the feature. + +Support save and restore XFD MSRs if CPUID.D.1.EAX[4] +enumerate to be valid. Likewise migrate the MSRs and +related xsave state necessarily. 
+ +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-8-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit cdec2b753b487d9e8aab028231c35d87789ea083) +Signed-off-by: Paul Lai +--- + target/i386/cpu.h | 9 +++++++++ + target/i386/kvm/kvm.c | 18 +++++++++++++++++ + target/i386/machine.c | 46 +++++++++++++++++++++++++++++++++++++++++++ + 3 files changed, 73 insertions(+) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index 14a3501b87..8ab2a4042a 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -505,6 +505,9 @@ typedef enum X86Seg { + + #define MSR_VM_HSAVE_PA 0xc0010117 + ++#define MSR_IA32_XFD 0x000001c4 ++#define MSR_IA32_XFD_ERR 0x000001c5 ++ + #define MSR_IA32_BNDCFGS 0x00000d90 + #define MSR_IA32_XSS 0x00000da0 + #define MSR_IA32_UMWAIT_CONTROL 0xe1 +@@ -870,6 +873,8 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; + #define CPUID_7_1_EAX_AVX_VNNI (1U << 4) + /* AVX512 BFloat16 Instruction */ + #define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) ++/* XFD Extend Feature Disabled */ ++#define CPUID_D_1_EAX_XFD (1U << 4) + + /* Packets which contain IP payload have LIP values */ + #define CPUID_14_0_ECX_LIP (1U << 31) +@@ -1610,6 +1615,10 @@ typedef struct CPUX86State { + uint64_t msr_rtit_cr3_match; + uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + ++ /* Per-VCPU XFD MSRs */ ++ uint64_t msr_xfd; ++ uint64_t msr_xfd_err; ++ + /* exception/interrupt handling */ + int error_code; + int exception_is_int; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index d3d476df27..b1128b0e07 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -3219,6 +3219,13 @@ static int kvm_put_msrs(X86CPU *cpu, int level) + env->msr_ia32_sgxlepubkeyhash[3]); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, ++ env->msr_xfd); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, ++ env->msr_xfd_err); ++ } ++ + /* Note: 
MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ + } +@@ -3571,6 +3578,11 @@ static int kvm_get_msrs(X86CPU *cpu) + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); + } + ++ if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); ++ kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); ++ } ++ + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; +@@ -3870,6 +3882,12 @@ static int kvm_get_msrs(X86CPU *cpu) + env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = + msrs[i].data; + break; ++ case MSR_IA32_XFD: ++ env->msr_xfd = msrs[i].data; ++ break; ++ case MSR_IA32_XFD_ERR: ++ env->msr_xfd_err = msrs[i].data; ++ break; + } + } + +diff --git a/target/i386/machine.c b/target/i386/machine.c +index 83c2b91529..3977e9d8f8 100644 +--- a/target/i386/machine.c ++++ b/target/i386/machine.c +@@ -1455,6 +1455,48 @@ static const VMStateDescription vmstate_msr_intel_sgx = { + } + }; + ++static bool xfd_msrs_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); ++} ++ ++static const VMStateDescription vmstate_msr_xfd = { ++ .name = "cpu/msr_xfd", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = xfd_msrs_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT64(env.msr_xfd, X86CPU), ++ VMSTATE_UINT64(env.msr_xfd_err, X86CPU), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++ ++#ifdef TARGET_X86_64 ++static bool amx_xtile_needed(void *opaque) ++{ ++ X86CPU *cpu = opaque; ++ CPUX86State *env = &cpu->env; ++ ++ return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); ++} ++ ++static const VMStateDescription vmstate_amx_xtile = { ++ .name = "cpu/intel_amx_xtile", ++ .version_id = 1, ++ .minimum_version_id = 1, ++ .needed = amx_xtile_needed, ++ .fields = (VMStateField[]) { ++ VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), ++ 
VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), ++ VMSTATE_END_OF_LIST() ++ } ++}; ++#endif ++ + const VMStateDescription vmstate_x86_cpu = { + .name = "cpu", + .version_id = 12, +@@ -1593,6 +1635,10 @@ const VMStateDescription vmstate_x86_cpu = { + #endif + &vmstate_msr_tsx_ctrl, + &vmstate_msr_intel_sgx, ++ &vmstate_msr_xfd, ++#ifdef TARGET_X86_64 ++ &vmstate_amx_xtile, ++#endif + NULL + } + }; +-- +2.35.3 + diff --git a/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch b/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch new file mode 100644 index 0000000..13566b1 --- /dev/null +++ b/kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch @@ -0,0 +1,182 @@ +From 28cf1b55f346a9f56e84fa57921f5a28a99cd59b Mon Sep 17 00:00:00 2001 +From: Jing Liu +Date: Wed, 16 Feb 2022 22:04:32 -0800 +Subject: [PATCH 10/24] x86: add support for KVM_CAP_XSAVE2 and AMX state + migration + +RH-Author: Paul Lai +RH-MergeRequest: 176: Enable KVM AMX support +RH-Commit: [10/13] d584f455ba1ecd8a4a87f3470e6aac24ba9a1f5a +RH-Bugzilla: 1916415 +RH-Acked-by: Cornelia Huck +RH-Acked-by: Igor Mammedov +RH-Acked-by: Paolo Bonzini + +When dynamic xfeatures (e.g. AMX) are used by the guest, the xsave +area would be larger than 4KB. KVM_GET_XSAVE2 and KVM_SET_XSAVE +under KVM_CAP_XSAVE2 works with a xsave buffer larger than 4KB. +Always use the new ioctls under KVM_CAP_XSAVE2 when KVM supports it. 
+ +Signed-off-by: Jing Liu +Signed-off-by: Zeng Guang +Signed-off-by: Wei Wang +Signed-off-by: Yang Zhong +Message-Id: <20220217060434.52460-7-yang.zhong@intel.com> +Signed-off-by: Paolo Bonzini +(cherry picked from commit e56dd3c70abb31893c61ac834109fa7a38841330) +Signed-off-by: Paul Lai +--- + target/i386/cpu.h | 4 ++++ + target/i386/kvm/kvm.c | 42 ++++++++++++++++++++++++-------------- + target/i386/xsave_helper.c | 28 +++++++++++++++++++++++++ + 3 files changed, 59 insertions(+), 15 deletions(-) + +diff --git a/target/i386/cpu.h b/target/i386/cpu.h +index f2bdef9c26..14a3501b87 100644 +--- a/target/i386/cpu.h ++++ b/target/i386/cpu.h +@@ -1522,6 +1522,10 @@ typedef struct CPUX86State { + uint64_t opmask_regs[NB_OPMASK_REGS]; + YMMReg zmmh_regs[CPU_NB_REGS]; + ZMMReg hi16_zmm_regs[CPU_NB_REGS]; ++#ifdef TARGET_X86_64 ++ uint8_t xtilecfg[64]; ++ uint8_t xtiledata[8192]; ++#endif + + /* sysenter registers */ + uint32_t sysenter_cs; +diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c +index a64a79d870..d3d476df27 100644 +--- a/target/i386/kvm/kvm.c ++++ b/target/i386/kvm/kvm.c +@@ -123,6 +123,7 @@ static uint32_t num_architectural_pmu_gp_counters; + static uint32_t num_architectural_pmu_fixed_counters; + + static int has_xsave; ++static int has_xsave2; + static int has_xcrs; + static int has_pit_state2; + static int has_exception_payload; +@@ -1585,6 +1586,26 @@ static Error *invtsc_mig_blocker; + + #define KVM_MAX_CPUID_ENTRIES 100 + ++static void kvm_init_xsave(CPUX86State *env) ++{ ++ if (has_xsave2) { ++ env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); ++ } else if (has_xsave) { ++ env->xsave_buf_len = sizeof(struct kvm_xsave); ++ } else { ++ return; ++ } ++ ++ env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); ++ memset(env->xsave_buf, 0, env->xsave_buf_len); ++ /* ++ * The allocated storage must be large enough for all of the ++ * possible XSAVE state components. 
++ */ ++ assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= ++ env->xsave_buf_len); ++} ++ + int kvm_arch_init_vcpu(CPUState *cs) + { + struct { +@@ -1614,6 +1635,8 @@ int kvm_arch_init_vcpu(CPUState *cs) + + cpuid_i = 0; + ++ has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); ++ + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; +@@ -2003,19 +2026,7 @@ int kvm_arch_init_vcpu(CPUState *cs) + if (r) { + goto fail; + } +- +- if (has_xsave) { +- env->xsave_buf_len = sizeof(struct kvm_xsave); +- env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); +- memset(env->xsave_buf, 0, env->xsave_buf_len); +- +- /* +- * The allocated storage must be large enough for all of the +- * possible XSAVE state components. +- */ +- assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) +- <= env->xsave_buf_len); +- } ++ kvm_init_xsave(env); + + max_nested_state_len = kvm_max_nested_state_length(); + if (max_nested_state_len > 0) { +@@ -3263,13 +3274,14 @@ static int kvm_get_xsave(X86CPU *cpu) + { + CPUX86State *env = &cpu->env; + void *xsave = env->xsave_buf; +- int ret; ++ int type, ret; + + if (!has_xsave) { + return kvm_get_fpu(cpu); + } + +- ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); ++ type = has_xsave2 ? 
KVM_GET_XSAVE2 : KVM_GET_XSAVE; ++ ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); + if (ret < 0) { + return ret; + } +diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c +index ac61a96344..996e9f3bfe 100644 +--- a/target/i386/xsave_helper.c ++++ b/target/i386/xsave_helper.c +@@ -126,6 +126,20 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) + + memcpy(pkru, &env->pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); ++ } + #endif + } + +@@ -247,5 +261,19 @@ void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) + pkru = buf + e->offset; + memcpy(&env->pkru, pkru, sizeof(env->pkru)); + } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; ++ if (e->size && e->offset) { ++ const XSaveXTILECFG *tilecfg = buf + e->offset; ++ ++ memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); ++ } ++ ++ e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; ++ if (e->size && e->offset && buflen >= e->size + e->offset) { ++ const XSaveXTILEDATA *tiledata = buf + e->offset; ++ ++ memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); ++ } + #endif + } +-- +2.35.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index 66f14a2..f22b861 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -83,7 +83,7 @@ Obsoletes: %1-rhev <= %{epoch}:%{version}-%{release} Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 6.2.0 -Release: 12%{?rcrel}%{?dist} +Release: 28%{?rcrel}%{?dist} # Epoch because we pushed a qemu-1.0 package. 
AIUI this can't ever be dropped Epoch: 15 License: GPLv2 and GPLv2+ and CC-BY @@ -257,6 +257,339 @@ Patch78: kvm-softmmu-physmem-Introduce-MemTxAttrs-memory-field-an.patch Patch79: kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch # For bz#2063262 - CVE-2022-26354 virt:rhel/qemu-kvm: QEMU: vhost-vsock: missing virtqueue detach on error can lead to memory leak [rhel-8] Patch80: kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch +# For bz#2043830 - [IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU +Patch81: kvm-s390x-ipl-support-extended-kernel-command-line-size.patch +# For bz#2063206 - CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8] +Patch82: kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch83: kvm-qcow2-Improve-refcount-structure-rebuilding.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch84: kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs) +Patch85: kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch +# For bz#1519071 - Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. 
LVs) +Patch86: kvm-iotests-108-Fix-when-missing-user_allow_other.patch +# For bz#2065043 - Remove upstream-only devices from the qemu-kvm binary +Patch87: kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch +# For bz#2070417 - Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0] +Patch88: kvm-target-i386-properly-reset-TSC-on-reset.patch +# For bz#2040734 - CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7] +Patch89: kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch90: kvm-virtio-gpu-do-not-byteswap-padding.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch91: kvm-linux-headers-update-to-5.16-rc1.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch92: kvm-linux-headers-Update-headers-to-v5.17-rc1.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch93: kvm-linux-headers-include-missing-changes-from-5.17.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch94: kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch95: kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch96: kvm-x86-Grant-AMX-permission-for-guest.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch97: kvm-x86-Add-XFD-faulting-bit-for-state-components.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch98: kvm-x86-Add-AMX-CPUIDs-enumeration.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions 
+Patch99: kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch100: kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch101: kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch +# For bz#1916415 - [Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions +Patch102: kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch103: kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch104: kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch105: kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch106: kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch107: kvm-vhost-vdpa-backend-feature-should-set-only-once.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch108: kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch +# For bz#2069946 - PXE boot crash qemu when using multiqueue vDPA +Patch109: kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch +# For bz#2029980 - Failed assertion in IDE emulation with Ceph backend +Patch110: kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch111: kvm-block-Make-bdrv_refresh_limits-non-recursive.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch112: 
kvm-iotests-Allow-using-QMP-with-the-QSD.patch +# For bz#2072932 - Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7] +Patch113: kvm-iotests-graph-changes-while-io-New-test.patch +# For bz#2097209 - [virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111 +Patch114: kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch +# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8] +Patch115: kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch +# For bz#1951521 - CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8] +Patch116: kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch117: kvm-migration-Never-call-twice-qemu_target_page_size.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch118: kvm-multifd-Rename-used-field-to-num.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch119: kvm-multifd-Add-missing-documentation.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch120: kvm-multifd-The-variable-is-only-used-inside-the-loop.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch121: kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch122: kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch123: kvm-multifd-Fill-offset-and-block-for-reception.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch124: kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch +# For 
bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch125: kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch126: kvm-migration-All-this-fields-are-unsigned.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch127: kvm-multifd-Move-iov-from-pages-to-params.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch128: kvm-multifd-Make-zlib-use-iov-s.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch129: kvm-multifd-Make-zstd-use-iov-s.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch130: kvm-multifd-Remove-send_write-method.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch131: kvm-multifd-Use-a-single-writev-on-the-send-side.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch132: kvm-multifd-Use-normal-pages-array-on-the-send-side.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch133: kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch134: kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch135: kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch136: kvm-migration-Add-migrate_use_tls-helper.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch137: kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch138: 
kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch139: kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch140: kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch141: kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch +# For bz#2072049 - Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8 +Patch142: kvm-migration-Change-zero_copy_send-from-migration-param.patch +# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration +Patch143: kvm-migration-Add-migration_incoming_transport_cleanup.patch +# For bz#2097652 - The migration port is not released if use it again for recovering postcopy migration +Patch144: kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch145: kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch146: kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch147: kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch148: kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch149: kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch 
+# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch150: kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch151: kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch152: kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch +# For bz#2098076 - virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions +Patch153: kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch +# For bz#2105410 - Stalled IO Operations in VM +Patch154: kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch +# For bz#2105410 - Stalled IO Operations in VM +Patch155: kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch156: kvm-migration-Introduce-ram_transferred_add.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch157: kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch158: kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch159: kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch160: kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch 
+# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch161: kvm-migration-Avoid-false-positive-on-non-supported-scen.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch162: kvm-migration-add-remaining-params-has_-true-in-migratio.patch +# For bz#2110203 - zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together +Patch163: kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch +# For bz#2112296 - virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions +Patch164: kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch +# For bz#2120279 - Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7] +Patch165: kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch +# For bz#2117149 - Can't run when memory backing with hugepages and backend type memfd +Patch166: kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch167: kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch +# For bz#2125271 - [RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0] +Patch168: kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch169: kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch +# For bz#2124757 - RHEL8: skey test in kvm_unit_test got failed +Patch170: kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch +# For bz#2018885 - [virtiofs] virtiofsd debug log's timestamp is NULL +Patch171: kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch +# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on 
secure execution guest +Patch172: kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch +# For bz#2116302 - RHEL8.6 - virtiofs will not mount fs on secure execution guest +Patch173: kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch174: kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch175: kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch176: kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch177: kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch178: kvm-Update-linux-headers-to-v6.0-rc4.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch179: kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu 
part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch180: kvm-s390x-pci-enable-for-load-store-interpretation.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch181: kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch182: kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch183: kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch184: kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch185: kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch186: kvm-dump-Use-ERRP_GUARD.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption 
with customer keys - qemu part +Patch187: kvm-dump-Remove-the-sh_info-variable.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch188: kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch189: kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch190: kvm-dump-Add-more-offset-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch191: kvm-dump-Introduce-dump_is_64bit-helper-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch192: kvm-dump-Consolidate-phdr-note-writes.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch193: kvm-dump-Cleanup-dump_begin-write-functions.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch194: kvm-dump-Consolidate-elf-note-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced 
Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch195: kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch196: kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch197: kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch198: kvm-dump-Rework-get_start_block.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch199: kvm-dump-Rework-filter-area-variables.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch200: kvm-dump-Rework-dump_calculate_size-function.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch201: kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure 
Execution guest dump encryption with customer keys - qemu part +Patch202: kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch203: kvm-dump-simplify-a-bit-kdump-get_next_page.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch204: kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch205: kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch206: kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch207: kvm-dump-Reorder-struct-DumpState.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch208: kvm-dump-Reintroduce-memory_offset-and-section_offset.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch209: 
kvm-dump-Add-architecture-section-and-section-string-tab.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch210: kvm-s390x-Add-protected-dump-cap.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch211: kvm-s390x-Introduce-PV-query-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch212: kvm-include-elf.h-add-s390x-note-types.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch213: kvm-s390x-Add-KVM-PV-dump-interface.patch +# For bz#1664378 - [IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part +# For bz#2043909 - [IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part +Patch214: kvm-s390x-pv-Add-dump-support.patch +# For bz#2129760 - CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8] +Patch215: kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch216: kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch +# For bz#2132609 - qemu-kvm: backport some aarch64 fixes +Patch217: kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch218: 
kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch +# For bz#2128225 - [s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu +Patch219: kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch +# For bz#2141896 - VMs hung on vnc_clipboard_send +Patch220: kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch221: kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch222: kvm-hw-display-qxl-Document-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch223: kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch224: kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch +# For bz#2148545 - CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8] +Patch225: kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch +# For bz#2155448 - RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part +Patch226: kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch BuildRequires: wget BuildRequires: rpm-build @@ -1426,6 +1759,248 @@ sh %{_sysconfdir}/sysconfig/modules/kvm.modules &> /dev/null || : %changelog +* Wed Jan 04 2023 Jon Maloy - 6.2.0-28 +- kvm-s390x-tod-kvm-don-t-save-restore-the-TOD-in-PV-guest.patch [bz#2155448] +- Resolves: bz#2155448 + (RHEL8.8 - KVM: s390: pv: don't allow userspace to set the clock under PV - QEMU part) + +* Thu Dec 08 
2022 Jon Maloy - 6.2.0-27 +- kvm-hw-display-qxl-Have-qxl_log_command-Return-early-if-.patch [bz#2148545] +- kvm-hw-display-qxl-Document-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Pass-requested-buffer-size-to-qxl_phy.patch [bz#2148545] +- kvm-hw-display-qxl-Avoid-buffer-overrun-in-qxl_phys2virt.patch [bz#2148545] +- kvm-hw-display-qxl-Assert-memory-slot-fits-in-preallocat.patch [bz#2148545] +- Resolves: bz#2148545 + (CVE-2022-4144 virt:rhel/qemu-kvm: QEMU: QXL: qxl_phys2virt unsafe address translation can lead to out-of-bounds read [rhel-8]) + +* Wed Nov 23 2022 Jon Maloy - 6.2.0-26 +- kvm-docs-system-s390x-Document-the-loadparm-machine-prop.patch [bz#2128225] +- kvm-s390x-Register-TYPE_S390_CCW_MACHINE-properties-as-c.patch [bz#2128225] +- kvm-ui-vnc.c-Fixed-a-deadlock-bug.patch [bz#2141896] +- Resolves: bz#2128225 + ([s390x] [RHEL8][s390x-ccw bios] lacking document about parameter loadparm in qemu) +- Resolves: bz#2141896 + (VMs hung on vnc_clipboard_send) + +* Wed Nov 16 2022 Jon Maloy - 6.2.0-25 +- kvm-hw-acpi-Add-ospm_status-hook-implementation-for-acpi.patch [bz#2132609] +- kvm-target-arm-kvm-Retry-KVM_CREATE_VM-call-if-it-fails-.patch [bz#2132609] +- Resolves: bz#2132609 + (qemu-kvm: backport some aarch64 fixes) + +* Thu Nov 10 2022 Jon Maloy - 6.2.0-24 +- kvm-s390x-pci-use-a-reserved-ID-for-the-default-PCI-grou.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-use-hard-coded-dma-range-in-reg_ioat.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-use-the-passthrough-measurement-update-int.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-supported-DT-information-to-clp-respon.patch [bz#1664378 bz#2043909] +- kvm-Update-linux-headers-to-v6.0-rc4.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-add-routine-to-get-host-function-handle-fr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-enable-for-load-store-interpretation.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-don-t-fence-interpreted-devices-without-MS.patch [bz#1664378 bz#2043909] +- 
kvm-s390x-pci-enable-adapter-event-notification-for-inte.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-let-intercept-devices-have-separate-PCI-gr.patch [bz#1664378 bz#2043909] +- kvm-s390x-pci-reflect-proper-maxstbl-for-groups-of-inter.patch [bz#1664378 bz#2043909] +- kvm-s390x-s390-virtio-ccw-Switch-off-zPCI-enhancements-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-ERRP_GUARD.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-sh_info-variable.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-shdr_num-to-decrease-complexity.patch [bz#1664378 bz#2043909] +- kvm-dump-Remove-the-section-if-when-calculating-the-memo.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-more-offset-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Introduce-dump_is_64bit-helper-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-phdr-note-writes.patch [bz#1664378 bz#2043909] +- kvm-dump-Cleanup-dump_begin-write-functions.patch [bz#1664378 bz#2043909] +- kvm-dump-Consolidate-elf-note-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Replace-opaque-DumpState-pointer-with-a-typed-o.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf_loads-to-write_elf_phdr_loads.patch [bz#1664378 bz#2043909] +- kvm-dump-Refactor-dump_iterate-and-introduce-dump_filter.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-get_start_block.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-filter-area-variables.patch [bz#1664378 bz#2043909] +- kvm-dump-Rework-dump_calculate_size-function.patch [bz#1664378 bz#2043909] +- kvm-dump-Split-elf-header-functions-into-prepare-and-wri.patch [bz#1664378 bz#2043909] +- kvm-dump-Rename-write_elf-_phdr_note-to-prepare_elf-_phd.patch [bz#1664378 bz#2043909] +- kvm-dump-simplify-a-bit-kdump-get_next_page.patch [bz#1664378 bz#2043909] +- kvm-dump-fix-kdump-to-work-over-non-aligned-blocks.patch [bz#1664378 bz#2043909] +- kvm-dump-Use-a-buffer-for-ELF-section-data-and-headers.patch [bz#1664378 bz#2043909] +- 
kvm-dump-Write-ELF-section-headers-right-after-ELF-heade.patch [bz#1664378 bz#2043909] +- kvm-dump-Reorder-struct-DumpState.patch [bz#1664378 bz#2043909] +- kvm-dump-Reintroduce-memory_offset-and-section_offset.patch [bz#1664378 bz#2043909] +- kvm-dump-Add-architecture-section-and-section-string-tab.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-protected-dump-cap.patch [bz#1664378 bz#2043909] +- kvm-s390x-Introduce-PV-query-interface.patch [bz#1664378 bz#2043909] +- kvm-include-elf.h-add-s390x-note-types.patch [bz#1664378 bz#2043909] +- kvm-s390x-Add-KVM-PV-dump-interface.patch [bz#1664378 bz#2043909] +- kvm-s390x-pv-Add-dump-support.patch [bz#1664378 bz#2043909] +- kvm-ui-vnc-clipboard-fix-integer-underflow-in-vnc_client.patch [bz#2129760] +- Resolves: bz#1664378 + ([IBM 8.8 FEAT] Enhanced Interpretation for PCI Functions (kvm) - qemu part) +- Resolves: bz#2043909 + ([IBM 8.8 FEAT] KVM: Secure Execution guest dump encryption with customer keys - qemu part) +- Resolves: bz#2129760 + (CVE-2022-3165 virt:rhel/qemu-kvm: QEMU: VNC: integer underflow in vnc_client_cut_text_ext leads to CPU exhaustion [rhel-8]) + +* Wed Oct 26 2022 Jon Maloy - 6.2.0-23 +- kvm-virtio-fix-the-condition-for-iommu_platform-not-supp.patch [bz#2116302] +- kvm-virtio-fix-feature-negotiation-for-ACCESS_PLATFORM.patch [bz#2116302] +- Resolves: bz#2116302 + (RHEL8.6 - virtiofs will not mount fs on secure execution guest) + +* Wed Oct 05 2022 Jon Maloy - 6.2.0-22 +- kvm-redhat-Update-linux-headers-linux-kvm.h-to-v5.18-rc6.patch [bz#2124757] +- kvm-target-s390x-kvm-Honor-storage-keys-during-emulation.patch [bz#2124757] +- kvm-virtiofsd-use-g_date_time_get_microsecond-to-get-sub.patch [bz#2018885] +- Resolves: bz#2124757 + (RHEL8: skey test in kvm_unit_test got failed) +- Resolves: bz#2018885 + ([virtiofs] virtiofsd debug log's timestamp is NULL) + +* Thu Sep 29 2022 Jon Maloy - 6.2.0-21 +- kvm-backends-hostmem-Fix-support-of-memory-backend-memfd.patch [bz#2117149] +- 
kvm-i386-reset-KVM-nested-state-upon-CPU-reset.patch [bz#2125271] +- kvm-i386-do-kvm_put_msr_feature_control-first-thing-when.patch [bz#2125271] +- Resolves: bz#2117149 + (Can't run when memory backing with hugepages and backend type memfd) +- Resolves: bz#2125271 + ([RHEL8.7] Guests in VMX root operation fail to reboot with QEMU's 'system_reset' command [rhel-8.8.0]) + +* Fri Aug 26 2022 Jon Maloy - 6.2.0-20 +- kvm-scsi-generic-Fix-emulated-block-limits-VPD-page.patch [bz#2120279] +- Resolves: bz#2120279 + (Wrong max_sectors_kb and Maximum transfer length on the pass-through device [rhel-8.7]) + +* Tue Aug 16 2022 Miroslav Rezanina - 6.2.0-19 +- kvm-migration-Introduce-ram_transferred_add.patch [bz#2110203] +- kvm-migration-Tally-pre-copy-downtime-and-post-copy-byte.patch [bz#2110203] +- kvm-QIOChannelSocket-Fix-zero-copy-flush-returning-code-.patch [bz#2110203] +- kvm-Add-dirty-sync-missed-zero-copy-migration-stat.patch [bz#2110203] +- kvm-migration-multifd-Report-to-user-when-zerocopy-not-w.patch [bz#2110203] +- kvm-migration-Avoid-false-positive-on-non-supported-scen.patch [bz#2110203] +- kvm-migration-add-remaining-params-has_-true-in-migratio.patch [bz#2110203] +- kvm-QIOChannelSocket-Add-support-for-MSG_ZEROCOPY-IPV6.patch [bz#2110203] +- kvm-pc-bios-s390-ccw-Fix-booting-with-logical-block-size.patch [bz#2112296] +- Resolves: bz#2110203 + (zerocopy capability can be enabled when set migrate capabilities with multifd and compress/xbzrle together) +- Resolves: bz#2112296 + (virtio-blk: Can't boot fresh installation from used 512 cluster_size image under certain conditions) + +* Tue Jul 19 2022 Camilla Conte - 6.2.0-18 +- kvm-linux-aio-fix-unbalanced-plugged-counter-in-laio_io_.patch [bz#2105410] +- kvm-linux-aio-explain-why-max-batch-is-checked-in-laio_i.patch [bz#2105410] +- Resolves: bz#2105410 + (Stalled IO Operations in VM) + +* Tue Jul 12 2022 Camilla Conte - 6.2.0-17 +- kvm-migration-Never-call-twice-qemu_target_page_size.patch [bz#2072049] +- 
kvm-multifd-Rename-used-field-to-num.patch [bz#2072049] +- kvm-multifd-Add-missing-documentation.patch [bz#2072049] +- kvm-multifd-The-variable-is-only-used-inside-the-loop.patch [bz#2072049] +- kvm-multifd-remove-used-parameter-from-send_prepare-meth.patch [bz#2072049] +- kvm-multifd-remove-used-parameter-from-send_recv_pages-m.patch [bz#2072049] +- kvm-multifd-Fill-offset-and-block-for-reception.patch [bz#2072049] +- kvm-multifd-Make-zstd-compression-method-not-use-iovs.patch [bz#2072049] +- kvm-multifd-Make-zlib-compression-method-not-use-iovs.patch [bz#2072049] +- kvm-migration-All-this-fields-are-unsigned.patch [bz#2072049] +- kvm-multifd-Move-iov-from-pages-to-params.patch [bz#2072049] +- kvm-multifd-Make-zlib-use-iov-s.patch [bz#2072049] +- kvm-multifd-Make-zstd-use-iov-s.patch [bz#2072049] +- kvm-multifd-Remove-send_write-method.patch [bz#2072049] +- kvm-multifd-Use-a-single-writev-on-the-send-side.patch [bz#2072049] +- kvm-multifd-Use-normal-pages-array-on-the-send-side.patch [bz#2072049] +- kvm-QIOChannel-Add-flags-on-io_writev-and-introduce-io_f.patch [bz#2072049] +- kvm-QIOChannelSocket-Implement-io_writev-zero-copy-flag-.patch [bz#2072049] +- kvm-migration-Add-zero-copy-send-parameter-for-QMP-HMP-f.patch [bz#2072049] +- kvm-migration-Add-migrate_use_tls-helper.patch [bz#2072049] +- kvm-multifd-multifd_send_sync_main-now-returns-negative-.patch [bz#2072049] +- kvm-multifd-Send-header-packet-without-flags-if-zero-cop.patch [bz#2072049] +- kvm-multifd-Implement-zero-copy-write-in-multifd-migrati.patch [bz#2072049] +- kvm-QIOChannelSocket-Introduce-assert-and-reduce-ifdefs-.patch [bz#2072049] +- kvm-QIOChannelSocket-Fix-zero-copy-send-so-socket-flush-.patch [bz#2072049] +- kvm-migration-Change-zero_copy_send-from-migration-param.patch [bz#2072049] +- kvm-migration-Add-migration_incoming_transport_cleanup.patch [bz#2097652] +- kvm-migration-Allow-migrate-recover-to-run-multiple-time.patch [bz#2097652] +- 
kvm-pc-bios-s390-ccw-virtio-Introduce-a-macro-for-the-DA.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-bootmap-Improve-the-guessing-logic-.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Simplify-fix-virtio_i.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Remove-virtio_assume_.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Set-missing-status-bits-whil.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Read-device-config-after-fea.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-Beautify-the-code-for-readin.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-Split-virtio-scsi-code-from-virtio_.patch [bz#2098076] +- kvm-pc-bios-s390-ccw-virtio-blkdev-Request-the-right-fea.patch [bz#2098076] +- Resolves: bz#2072049 + (Pull MSG_ZEROCOPY on QEMU Live Migration Patches into RHEL 8) +- Resolves: bz#2097652 + (The migration port is not released if use it again for recovering postcopy migration) +- Resolves: bz#2098076 + (virtio-blk: Can't boot fresh installation from used virtio-blk dasd disk under certain conditions) + +* Thu Jun 23 2022 Jon Maloy - 6.2.0-16 +- kvm-virtiofsd-Fix-breakage-due-to-fuse_init_in-size-chan.patch [bz#2097209] +- kvm-hw-block-fdc-Prevent-end-of-track-overrun-CVE-2021-3.patch [bz#1951521] +- kvm-tests-qtest-fdc-test-Add-a-regression-test-for-CVE-2.patch [bz#1951521] +- Resolves: bz#2097209 + ([virtiofs] mount virtiofs failed: SELinux: (dev virtiofs, type virtiofs) getxattr errno 111) +- Resolves: bz#1951521 + (CVE-2021-3507 virt:rhel/qemu-kvm: QEMU: fdc: heap buffer overflow in DMA read data transfers [rhel-8]) + +* Tue Jun 14 2022 Jon Maloy - 6.2.0-15 +- kvm-virtio-gpu-do-not-byteswap-padding.patch [bz#1916415] +- kvm-linux-headers-update-to-5.16-rc1.patch [bz#1916415] +- kvm-linux-headers-Update-headers-to-v5.17-rc1.patch [bz#1916415] +- kvm-linux-headers-include-missing-changes-from-5.17.patch [bz#1916415] +- kvm-x86-Fix-the-64-byte-boundary-enumeration-for-extende.patch [bz#1916415] +- 
kvm-x86-Add-AMX-XTILECFG-and-XTILEDATA-components.patch [bz#1916415] +- kvm-x86-Grant-AMX-permission-for-guest.patch [bz#1916415] +- kvm-x86-Add-XFD-faulting-bit-for-state-components.patch [bz#1916415] +- kvm-x86-Add-AMX-CPUIDs-enumeration.patch [bz#1916415] +- kvm-x86-add-support-for-KVM_CAP_XSAVE2-and-AMX-state-mig.patch [bz#1916415] +- kvm-x86-Support-XFD-and-AMX-xsave-data-migration.patch [bz#1916415] +- kvm-target-i386-kvm-do-not-access-uninitialized-variable.patch [bz#1916415] +- kvm-KVM-x86-workaround-invalid-CPUID-0xD-9-info-on-some-.patch [bz#1916415] +- kvm-virtio-net-setup-vhost_dev-and-notifiers-for-cvq-onl.patch [bz#2069946] +- kvm-virtio-net-align-ctrl_vq-index-for-non-mq-guest-for-.patch [bz#2069946] +- kvm-vhost-vdpa-fix-improper-cleanup-in-net_init_vhost_vd.patch [bz#2069946] +- kvm-vhost-net-fix-improper-cleanup-in-vhost_net_start.patch [bz#2069946] +- kvm-vhost-vdpa-backend-feature-should-set-only-once.patch [bz#2069946] +- kvm-vhost-vdpa-change-name-and-polarity-for-vhost_vdpa_o.patch [bz#2069946] +- kvm-virtio-net-don-t-handle-mq-request-in-userspace-hand.patch [bz#2069946] +- kvm-ide-Increment-BB-in-flight-counter-for-TRIM-BH.patch [bz#2029980] +- kvm-block-Make-bdrv_refresh_limits-non-recursive.patch [bz#2072932] +- kvm-iotests-Allow-using-QMP-with-the-QSD.patch [bz#2072932] +- kvm-iotests-graph-changes-while-io-New-test.patch [bz#2072932] +- Resolves: bz#1916415 + ([Intel 8.7 FEAT] qemu-kvm Sapphire Rapids (SPR) AMX Instructions) +- Resolves: bz#2069946 + (PXE boot crash qemu when using multiqueue vDPA) +- Resolves: bz#2029980 + (Failed assertion in IDE emulation with Ceph backend) +- Resolves: bz#2072932 + (Qemu coredump when refreshing block limits on an actively used iothread block device [rhel.8.7]) + +* Thu May 19 2022 Jon Maloy - 6.2.0-14 +- kvm-Revert-redhat-Add-some-devices-for-exporting-upstrea.patch [bz#2065043] +- kvm-target-i386-properly-reset-TSC-on-reset.patch [bz#2070417] +- 
kvm-ui-cursor-fix-integer-overflow-in-cursor_alloc-CVE-2.patch [bz#2040734] +- Resolves: bz#2065043 + (Remove upstream-only devices from the qemu-kvm binary) +- Resolves: bz#2070417 + (Windows guest hangs after updating and restarting from the guest OS [rhel-8.7.0]) +- Resolves: bz#2040734 + (CVE-2021-4206 virt:rhel/qemu-kvm: QEMU: QXL: integer overflow in cursor_alloc() can lead to heap buffer overflow [rhel-8.7]) + +* Tue May 03 2022 Jon Maloy - 6.2.0-13 +- kvm-s390x-ipl-support-extended-kernel-command-line-size.patch [bz#2043830] +- kvm-virtio-net-fix-map-leaking-on-error-during-receive.patch [bz#2063206] +- kvm-qcow2-Improve-refcount-structure-rebuilding.patch [bz#1519071] +- kvm-iotests-108-Test-new-refcount-rebuild-algorithm.patch [bz#1519071] +- kvm-qcow2-Add-errp-to-rebuild_refcount_structure.patch [bz#1519071] +- kvm-iotests-108-Fix-when-missing-user_allow_other.patch [bz#1519071] +- Resolves: bz#2043830 + ([IBM 8.7 FEAT] KVM: Allow long kernel command lines for QEMU) +- Resolves: bz#2063206 + (CVE-2022-26353 virt:rhel/qemu-kvm: QEMU: virtio-net: map leaking on error during receive [rhel-8]) +- Resolves: bz#1519071 + (Fail to rebuild the reference count tables of qcow2 image on host block devices (e.g. LVs)) + * Thu Apr 21 2022 Jon Maloy - 6.2.0-12 - kvm-display-qxl-render-fix-race-condition-in-qxl_cursor-.patch [bz#2040738] - kvm-vhost-vsock-detach-the-virqueue-element-in-case-of-e.patch [bz#2063262] diff --git a/sources b/sources index f1545cf..b6290ac 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (qemu-6.2.0.tar.xz) = e9f8231c9e1cfcc41cb47f10a55d63f6b8aee307af00cf6acf64acb7aa4f49fa7e9d6330703a2abea15d8b7bbaba7d3cb08c83edd98d82642367b527df730817 +SHA1 (qemu-6.2.0.tar.xz) = 68cd61a466170115b88817e2d52db2cd7a92f43a