From 382f65f59d97541bf005a4a2205fd08469b76ace Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Fri, 17 Feb 2023 02:08:06 -0500 Subject: [PATCH] * Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 - kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232] - kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] - kvm-linux-headers-Update-to-v6.1.patch [bz#2158704] - kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704] - kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704] - kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732] - kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732] - kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172] - Resolves: bz#2169232 (RFE: reconnect option for stream socket back-end) - Resolves: bz#2158704 (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall) - Resolves: bz#2169732 (Multifd migration fails under a weak network/socket ordering race) - Resolves: bz#2168172 ([s390x] qemu-kvm coredumps when SE crashes) --- ...port-for-MSG_PEEK-for-socket-channel.patch | 386 ++++++++++++ kvm-linux-headers-Update-to-v6.1.patch | 577 ++++++++++++++++++ ...magic-value-for-deciding-the-mapping.patch | 330 ++++++++++ ...-new-option-to-automatically-reconne.patch | 325 ++++++++++ ...h_dump-Fix-memory-corruption-in-s390.patch | 50 ++ ...etdev-test-stream-and-dgram-backends.patch | 505 +++++++++++++++ kvm-util-userfaultfd-Add-uffd_open.patch | 169 +++++ ...-userfaultfd-Support-dev-userfaultfd.patch | 94 +++ qemu-kvm.spec | 36 +- 9 files changed, 2471 insertions(+), 1 deletion(-) create mode 100644 kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch create mode 100644 kvm-linux-headers-Update-to-v6.1.patch create mode 100644 kvm-migration-check-magic-value-for-deciding-the-mapping.patch create mode 100644 kvm-net-stream-add-a-new-option-to-automatically-reconne.patch create mode 100644 
kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch create mode 100644 kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch create mode 100644 kvm-util-userfaultfd-Add-uffd_open.patch create mode 100644 kvm-util-userfaultfd-Support-dev-userfaultfd.patch diff --git a/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch b/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch new file mode 100644 index 0000000..22abf35 --- /dev/null +++ b/kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch @@ -0,0 +1,386 @@ +From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:17 +0000 +Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders +RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm) + +MSG_PEEK peeks at the channel, The data is treated as unread and +the next read shall still return this data. This support is +currently added only for socket class. Extra parameter 'flags' +is added to io_readv calls to pass extra read flags like MSG_PEEK. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. 
Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3) +Signed-off-by: Peter Xu +--- + chardev/char-socket.c | 4 ++-- + include/io/channel.h | 6 ++++++ + io/channel-buffer.c | 1 + + io/channel-command.c | 1 + + io/channel-file.c | 1 + + io/channel-null.c | 1 + + io/channel-socket.c | 19 ++++++++++++++++++- + io/channel-tls.c | 1 + + io/channel-websock.c | 1 + + io/channel.c | 16 ++++++++++++---- + migration/channel-block.c | 1 + + migration/rdma.c | 1 + + scsi/qemu-pr-helper.c | 2 +- + tests/qtest/tpm-emu.c | 2 +- + tests/unit/test-io-channel-socket.c | 1 + + util/vhost-user-server.c | 2 +- + 16 files changed, 50 insertions(+), 10 deletions(-) + +diff --git a/chardev/char-socket.c b/chardev/char-socket.c +index 879564aa8a..5afce9a464 100644 +--- a/chardev/char-socket.c ++++ b/chardev/char-socket.c +@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len) + if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + &msgfds, &msgfds_num, +- NULL); ++ 0, NULL); + } else { + ret = qio_channel_readv_full(s->ioc, &iov, 1, + NULL, NULL, +- NULL); ++ 0, NULL); + } + + if (msgfds_num) { +diff --git a/include/io/channel.h b/include/io/channel.h +index c680ee7480..716235d496 100644 +--- a/include/io/channel.h ++++ b/include/io/channel.h +@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass, + + #define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1 + ++#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1 ++ + typedef enum QIOChannelFeature QIOChannelFeature; + + enum QIOChannelFeature { +@@ -41,6 +43,7 @@ enum QIOChannelFeature { + QIO_CHANNEL_FEATURE_SHUTDOWN, + QIO_CHANNEL_FEATURE_LISTEN, + QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY, ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK, + }; + + +@@ -114,6 +117,7 @@ struct QIOChannelClass { + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + int 
(*io_close)(QIOChannel *ioc, + Error **errp); +@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc, + * @niov: the length of the @iov array + * @fds: pointer to an array that will received file handles + * @nfds: pointer filled with number of elements in @fds on return ++ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*) + * @errp: pointer to a NULL-initialized error object + * + * Read data from the IO channel, storing it in the +@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp); + + +diff --git a/io/channel-buffer.c b/io/channel-buffer.c +index bf52011be2..8096180f85 100644 +--- a/io/channel-buffer.c ++++ b/io/channel-buffer.c +@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc); +diff --git a/io/channel-command.c b/io/channel-command.c +index 74516252ba..e7edd091af 100644 +--- a/io/channel-command.c ++++ b/io/channel-command.c +@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc); +diff --git a/io/channel-file.c b/io/channel-file.c +index b67687c2aa..d76663e6ae 100644 +--- a/io/channel-file.c ++++ b/io/channel-file.c +@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc); +diff --git a/io/channel-null.c b/io/channel-null.c +index 75e3781507..4fafdb770d 100644 +--- a/io/channel-null.c ++++ b/io/channel-null.c +@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc, + size_t niov, + int **fds G_GNUC_UNUSED, + size_t *nfds G_GNUC_UNUSED, ++ int flags, + Error **errp) + { + QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc); 
+diff --git a/io/channel-socket.c b/io/channel-socket.c +index b76dca9cc1..7aca84f61a 100644 +--- a/io/channel-socket.c ++++ b/io/channel-socket.c +@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc, + } + #endif + ++ qio_channel_set_feature(QIO_CHANNEL(ioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + return 0; + } + +@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc, + } + #endif /* WIN32 */ + ++ qio_channel_set_feature(QIO_CHANNEL(cioc), ++ QIO_CHANNEL_FEATURE_READ_MSG_PEEK); ++ + trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd); + return cioc; + +@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); +@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + + } + ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } ++ + retry: + ret = recvmsg(sioc->fd, &msg, sflags); + if (ret < 0) { +@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc); + ssize_t done = 0; + ssize_t i; ++ int sflags = 0; ++ ++ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) { ++ sflags |= MSG_PEEK; ++ } + + for (i = 0; i < niov; i++) { + ssize_t ret; +@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc, + ret = recv(sioc->fd, + iov[i].iov_base, + iov[i].iov_len, +- 0); ++ sflags); + if (ret < 0) { + if (errno == EAGAIN) { + if (done) { +diff --git a/io/channel-tls.c b/io/channel-tls.c +index 4ce890a538..c730cb8ec5 100644 +--- a/io/channel-tls.c ++++ b/io/channel-tls.c +@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc); +diff 
--git a/io/channel-websock.c b/io/channel-websock.c +index fb4932ade7..a12acc27cf 100644 +--- a/io/channel-websock.c ++++ b/io/channel-websock.c +@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc); +diff --git a/io/channel.c b/io/channel.c +index 0640941ac5..a8c7f11649 100644 +--- a/io/channel.c ++++ b/io/channel.c +@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc); +@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc, + return -1; + } + +- return klass->io_readv(ioc, iov, niov, fds, nfds, errp); ++ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) && ++ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ error_setg_errno(errp, EINVAL, ++ "Channel does not support peek read"); ++ return -1; ++ } ++ ++ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp); + } + + +@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc, + while ((nlocal_iov > 0) || local_fds) { + ssize_t len; + len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds, +- local_nfds, errp); ++ local_nfds, 0, errp); + if (len == QIO_CHANNEL_ERR_BLOCK) { + if (qemu_in_coroutine()) { + qio_channel_yield(ioc, G_IO_IN); +@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc, + size_t niov, + Error **errp) + { +- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp); + } + + +@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc, + Error **errp) + { + struct iovec iov = { .iov_base = buf, .iov_len = buflen }; +- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp); ++ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp); + } + 
+ +diff --git a/migration/channel-block.c b/migration/channel-block.c +index f4ab53acdb..b7374363c3 100644 +--- a/migration/channel-block.c ++++ b/migration/channel-block.c +@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc); +diff --git a/migration/rdma.c b/migration/rdma.c +index 94a55dd95b..d8b4632094 100644 +--- a/migration/rdma.c ++++ b/migration/rdma.c +@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc, + size_t niov, + int **fds, + size_t *nfds, ++ int flags, + Error **errp) + { + QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc); +diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c +index 196b78c00d..199227a556 100644 +--- a/scsi/qemu-pr-helper.c ++++ b/scsi/qemu-pr-helper.c +@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz, + iov.iov_base = buf; + iov.iov_len = sz; + n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1, +- &fds, &nfds, errp); ++ &fds, &nfds, 0, errp); + + if (n_read == QIO_CHANNEL_ERR_BLOCK) { + qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN); +diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c +index 2994d1cf42..3cf1acaf7d 100644 +--- a/tests/qtest/tpm-emu.c ++++ b/tests/qtest/tpm-emu.c +@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data) + int *pfd = NULL; + size_t nfd = 0; + +- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort); ++ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort); + cmd = be32_to_cpu(cmd); + g_assert_cmpint(cmd, ==, CMD_SET_DATAFD); + g_assert_cmpint(nfd, ==, 1); +diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c +index b36a5d972a..b964bb202d 100644 +--- a/tests/unit/test-io-channel-socket.c ++++ b/tests/unit/test-io-channel-socket.c +@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void) + 
G_N_ELEMENTS(iorecv), + &fdrecv, + &nfdrecv, ++ 0, + &error_abort); + + g_assert(nfdrecv == G_N_ELEMENTS(fdsend)); +diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c +index 232984ace6..145eb17c08 100644 +--- a/util/vhost-user-server.c ++++ b/util/vhost-user-server.c +@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg) + * qio_channel_readv_full may have short reads, keeping calling it + * until getting VHOST_USER_HDR_SIZE or 0 bytes in total + */ +- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err); ++ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err); + if (rc < 0) { + if (rc == QIO_CHANNEL_ERR_BLOCK) { + assert(local_err == NULL); +-- +2.31.1 + diff --git a/kvm-linux-headers-Update-to-v6.1.patch b/kvm-linux-headers-Update-to-v6.1.patch new file mode 100644 index 0000000..6ce9c7d --- /dev/null +++ b/kvm-linux-headers-Update-to-v6.1.patch @@ -0,0 +1,577 @@ +From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:10 -0500 +Subject: [PATCH 3/8] linux-headers: Update to v6.1 + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. 
David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm) + +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Acked-by: Cornelia Huck +Signed-off-by: Juan Quintela +(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc) +Signed-off-by: Peter Xu +--- + include/standard-headers/drm/drm_fourcc.h | 34 ++++- + include/standard-headers/linux/ethtool.h | 63 +++++++- + include/standard-headers/linux/fuse.h | 6 +- + .../linux/input-event-codes.h | 1 + + include/standard-headers/linux/virtio_blk.h | 19 +++ + linux-headers/asm-generic/hugetlb_encode.h | 26 ++-- + linux-headers/asm-generic/mman-common.h | 2 + + linux-headers/asm-mips/mman.h | 2 + + linux-headers/asm-riscv/kvm.h | 4 + + linux-headers/linux/kvm.h | 1 + + linux-headers/linux/psci.h | 14 ++ + linux-headers/linux/userfaultfd.h | 4 + + linux-headers/linux/vfio.h | 142 ++++++++++++++++++ + 13 files changed, 298 insertions(+), 20 deletions(-) + +diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h +index 48b620cbef..b868488f93 100644 +--- a/include/standard-headers/drm/drm_fourcc.h ++++ b/include/standard-headers/drm/drm_fourcc.h +@@ -98,18 +98,42 @@ extern "C" { + #define DRM_FORMAT_INVALID 0 + + /* color index */ ++#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */ ++#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */ + #define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */ + +-/* 8 bpp Red */ ++/* 1 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Darkness (inverse 
relationship between channel value and brightness) */ ++#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */ ++ ++/* 8 bpp Darkness (inverse relationship between channel value and brightness) */ ++#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */ ++ ++/* 1 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */ ++ ++/* 2 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */ ++ ++/* 4 bpp Red (direct relationship between channel value and brightness) */ ++#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */ ++ ++/* 8 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */ + +-/* 10 bpp Red */ ++/* 10 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */ + +-/* 12 bpp Red */ ++/* 12 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */ + +-/* 16 bpp Red */ ++/* 16 bpp Red (direct relationship between channel value and brightness) */ + #define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */ + + /* 16 bpp RG */ +@@ -204,7 +228,9 @@ extern "C" { + #define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */ + + #define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') 
/* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */ ++#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */ + #define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */ + #define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */ + +diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h +index 4537da20cc..1dc56cdc0a 100644 +--- a/include/standard-headers/linux/ethtool.h ++++ b/include/standard-headers/linux/ethtool.h +@@ -736,6 +736,51 @@ enum ethtool_module_power_mode { + ETHTOOL_MODULE_POWER_MODE_HIGH, + }; + ++/** ++ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE ++ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are ++ * unknown ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled ++ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled ++ */ ++enum ethtool_podl_pse_admin_state { ++ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED, ++ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED, ++}; ++ ++/** ++ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE. 
++ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus: ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is ++ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is ++ * false" ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is ++ * asserted true when either of the PSE state diagram variables ++ * pi_detecting or pi_classifying is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower” ++ * is asserted true when the PoDL PSE state diagram variable pi_powered is ++ * true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted ++ * true when the PoDL PSE state diagram variable pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true ++ * when the logical combination of the PoDL PSE state diagram variables ++ * pi_prebiased*!pi_sleeping is true." ++ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted ++ * true when the PoDL PSE state diagram variable overload_held is true." ++ */ ++enum ethtool_podl_pse_pw_d_status { ++ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE, ++ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR, ++}; ++ + /** + * struct ethtool_gstrings - string set for data tagging + * @cmd: Command number = %ETHTOOL_GSTRINGS +@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex) + #define MASTER_SLAVE_STATE_SLAVE 3 + #define MASTER_SLAVE_STATE_ERR 4 + ++/* These are used to throttle the rate of data on the phy interface when the ++ * native speed of the interface is higher than the link speed. These should ++ * not be used for phy interfaces which natively support multiple speeds (e.g. 
++ * MII or SGMII). ++ */ ++/* No rate matching performed. */ ++#define RATE_MATCH_NONE 0 ++/* The phy sends pause frames to throttle the MAC. */ ++#define RATE_MATCH_PAUSE 1 ++/* The phy asserts CRS to prevent the MAC from transmitting. */ ++#define RATE_MATCH_CRS 2 ++/* The MAC is programmed with a sufficiently-large IPG. */ ++#define RATE_MATCH_OPEN_LOOP 3 ++ + /* Which connector port. */ + #define PORT_TP 0x00 + #define PORT_AUI 0x01 +@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags { + * reported consistently by PHYLIB. Read-only. + * @master_slave_cfg: Master/slave port mode. + * @master_slave_state: Master/slave port state. ++ * @rate_matching: Rate adaptation performed by the PHY + * @reserved: Reserved for future use; see the note on reserved space. +- * @reserved1: Reserved for future use; see the note on reserved space. + * @link_mode_masks: Variable length bitmaps. + * + * If autonegotiation is disabled, the speed and @duplex represent the +@@ -2085,7 +2144,7 @@ struct ethtool_link_settings { + uint8_t transceiver; + uint8_t master_slave_cfg; + uint8_t master_slave_state; +- uint8_t reserved1[1]; ++ uint8_t rate_matching; + uint32_t reserved[7]; + uint32_t link_mode_masks[]; + /* layout of link_mode_masks fields: +diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h +index bda06258be..713d259768 100644 +--- a/include/standard-headers/linux/fuse.h ++++ b/include/standard-headers/linux/fuse.h +@@ -194,6 +194,9 @@ + * - add FUSE_SECURITY_CTX init flag + * - add security context to create, mkdir, symlink, and mknod requests + * - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX ++ * ++ * 7.37 ++ * - add FUSE_TMPFILE + */ + + #ifndef _LINUX_FUSE_H +@@ -225,7 +228,7 @@ + #define FUSE_KERNEL_VERSION 7 + + /** Minor version number of this interface */ +-#define FUSE_KERNEL_MINOR_VERSION 36 ++#define FUSE_KERNEL_MINOR_VERSION 37 + + /** The node ID of the root inode */ + #define FUSE_ROOT_ID 1 +@@ -533,6 +536,7 @@ enum fuse_opcode { + 
FUSE_SETUPMAPPING = 48, + FUSE_REMOVEMAPPING = 49, + FUSE_SYNCFS = 50, ++ FUSE_TMPFILE = 51, + + /* CUSE specific operations */ + CUSE_INIT = 4096, +diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h +index 50790aee5a..815f7a1dff 100644 +--- a/include/standard-headers/linux/input-event-codes.h ++++ b/include/standard-headers/linux/input-event-codes.h +@@ -862,6 +862,7 @@ + #define ABS_TOOL_WIDTH 0x1c + + #define ABS_VOLUME 0x20 ++#define ABS_PROFILE 0x21 + + #define ABS_MISC 0x28 + +diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h +index 2dcc90826a..e81715cd70 100644 +--- a/include/standard-headers/linux/virtio_blk.h ++++ b/include/standard-headers/linux/virtio_blk.h +@@ -40,6 +40,7 @@ + #define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ + #define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */ + #define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */ ++#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */ + + /* Legacy feature bits */ + #ifndef VIRTIO_BLK_NO_LEGACY +@@ -119,6 +120,21 @@ struct virtio_blk_config { + uint8_t write_zeroes_may_unmap; + + uint8_t unused1[3]; ++ ++ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */ ++ /* ++ * The maximum secure erase sectors (in 512-byte sectors) for ++ * one segment. ++ */ ++ __virtio32 max_secure_erase_sectors; ++ /* ++ * The maximum number of secure erase segments in a ++ * secure erase command. ++ */ ++ __virtio32 max_secure_erase_seg; ++ /* Secure erase commands must be aligned to this number of sectors. */ ++ __virtio32 secure_erase_sector_alignment; ++ + } QEMU_PACKED; + + /* +@@ -153,6 +169,9 @@ struct virtio_blk_config { + /* Write zeroes command */ + #define VIRTIO_BLK_T_WRITE_ZEROES 13 + ++/* Secure erase command */ ++#define VIRTIO_BLK_T_SECURE_ERASE 14 ++ + #ifndef VIRTIO_BLK_NO_LEGACY + /* Barrier before this op. 
*/ + #define VIRTIO_BLK_T_BARRIER 0x80000000 +diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h +index 4f3d5aaa11..de687009bf 100644 +--- a/linux-headers/asm-generic/hugetlb_encode.h ++++ b/linux-headers/asm-generic/hugetlb_encode.h +@@ -20,18 +20,18 @@ + #define HUGETLB_FLAG_ENCODE_SHIFT 26 + #define HUGETLB_FLAG_ENCODE_MASK 0x3f + +-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT) +-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define 
HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT) ++#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT) + + #endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */ +diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h +index 6c1aa92a92..6ce1f1ceb4 100644 +--- a/linux-headers/asm-generic/mman-common.h ++++ b/linux-headers/asm-generic/mman-common.h +@@ -77,6 +77,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h +index 1be428663c..c6e1fc77c9 100644 +--- a/linux-headers/asm-mips/mman.h ++++ b/linux-headers/asm-mips/mman.h +@@ -103,6 +103,8 @@ + + #define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */ + ++#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ ++ + /* compatibility flags */ + #define MAP_FILE 0 + +diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h +index 7351417afd..8985ff234c 100644 +--- a/linux-headers/asm-riscv/kvm.h ++++ b/linux-headers/asm-riscv/kvm.h +@@ -48,6 +48,7 @@ struct kvm_sregs { + /* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ + struct kvm_riscv_config { + unsigned long isa; ++ unsigned long zicbom_block_size; + }; + + /* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */ +@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID { + KVM_RISCV_ISA_EXT_M, + KVM_RISCV_ISA_EXT_SVPBMT, + KVM_RISCV_ISA_EXT_SSTC, ++ KVM_RISCV_ISA_EXT_SVINVAL, ++ KVM_RISCV_ISA_EXT_ZIHINTPAUSE, ++ KVM_RISCV_ISA_EXT_ZICBOM, + KVM_RISCV_ISA_EXT_MAX, + }; + +diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h +index ebdafa576d..b2783c5202 100644 +--- a/linux-headers/linux/kvm.h ++++ b/linux-headers/linux/kvm.h +@@ -1175,6 +1175,7 @@ struct 
kvm_ppc_resize_hpt { + #define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220 + #define KVM_CAP_S390_ZPCI_OP 221 + #define KVM_CAP_S390_CPU_TOPOLOGY 222 ++#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 + + #ifdef KVM_CAP_IRQ_ROUTING + +diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h +index 213b2a0f70..e60dfd8907 100644 +--- a/linux-headers/linux/psci.h ++++ b/linux-headers/linux/psci.h +@@ -48,12 +48,26 @@ + #define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7) + + #define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10) ++#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11) ++#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12) ++#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13) + #define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14) + #define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15) ++#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16) ++#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17) ++ + #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) ++#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) ++#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) + ++#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) ++#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) + #define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14) ++#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16) ++#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) ++ + #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) ++#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) + + /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ + #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff +diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h +index a3a377cd44..ba5d0df52f 100644 +--- a/linux-headers/linux/userfaultfd.h ++++ b/linux-headers/linux/userfaultfd.h +@@ -12,6 +12,10 @@ + + #include + ++/* ioctls for /dev/userfaultfd */ ++#define USERFAULTFD_IOC 0xAA ++#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00) ++ + /* + * If the UFFDIO_API is upgraded 
someday, the UFFDIO_UNREGISTER and + * UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In +diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h +index ede44b5572..bee7e42198 100644 +--- a/linux-headers/linux/vfio.h ++++ b/linux-headers/linux/vfio.h +@@ -986,6 +986,148 @@ enum vfio_device_mig_state { + VFIO_DEVICE_STATE_RUNNING_P2P = 5, + }; + ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power ++ * state with the platform-based power management. Device use of lower power ++ * states depends on factors managed by the runtime power management core, ++ * including system level support and coordinating support among dependent ++ * devices. Enabling device low power entry does not guarantee lower power ++ * usage by the device, nor is a mechanism provided through this feature to ++ * know the current power state of the device. If any device access happens ++ * (either from the host or through the vfio uAPI) when the device is in the ++ * low power state, then the host will move the device out of the low power ++ * state as necessary prior to the access. Once the access is completed, the ++ * device may re-enter the low power state. For single shot low power support ++ * with wake-up notification, see ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd ++ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after ++ * calling LOW_POWER_EXIT. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3 ++ ++/* ++ * This device feature has the same behavior as ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user ++ * provides an eventfd for wake-up notification. When the device moves out of ++ * the low power state for the wake-up, the host will not allow the device to ++ * re-enter a low power state without a subsequent user call to one of the low ++ * power entry device feature IOCTLs. 
Access to mmap'd device regions is ++ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the ++ * low power exit. The low power exit can happen either through LOW_POWER_EXIT ++ * or through any other access (where the wake-up notification has been ++ * generated). The access to mmap'd device regions will not trigger low power ++ * exit. ++ * ++ * The notification through the provided eventfd will be generated only when ++ * the device has entered and is resumed from a low power state after ++ * calling this device feature IOCTL. A device that has not entered low power ++ * state, as managed through the runtime power management core, will not ++ * generate a notification through the provided eventfd on access. Calling the ++ * LOW_POWER_EXIT feature is optional in the case where notification has been ++ * signaled on the provided eventfd that a resume from low power has occurred. ++ */ ++struct vfio_device_low_power_entry_with_wakeup { ++ __s32 wakeup_eventfd; ++ __u32 reserved; ++}; ++ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as ++ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or ++ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features. ++ * This device feature IOCTL may itself generate a wakeup eventfd notification ++ * in the latter case if the device had previously entered a low power state. ++ */ ++#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging. ++ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports ++ * DMA logging. ++ * ++ * DMA logging allows a device to internally record what DMAs the device is ++ * initiating and report them back to userspace. It is part of the VFIO ++ * migration infrastructure that allows implementing dirty page tracking ++ * during the pre copy phase of live migration. 
Only DMA WRITEs are logged, ++ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE. ++ * ++ * When DMA logging is started a range of IOVAs to monitor is provided and the ++ * device can optimize its logging to cover only the IOVA range given. Each ++ * DMA that the device initiates inside the range will be logged by the device ++ * for later retrieval. ++ * ++ * page_size is an input that hints what tracking granularity the device ++ * should try to achieve. If the device cannot do the hinted page size then ++ * it's the driver choice which page size to pick based on its support. ++ * On output the device will return the page size it selected. ++ * ++ * ranges is a pointer to an array of ++ * struct vfio_device_feature_dma_logging_range. ++ * ++ * The core kernel code guarantees to support by minimum num_ranges that fit ++ * into a single kernel page. User space can try higher values but should give ++ * up if the above can't be achieved as of some driver limitations. ++ * ++ * A single call to start device DMA logging can be issued and a matching stop ++ * should follow at the end. Another start is not allowed in the meantime. ++ */ ++struct vfio_device_feature_dma_logging_control { ++ __aligned_u64 page_size; ++ __u32 num_ranges; ++ __u32 __reserved; ++ __aligned_u64 ranges; ++}; ++ ++struct vfio_device_feature_dma_logging_range { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started ++ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START ++ */ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7 ++ ++/* ++ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log ++ * ++ * Query the device's DMA log for written pages within the given IOVA range. ++ * During querying the log is cleared for the IOVA range. 
++ * ++ * bitmap is a pointer to an array of u64s that will hold the output bitmap ++ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits ++ * is given by: ++ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64)) ++ * ++ * The input page_size can be any power of two value and does not have to ++ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver ++ * will format its internal logging to match the reporting page size, possibly ++ * by replicating bits if the internal page size is lower than requested. ++ * ++ * The LOGGING_REPORT will only set bits in the bitmap and never clear or ++ * perform any initialization of the user provided bitmap. ++ * ++ * If any error is returned userspace should assume that the dirty log is ++ * corrupted. Error recovery is to consider all memory dirty and try to ++ * restart the dirty tracking, or to abort/restart the whole migration. ++ * ++ * If DMA logging is not enabled, an error will be returned. ++ * ++ */ ++struct vfio_device_feature_dma_logging_report { ++ __aligned_u64 iova; ++ __aligned_u64 length; ++ __aligned_u64 page_size; ++ __aligned_u64 bitmap; ++}; ++ ++#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8 ++ + /* -------- API for Type1 VFIO IOMMU -------- */ + + /** +-- +2.31.1 + diff --git a/kvm-migration-check-magic-value-for-deciding-the-mapping.patch b/kvm-migration-check-magic-value-for-deciding-the-mapping.patch new file mode 100644 index 0000000..387d0b3 --- /dev/null +++ b/kvm-migration-check-magic-value-for-deciding-the-mapping.patch @@ -0,0 +1,330 @@ +From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001 +From: "manish.mishra" +Date: Tue, 20 Dec 2022 18:44:18 +0000 +Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of + channels +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders 
+RH-Bugzilla: 2169732 +RH-Acked-by: quintela1 +RH-Acked-by: Leonardo Brás +RH-Acked-by: Dr. David Alan Gilbert +RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm) + +Current logic assumes that channel connections on the destination side are +always established in the same order as the source and the first one will +always be the main channel followed by the multifd or post-copy +preemption channel. This may not be always true, as even if a channel has a +connection established on the source side it can be in the pending state on +the destination side and a newer connection can be established first. +Basically causing out of order mapping of channels on the destination side. +Currently, all channels except post-copy preempt send a magic number, this +patch uses that magic number to decide the type of channel. This logic is +applicable only for precopy(multifd) live migration, as mentioned, the +post-copy preempt channel does not send any magic number. Also, tls live +migrations already do tls handshake before creating other channels, so +this issue is not possible with tls, hence this logic is avoided for tls +live migrations. This patch uses read peek to check the magic number of +channels so that current data/control stream management remains +unaffected. + +Reviewed-by: Peter Xu +Reviewed-by: Daniel P. Berrange +Reviewed-by: Juan Quintela +Suggested-by: Daniel P. 
Berrange +Signed-off-by: manish.mishra +Signed-off-by: Juan Quintela +(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2) +Signed-off-by: Peter Xu +--- + migration/channel.c | 45 +++++++++++++++++++++++++++++++++ + migration/channel.h | 5 ++++ + migration/migration.c | 54 ++++++++++++++++++++++++++++------------ + migration/multifd.c | 19 +++++++------- + migration/multifd.h | 2 +- + migration/postcopy-ram.c | 5 +--- + migration/postcopy-ram.h | 2 +- + 7 files changed, 101 insertions(+), 31 deletions(-) + +diff --git a/migration/channel.c b/migration/channel.c +index 1b0815039f..ca3319a309 100644 +--- a/migration/channel.c ++++ b/migration/channel.c +@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s, + migrate_fd_connect(s, error); + error_free(error); + } ++ ++ ++/** ++ * @migration_channel_read_peek - Peek at migration channel, without ++ * actually removing it from channel buffer. ++ * ++ * @ioc: the channel object ++ * @buf: the memory region to read data into ++ * @buflen: the number of bytes to read in @buf ++ * @errp: pointer to a NULL-initialized error object ++ * ++ * Returns 0 if successful, returns -1 and sets @errp if fails. ++ */ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp) ++{ ++ ssize_t len = 0; ++ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen }; ++ ++ while (true) { ++ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, ++ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); ++ ++ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { ++ error_setg(errp, ++ "Failed to peek at channel"); ++ return -1; ++ } ++ ++ if (len == buflen) { ++ break; ++ } ++ ++ /* 1ms sleep. 
*/ ++ if (qemu_in_coroutine()) { ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ } else { ++ g_usleep(1000); ++ } ++ } ++ ++ return 0; ++} +diff --git a/migration/channel.h b/migration/channel.h +index 67a461c28a..5bdb8208a7 100644 +--- a/migration/channel.h ++++ b/migration/channel.h +@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s, + QIOChannel *ioc, + const char *hostname, + Error *error_in); ++ ++int migration_channel_read_peek(QIOChannel *ioc, ++ const char *buf, ++ const size_t buflen, ++ Error **errp); + #endif +diff --git a/migration/migration.c b/migration/migration.c +index f485eea5fb..593dbd25de 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -31,6 +31,7 @@ + #include "migration.h" + #include "savevm.h" + #include "qemu-file.h" ++#include "channel.h" + #include "migration/vmstate.h" + #include "block/block.h" + #include "qapi/error.h" +@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + +- if (multifd_load_setup(errp) != 0) { +- return false; +- } +- + if (!mis->from_src_file) { + mis->from_src_file = f; + } +@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + { + MigrationIncomingState *mis = migration_incoming_get_current(); + Error *local_err = NULL; +- bool start_migration; + QEMUFile *f; ++ bool default_channel = true; ++ uint32_t channel_magic = 0; ++ int ret = 0; + +- if (!mis->from_src_file) { +- /* The first connection (multifd may have multiple) */ ++ if (migrate_use_multifd() && !migrate_postcopy_ram() && ++ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) { ++ /* ++ * With multiple channels, it is possible that we receive channels ++ * out of order on destination side, causing incorrect mapping of ++ * source channels on destination side. Check channel MAGIC to ++ * decide type of channel. 
Please note this is best effort, postcopy ++ * preempt channel does not send any magic number so avoid it for ++ * postcopy live migration. Also tls live migration already does ++ * tls handshake while initializing main channel so with tls this ++ * issue is not possible. ++ */ ++ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, ++ sizeof(channel_magic), &local_err); ++ ++ if (ret != 0) { ++ error_propagate(errp, local_err); ++ return; ++ } ++ ++ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC)); ++ } else { ++ default_channel = !mis->from_src_file; ++ } ++ ++ if (multifd_load_setup(errp) != 0) { ++ error_setg(errp, "Failed to setup multifd channels"); ++ return; ++ } ++ ++ if (default_channel) { + f = qemu_file_new_input(ioc); + + if (!migration_incoming_setup(f, errp)) { + return; + } +- +- /* +- * Common migration only needs one channel, so we can start +- * right now. Some features need more than one channel, we wait. +- */ +- start_migration = !migration_needs_multiple_sockets(); + } else { + /* Multiple connections */ + assert(migration_needs_multiple_sockets()); + if (migrate_use_multifd()) { +- start_migration = multifd_recv_new_channel(ioc, &local_err); ++ multifd_recv_new_channel(ioc, &local_err); + } else { + assert(migrate_postcopy_preempt()); + f = qemu_file_new_input(ioc); +- start_migration = postcopy_preempt_new_channel(mis, f); ++ postcopy_preempt_new_channel(mis, f); + } + if (local_err) { + error_propagate(errp, local_err); +@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) + } + } + +- if (start_migration) { ++ if (migration_has_all_channels()) { + /* If it's a recovery, we're done */ + if (postcopy_try_recover()) { + return; +diff --git a/migration/multifd.c b/migration/multifd.c +index 509bbbe3bf..c3385529cf 100644 +--- a/migration/multifd.c ++++ b/migration/multifd.c +@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp) + uint32_t page_count = MULTIFD_PACKET_SIZE / 
qemu_target_page_size(); + uint8_t i; + +- if (!migrate_use_multifd()) { ++ /* ++ * Return successfully if multiFD recv state is already initialised ++ * or multiFD is not enabled. ++ */ ++ if (multifd_recv_state || !migrate_use_multifd()) { + return 0; + } ++ + if (!migrate_multi_channels_is_allowed()) { + error_setg(errp, "multifd is not supported by current protocol"); + return -1; +@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void) + + /* + * Try to receive all multifd channels to get ready for the migration. +- * - Return true and do not set @errp when correctly receiving all channels; +- * - Return false and do not set @errp when correctly receiving the current one; +- * - Return false and set @errp when failing to receive the current channel. ++ * Sets @errp when failing to receive the current channel. + */ +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + { + MultiFDRecvParams *p; + Error *local_err = NULL; +@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + "failed to receive packet" + " via multifd channel %d: ", + qatomic_read(&multifd_recv_state->count)); +- return false; ++ return; + } + trace_multifd_recv_new_channel(id); + +@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + id); + multifd_recv_terminate_threads(local_err); + error_propagate(errp, local_err); +- return false; ++ return; + } + p->c = ioc; + object_ref(OBJECT(ioc)); +@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp) + qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p, + QEMU_THREAD_JOINABLE); + qatomic_inc(&multifd_recv_state->count); +- return qatomic_read(&multifd_recv_state->count) == +- migrate_multifd_channels(); + } +diff --git a/migration/multifd.h b/migration/multifd.h +index 519f498643..913e4ba274 100644 +--- a/migration/multifd.h ++++ b/migration/multifd.h +@@ -18,7 +18,7 
@@ void multifd_save_cleanup(void); + int multifd_load_setup(Error **errp); + int multifd_load_cleanup(Error **errp); + bool multifd_recv_all_channels_created(void); +-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp); ++void multifd_recv_new_channel(QIOChannel *ioc, Error **errp); + void multifd_recv_sync_main(void); + int multifd_send_sync_main(QEMUFile *f); + int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset); +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index 0c55df0e52..b98e95dab0 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd) + } + } + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + { + /* + * The new loading channel has its own threads, so it needs to be +@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file) + qemu_file_set_blocking(file, true); + mis->postcopy_qemufile_dst = file; + trace_postcopy_preempt_new_channel(); +- +- /* Start the migration immediately */ +- return true; + } + + /* +diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h +index 6147bf7d1d..25881c4127 100644 +--- a/migration/postcopy-ram.h ++++ b/migration/postcopy-ram.h +@@ -190,7 +190,7 @@ enum PostcopyChannels { + RAM_CHANNEL_MAX, + }; + +-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); ++void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file); + int postcopy_preempt_setup(MigrationState *s, Error **errp); + int postcopy_preempt_wait_channel(MigrationState *s); + +-- +2.31.1 + diff --git a/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch b/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch new file mode 100644 index 0000000..707c80f --- /dev/null +++ 
b/kvm-net-stream-add-a-new-option-to-automatically-reconne.patch @@ -0,0 +1,325 @@ +From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Thu, 19 Jan 2023 11:16:45 +0100 +Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos) + +In stream mode, if the server shuts down there is currently +no way to reconnect the client to a new server without removing +the NIC device and the netdev backend (or to reboot). + +This patch introduces a reconnect option that specifies a delay +to try to reconnect with the same parameters. + +Add a new test in qtest to test the reconnect option and the +connect/disconnect events. 
+ +Signed-off-by: Laurent Vivier +Signed-off-by: Jason Wang +(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb) +--- + net/stream.c | 53 ++++++++++++++++++- + qapi/net.json | 7 ++- + qemu-options.hx | 6 +-- + tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 162 insertions(+), 5 deletions(-) + +diff --git a/net/stream.c b/net/stream.c +index 37ff727e0c..9204b4c96e 100644 +--- a/net/stream.c ++++ b/net/stream.c +@@ -39,6 +39,8 @@ + #include "io/channel-socket.h" + #include "io/net-listener.h" + #include "qapi/qapi-events-net.h" ++#include "qapi/qapi-visit-sockets.h" ++#include "qapi/clone-visitor.h" + + typedef struct NetStreamState { + NetClientState nc; +@@ -49,11 +51,15 @@ typedef struct NetStreamState { + guint ioc_write_tag; + SocketReadState rs; + unsigned int send_index; /* number of bytes sent*/ ++ uint32_t reconnect; ++ guint timer_tag; ++ SocketAddress *addr; + } NetStreamState; + + static void net_stream_listen(QIONetListener *listener, + QIOChannelSocket *cioc, + void *opaque); ++static void net_stream_arm_reconnect(NetStreamState *s); + + static gboolean net_stream_writable(QIOChannel *ioc, + GIOCondition condition, +@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc, + qemu_set_info_str(&s->nc, "%s", ""); + + qapi_event_send_netdev_stream_disconnected(s->nc.name); ++ net_stream_arm_reconnect(s); + + return G_SOURCE_REMOVE; + } +@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc, + static void net_stream_cleanup(NetClientState *nc) + { + NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); ++ if (s->timer_tag) { ++ g_source_remove(s->timer_tag); ++ s->timer_tag = 0; ++ } ++ if (s->addr) { ++ qapi_free_SocketAddress(s->addr); ++ s->addr = NULL; ++ } + if (s->ioc) { + if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { + if (s->ioc_read_tag) { +@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) + error: + 
object_unref(OBJECT(s->ioc)); + s->ioc = NULL; ++ net_stream_arm_reconnect(s); ++} ++ ++static gboolean net_stream_reconnect(gpointer data) ++{ ++ NetStreamState *s = data; ++ QIOChannelSocket *sioc; ++ ++ s->timer_tag = 0; ++ ++ sioc = qio_channel_socket_new(); ++ s->ioc = QIO_CHANNEL(sioc); ++ qio_channel_socket_connect_async(sioc, s->addr, ++ net_stream_client_connected, s, ++ NULL, NULL); ++ return G_SOURCE_REMOVE; ++} ++ ++static void net_stream_arm_reconnect(NetStreamState *s) ++{ ++ if (s->reconnect && s->timer_tag == 0) { ++ s->timer_tag = g_timeout_add_seconds(s->reconnect, ++ net_stream_reconnect, s); ++ } + } + + static int net_stream_client_init(NetClientState *peer, + const char *model, + const char *name, + SocketAddress *addr, ++ uint32_t reconnect, + Error **errp) + { + NetStreamState *s; +@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer, + s->ioc = QIO_CHANNEL(sioc); + s->nc.link_down = true; + ++ s->reconnect = reconnect; ++ if (reconnect) { ++ s->addr = QAPI_CLONE(SocketAddress, addr); ++ } + qio_channel_socket_connect_async(sioc, addr, + net_stream_client_connected, s, + NULL, NULL); +@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name, + sock = &netdev->u.stream; + + if (!sock->has_server || !sock->server) { +- return net_stream_client_init(peer, "stream", name, sock->addr, errp); ++ return net_stream_client_init(peer, "stream", name, sock->addr, ++ sock->has_reconnect ? 
sock->reconnect : 0, ++ errp); ++ } ++ if (sock->has_reconnect) { ++ error_setg(errp, "'reconnect' option is incompatible with " ++ "socket in server mode"); ++ return -1; + } + return net_stream_server_init(peer, "stream", name, sock->addr, errp); + } +diff --git a/qapi/net.json b/qapi/net.json +index 522ac582ed..d6eb30008b 100644 +--- a/qapi/net.json ++++ b/qapi/net.json +@@ -585,6 +585,10 @@ + # @addr: socket address to listen on (server=true) + # or connect to (server=false) + # @server: create server socket (default: false) ++# @reconnect: For a client socket, if a socket is disconnected, ++# then attempt a reconnect after the given number of seconds. ++# Setting this to zero disables this function. (default: 0) ++# (since 8.0) + # + # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. + # +@@ -593,7 +597,8 @@ + { 'struct': 'NetdevStreamOptions', + 'data': { + 'addr': 'SocketAddress', +- '*server': 'bool' } } ++ '*server': 'bool', ++ '*reconnect': 'uint32' } } + + ## + # @NetdevDgramOptions: +diff --git a/qemu-options.hx b/qemu-options.hx +index ea02ca3a45..48eef4aa2c 100644 +--- a/qemu-options.hx ++++ b/qemu-options.hx +@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, + "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" + " configure a network backend to connect to another network\n" + " using an UDP tunnel\n" +- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" +- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" ++ "-netdev 
stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" ++ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" + " configure a network backend to connect to another network\n" + " using a socket connection in stream mode.\n" + "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +index 6ba256e173..acc32c378b 100644 +--- a/tests/qtest/netdev-socket.c ++++ b/tests/qtest/netdev-socket.c +@@ -11,6 +11,10 @@ + #include + #include "../unit/socket-helpers.h" + #include "libqtest.h" ++#include "qapi/qmp/qstring.h" ++#include "qemu/sockets.h" ++#include "qapi/qobject-input-visitor.h" ++#include "qapi/qapi-visit-sockets.h" + + #define CONNECTION_TIMEOUT 5 + +@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void) + qtest_quit(qts0); + } + ++static void wait_stream_connected(QTestState *qts, const char *id, ++ SocketAddress **addr) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ QObject *obj; ++ Visitor *v = NULL; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ ++ obj = qdict_get(data, "addr"); ++ ++ v = qobject_input_visitor_new(obj); ++ visit_type_SocketAddress(v, NULL, addr, NULL); ++ visit_free(v); ++ qobject_unref(resp); ++} ++ ++static void wait_stream_disconnected(QTestState *qts, const char *id) ++{ ++ QDict *resp, *data; ++ QString *qstr; ++ ++ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); ++ g_assert_nonnull(resp); ++ data = qdict_get_qdict(resp, "data"); ++ g_assert_nonnull(data); ++ ++ qstr = qobject_to(QString, qdict_get(data, "netdev-id")); ++ 
g_assert_nonnull(data); ++ ++ g_assert(!strcmp(qstring_get_str(qstr), id)); ++ qobject_unref(resp); ++} ++ ++static void test_stream_inet_reconnect(void) ++{ ++ QTestState *qts0, *qts1; ++ int port; ++ SocketAddress *addr; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off,reconnect=1," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ /* kill server */ ++ qtest_quit(qts0); ++ ++ /* check client has been disconnected */ ++ wait_stream_disconnected(qts1, "st0"); ++ ++ /* restart server */ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ /* wait connection events*/ ++ wait_stream_connected(qts0, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ qapi_free_SocketAddress(addr); ++ ++ wait_stream_connected(qts1, "st0", &addr); ++ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); ++ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); ++ g_assert_cmpint(atoi(addr->u.inet.port), ==, port); ++ qapi_free_SocketAddress(addr); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ + static void test_stream_inet_ipv6(void) + { + QTestState *qts0, *qts1; +@@ -418,6 +517,8 @@ int main(int argc, char **argv) + #ifndef _WIN32 + qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); + #endif ++ 
qtest_add_func("/netdev/stream/inet/reconnect", ++ test_stream_inet_reconnect); + } + if (has_ipv6) { + qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); +-- +2.31.1 + diff --git a/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch b/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch new file mode 100644 index 0000000..b9536c3 --- /dev/null +++ b/kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch @@ -0,0 +1,50 @@ +From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001 +From: Thomas Huth +Date: Tue, 14 Feb 2023 14:48:37 +0100 +Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in + s390x_write_elf64_notes() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Thomas Huth +RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes() +RH-Bugzilla: 2168172 +RH-Acked-by: Cornelia Huck +RH-Acked-by: David Hildenbrand +RH-Acked-by: Cédric Le Goater +RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9) + +Bugzilla: https://bugzilla.redhat.com/2168172 +Upstream-Status: Posted (and reviewed, but not merged yet) + +"note_size" can be smaller than sizeof(note), so unconditionally calling +memset(notep, 0, sizeof(note)) could cause a memory corruption here in +case notep has been allocated dynamically, thus let's use note_size as +length argument for memset() instead. 
+ +Fixes: 113d8f4e95 ("s390x: pv: Add dump support") +Message-Id: <20230214141056.680969-1-thuth@redhat.com> +Reviewed-by: Janosch Frank +Reviewed-by: Philippe Mathieu-Daudé +Signed-off-by: Thomas Huth +--- + target/s390x/arch_dump.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c +index a2329141e8..a7c44ba49d 100644 +--- a/target/s390x/arch_dump.c ++++ b/target/s390x/arch_dump.c +@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name, + notep = g_malloc(note_size); + } + +- memset(notep, 0, sizeof(note)); ++ memset(notep, 0, note_size); + + /* Setup note header data */ + notep->hdr.n_descsz = cpu_to_be32(content_size); +-- +2.31.1 + diff --git a/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch b/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch new file mode 100644 index 0000000..ebd52cd --- /dev/null +++ b/kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch @@ -0,0 +1,505 @@ +From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001 +From: Laurent Vivier +Date: Wed, 18 Jan 2023 13:04:05 +0100 +Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Laurent Vivier +RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect +RH-Bugzilla: 2169232 +RH-Acked-by: Eugenio Pérez +RH-Acked-by: Cindy Lu +RH-Acked-by: MST +RH-Acked-by: David Gibson (Red Hat) +RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos) + +Signed-off-by: Laurent Vivier +Acked-by: Michael S. 
Tsirkin +Message-Id: <20230118120405.1876329-1-lvivier@redhat.com> +Signed-off-by: Thomas Huth +(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997) +--- + tests/qtest/meson.build | 2 + + tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++ + 2 files changed, 450 insertions(+) + create mode 100644 tests/qtest/netdev-socket.c + +diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build +index 9df3f9f8b9..2e7c6fe5e3 100644 +--- a/tests/qtest/meson.build ++++ b/tests/qtest/meson.build +@@ -27,6 +27,7 @@ qtests_generic = [ + 'test-hmp', + 'qos-test', + 'readconfig-test', ++ 'netdev-socket', + ] + if config_host.has_key('CONFIG_MODULES') + qtests_generic += [ 'modules-test' ] +@@ -299,6 +300,7 @@ qtests = { + 'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'], + 'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'), ++ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'), + } + + gvnc = dependency('gvnc-1.0', required: false) +diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c +new file mode 100644 +index 0000000000..6ba256e173 +--- /dev/null ++++ b/tests/qtest/netdev-socket.c +@@ -0,0 +1,448 @@ ++/* ++ * QTest testcase for netdev stream and dgram ++ * ++ * Copyright (c) 2022 Red Hat, Inc. 
++ * ++ * SPDX-License-Identifier: GPL-2.0-or-later ++ */ ++ ++#include "qemu/osdep.h" ++#include "qemu/sockets.h" ++#include ++#include "../unit/socket-helpers.h" ++#include "libqtest.h" ++ ++#define CONNECTION_TIMEOUT 5 ++ ++#define EXPECT_STATE(q, e, t) \ ++do { \ ++ char *resp = NULL; \ ++ g_test_timer_start(); \ ++ do { \ ++ g_free(resp); \ ++ resp = qtest_hmp(q, "info network"); \ ++ if (t) { \ ++ strrchr(resp, t)[0] = 0; \ ++ } \ ++ if (g_str_equal(resp, e)) { \ ++ break; \ ++ } \ ++ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \ ++ g_assert_cmpstr(resp, ==, e); \ ++ g_free(resp); \ ++} while (0) ++ ++static gchar *tmpdir; ++ ++static int inet_get_free_port_socket_ipv4(int sock) ++{ ++ struct sockaddr_in addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin_family = AF_INET; ++ addr.sin_addr.s_addr = INADDR_ANY; ++ addr.sin_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin_port); ++} ++ ++static int inet_get_free_port_socket_ipv6(int sock) ++{ ++ struct sockaddr_in6 addr; ++ socklen_t len; ++ ++ memset(&addr, 0, sizeof(addr)); ++ addr.sin6_family = AF_INET6; ++ addr.sin6_addr = in6addr_any; ++ addr.sin6_port = 0; ++ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { ++ return -1; ++ } ++ ++ len = sizeof(addr); ++ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) { ++ return -1; ++ } ++ ++ return ntohs(addr.sin6_port); ++} ++ ++static int inet_get_free_port_multiple(int nb, int *port, bool ipv6) ++{ ++ int sock[nb]; ++ int i; ++ ++ for (i = 0; i < nb; i++) { ++ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0); ++ if (sock[i] < 0) { ++ break; ++ } ++ port[i] = ipv6 ? 
inet_get_free_port_socket_ipv6(sock[i]) : ++ inet_get_free_port_socket_ipv4(sock[i]); ++ if (port[i] == -1) { ++ break; ++ } ++ } ++ ++ nb = i; ++ for (i = 0; i < nb; i++) { ++ closesocket(sock[i]); ++ } ++ ++ return nb; ++} ++ ++static int inet_get_free_port(bool ipv6) ++{ ++ int nb, port; ++ ++ nb = inet_get_free_port_multiple(1, &port, ipv6); ++ g_assert_cmpint(nb, ==, 1); ++ ++ return port; ++} ++ ++static void test_stream_inet_ipv4(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(false); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=on,addr.ipv6=off," ++ "addr.host=127.0.0.1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_inet_ipv6(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port; ++ ++ port = inet_get_free_port(true); ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,server=false,id=st0,addr.type=inet," ++ "addr.ipv4=off,addr.ipv6=on," ++ "addr.host=::1,addr.port=%d", port); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n", ++ port); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ /* the 
port is unknown, check only the address */ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':'); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_stream_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s,", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifdef CONFIG_LINUX ++static void test_stream_unix_abstract(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path; ++ ++ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=true," ++ "addr.type=unix,addr.path=%s," ++ "addr.abstract=on", ++ path); ++ ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,server=false," ++ "addr.type=unix,addr.path=%s,addr.abstract=on", ++ path); ++ ++ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path); ++ EXPECT_STATE(qts1, expect, 0); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ g_free(path); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++#endif ++ ++#ifndef _WIN32 ++static void test_stream_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ int sock[2]; ++ int ret; ++ ++ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock); ++ g_assert_true(ret == 0); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[0]); ++ 
++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev stream,id=st0,addr.type=fd,addr.str=%d", ++ sock[1]); ++ ++ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0); ++ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sock[0]); ++ closesocket(sock[1]); ++} ++#endif ++ ++static void test_dgram_inet(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int port[2]; ++ int nb; ++ ++ nb = inet_get_free_port_multiple(2, port, false); ++ g_assert_cmpint(nb, ==, 2); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[0], port[1]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[0], port[1]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "local.type=inet,local.host=127.0.0.1,local.port=%d," ++ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d", ++ port[1], port[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram," ++ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n", ++ port[1], port[0]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++#ifndef _WIN32 ++static void test_dgram_mcast(void) ++{ ++ QTestState *qts; ++ ++ qts = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0," ++ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234"); ++ ++ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0); ++ ++ qtest_quit(qts); ++} ++ ++static void test_dgram_unix(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ gchar *path0, *path1; ++ ++ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL); ++ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL); ++ ++ 
qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path0, path1); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path0, path1); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=unix,local.path=%s," ++ "remote.type=unix,remote.path=%s", ++ path1, path0); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n", ++ path1, path0); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ unlink(path0); ++ g_free(path0); ++ unlink(path1); ++ g_free(path1); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++} ++ ++static void test_dgram_fd(void) ++{ ++ QTestState *qts0, *qts1; ++ char *expect; ++ int ret; ++ int sv[2]; ++ ++ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv); ++ g_assert_cmpint(ret, !=, -1); ++ ++ qts0 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[0]); ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]); ++ EXPECT_STATE(qts0, expect, 0); ++ g_free(expect); ++ ++ qts1 = qtest_initf("-nodefaults -M none " ++ "-netdev dgram,id=st0,local.type=fd,local.str=%d", ++ sv[1]); ++ ++ ++ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]); ++ EXPECT_STATE(qts1, expect, 0); ++ g_free(expect); ++ ++ qtest_quit(qts1); ++ qtest_quit(qts0); ++ ++ closesocket(sv[0]); ++ closesocket(sv[1]); ++} ++#endif ++ ++int main(int argc, char **argv) ++{ ++ int ret; ++ bool has_ipv4, has_ipv6, has_afunix; ++ g_autoptr(GError) err = NULL; ++ ++ socket_init(); ++ g_test_init(&argc, &argv, NULL); ++ ++ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) { ++ g_error("socket_check_protocol_support() failed\n"); ++ } ++ ++ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err); ++ if (tmpdir == NULL) { ++ g_error("Can't create temporary directory in %s: %s", ++ 
g_get_tmp_dir(), err->message); ++ } ++ ++ if (has_ipv4) { ++ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4); ++ qtest_add_func("/netdev/dgram/inet", test_dgram_inet); ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); ++#endif ++ } ++ if (has_ipv6) { ++ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); ++ } ++ ++ socket_check_afunix_support(&has_afunix); ++ if (has_afunix) { ++#ifndef _WIN32 ++ qtest_add_func("/netdev/dgram/unix", test_dgram_unix); ++#endif ++ qtest_add_func("/netdev/stream/unix", test_stream_unix); ++#ifdef CONFIG_LINUX ++ qtest_add_func("/netdev/stream/unix/abstract", ++ test_stream_unix_abstract); ++#endif ++#ifndef _WIN32 ++ qtest_add_func("/netdev/stream/fd", test_stream_fd); ++ qtest_add_func("/netdev/dgram/fd", test_dgram_fd); ++#endif ++ } ++ ++ ret = g_test_run(); ++ ++ g_rmdir(tmpdir); ++ g_free(tmpdir); ++ ++ return ret; ++} +-- +2.31.1 + diff --git a/kvm-util-userfaultfd-Add-uffd_open.patch b/kvm-util-userfaultfd-Add-uffd_open.patch new file mode 100644 index 0000000..5a5f90c --- /dev/null +++ b/kvm-util-userfaultfd-Add-uffd_open.patch @@ -0,0 +1,169 @@ +From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Wed, 1 Feb 2023 16:10:54 -0500 +Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm) + +Add a helper to create the uffd handle. 
+ +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Juan Quintela +Signed-off-by: Peter Xu +Signed-off-by: Juan Quintela +(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8) +Signed-off-by: Peter Xu +--- + include/qemu/userfaultfd.h | 12 ++++++++++++ + migration/postcopy-ram.c | 11 +++++------ + tests/qtest/migration-test.c | 4 ++-- + util/userfaultfd.c | 13 +++++++++++-- + 4 files changed, 30 insertions(+), 10 deletions(-) + +diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h +index 6b74f92792..d764496f0b 100644 +--- a/include/qemu/userfaultfd.h ++++ b/include/qemu/userfaultfd.h +@@ -13,10 +13,20 @@ + #ifndef USERFAULTFD_H + #define USERFAULTFD_H + ++#ifdef CONFIG_LINUX ++ + #include "qemu/osdep.h" + #include "exec/hwaddr.h" + #include + ++/** ++ * uffd_open(): Open an userfaultfd handle for current context. ++ * ++ * @flags: The flags we want to pass in when creating the handle. ++ * ++ * Returns: the uffd handle if >=0, or <0 if error happens. ++ */ ++int uffd_open(int flags); + int uffd_query_features(uint64_t *features); + int uffd_create_fd(uint64_t features, bool non_blocking); + void uffd_close_fd(int uffd_fd); +@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length); + int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count); + bool uffd_poll_events(int uffd_fd, int tmo); + ++#endif /* CONFIG_LINUX */ ++ + #endif /* USERFAULTFD_H */ +diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c +index b9a37ef255..0c55df0e52 100644 +--- a/migration/postcopy-ram.c ++++ b/migration/postcopy-ram.c +@@ -37,6 +37,7 @@ + #include "qemu-file.h" + #include "yank_functions.h" + #include "tls.h" ++#include "qemu/userfaultfd.h" + + /* Arbitrary limit on size of each discard command, + * keeps them around ~200 bytes +@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features) + int ufd; + bool ret = true; + +- /* if we are here __NR_userfaultfd should exists */ +- ufd = 
syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { +- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__, +- strerror(errno)); ++ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno)); + return false; + } + +@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis) + goto out; + } + +- ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ ufd = uffd_open(O_CLOEXEC); + if (ufd == -1) { + error_report("%s: userfaultfd not available: %s", __func__, + strerror(errno)); +@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis) + int postcopy_ram_incoming_setup(MigrationIncomingState *mis) + { + /* Open the fd for the kernel to give us userfaults */ +- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); ++ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK); + if (mis->userfault_fd == -1) { + error_report("%s: Failed to open userfault fd: %s", __func__, + strerror(errno)); +diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c +index dbde726adf..0100e1bdbc 100644 +--- a/tests/qtest/migration-test.c ++++ b/tests/qtest/migration-test.c +@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id; + #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD) + #include + #include +-#include ++#include "qemu/userfaultfd.h" + + static bool ufd_version_check(void) + { + struct uffdio_api api_struct; + uint64_t ioctl_mask; + +- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ int ufd = uffd_open(O_CLOEXEC); + + if (ufd == -1) { + g_test_message("Skipping test: userfaultfd not available"); +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index f1cd6af2b1..4953b3137d 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -19,6 +19,15 @@ + #include + #include + ++int uffd_open(int flags) ++{ ++#if defined(__NR_userfaultfd) ++ return syscall(__NR_userfaultfd, flags); ++#else ++ return -EINVAL; 
++#endif ++} ++ + /** + * uffd_query_features: query UFFD features + * +@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features) + struct uffdio_api api_struct = { 0 }; + int ret = -1; + +- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC); ++ uffd_fd = uffd_open(O_CLOEXEC); + if (uffd_fd < 0) { + trace_uffd_query_features_nosys(errno); + return -1; +@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking) + uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER); + + flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0); +- uffd_fd = syscall(__NR_userfaultfd, flags); ++ uffd_fd = uffd_open(flags); + if (uffd_fd < 0) { + trace_uffd_create_fd_nosys(errno); + return -1; +-- +2.31.1 + diff --git a/kvm-util-userfaultfd-Support-dev-userfaultfd.patch b/kvm-util-userfaultfd-Support-dev-userfaultfd.patch new file mode 100644 index 0000000..b0a22eb --- /dev/null +++ b/kvm-util-userfaultfd-Support-dev-userfaultfd.patch @@ -0,0 +1,94 @@ +From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Tue, 7 Feb 2023 15:57:11 -0500 +Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd + +RH-Author: Peter Xu +RH-MergeRequest: 149: Support /dev/userfaultfd +RH-Bugzilla: 2158704 +RH-Acked-by: Dr. David Alan Gilbert +RH-Acked-by: quintela1 +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm) + +Teach QEMU to use /dev/userfaultfd when it existed and fallback to the +system call if either it's not there or doesn't have enough permission. + +Firstly, as long as the app has permission to access /dev/userfaultfd, it +always have the ability to trap kernel faults which QEMU mostly wants. +Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be +forbidden, so it can be the major way to use postcopy in a restricted +environment with strict seccomp setup. 
+ +Signed-off-by: Peter Xu +Reviewed-by: Juan Quintela +Signed-off-by: Juan Quintela +(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828) +Signed-off-by: Peter Xu +--- + util/trace-events | 1 + + util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++ + 2 files changed, 33 insertions(+) + +diff --git a/util/trace-events b/util/trace-events +index c8f53d7d9f..16f78d8fe5 100644 +--- a/util/trace-events ++++ b/util/trace-events +@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz + qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p" + + #userfaultfd.c ++uffd_detect_open_mode(int mode) "%d" + uffd_query_features_nosys(int err) "errno: %i" + uffd_query_features_api_failed(int err) "errno: %i" + uffd_create_fd_nosys(int err) "errno: %i" +diff --git a/util/userfaultfd.c b/util/userfaultfd.c +index 4953b3137d..fdff4867e8 100644 +--- a/util/userfaultfd.c ++++ b/util/userfaultfd.c +@@ -18,10 +18,42 @@ + #include + #include + #include ++#include ++ ++typedef enum { ++ UFFD_UNINITIALIZED = 0, ++ UFFD_USE_DEV_PATH, ++ UFFD_USE_SYSCALL, ++} uffd_open_mode; + + int uffd_open(int flags) + { + #if defined(__NR_userfaultfd) ++ static uffd_open_mode open_mode; ++ static int uffd_dev; ++ ++ /* Detect how to generate uffd desc when run the 1st time */ ++ if (open_mode == UFFD_UNINITIALIZED) { ++ /* ++ * Make /dev/userfaultfd the default approach because it has better ++ * permission controls, meanwhile allows kernel faults without any ++ * privilege requirement (e.g. SYS_CAP_PTRACE). 
++ */ ++ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); ++ if (uffd_dev >= 0) { ++ open_mode = UFFD_USE_DEV_PATH; ++ } else { ++ /* Fallback to the system call */ ++ open_mode = UFFD_USE_SYSCALL; ++ } ++ trace_uffd_detect_open_mode(open_mode); ++ } ++ ++ if (open_mode == UFFD_USE_DEV_PATH) { ++ assert(uffd_dev >= 0); ++ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags); ++ } ++ + return syscall(__NR_userfaultfd, flags); + #else + return -EINVAL; +-- +2.31.1 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a9baa8e..ff53eaf 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 7.2.0 -Release: 8%{?rcrel}%{?dist}%{?cc_suffix} +Release: 9%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -356,6 +356,22 @@ Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch # For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2 Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch +# For bz#2169232 - RFE: reconnect option for stream socket back-end +Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch108: kvm-linux-headers-Update-to-v6.1.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch109: kvm-util-userfaultfd-Add-uffd_open.patch +# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall +Patch110: 
kvm-util-userfaultfd-Support-dev-userfaultfd.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch +# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race +Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch +# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes +Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch %if %{have_clang} BuildRequires: clang @@ -1386,6 +1402,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Fri Feb 17 2023 Miroslav Rezanina - 7.2.0-9 +- kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232] +- kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232] +- kvm-linux-headers-Update-to-v6.1.patch [bz#2158704] +- kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704] +- kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704] +- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732] +- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732] +- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172] +- Resolves: bz#2169232 + (RFE: reconnect option for stream socket back-end) +- Resolves: bz#2158704 + (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall) +- Resolves: bz#2169732 + (Multifd migration fails under a weak network/socket ordering race) +- Resolves: bz#2168172 + ([s390x] qemu-kvm coredumps when SE crashes) + * Thu Feb 09 2023 Miroslav Rezanina - 7.2.0-8 - kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180] - kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180]