* Fri Feb 17 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-9

- kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232]
- kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232]
- kvm-linux-headers-Update-to-v6.1.patch [bz#2158704]
- kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704]
- kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704]
- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732]
- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732]
- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172]
- Resolves: bz#2169232
  (RFE: reconnect option for stream socket back-end)
- Resolves: bz#2158704
  (RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall)
- Resolves: bz#2169732
  (Multifd migration fails under a weak network/socket ordering race)
- Resolves: bz#2168172
  ([s390x] qemu-kvm coredumps when SE crashes)
This commit is contained in:
Miroslav Rezanina 2023-02-17 02:08:06 -05:00
parent a6628605f7
commit 382f65f59d
9 changed files with 2471 additions and 1 deletions

View File

@ -0,0 +1,386 @@
From 3a29b50036b972caae5bca0e5dfc34d910b1d5e9 Mon Sep 17 00:00:00 2001
From: "manish.mishra" <manish.mishra@nutanix.com>
Date: Tue, 20 Dec 2022 18:44:17 +0000
Subject: [PATCH 6/8] io: Add support for MSG_PEEK for socket channel
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
RH-Bugzilla: 2169732
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Commit: [1/2] 266563f3e387e97ec710d9bc179e5de26dfd09f1 (peterx/qemu-kvm)
MSG_PEEK peeks at the channel, The data is treated as unread and
the next read shall still return this data. This support is
currently added only for socket class. Extra parameter 'flags'
is added to io_readv calls to pass extra read flags like MSG_PEEK.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 84615a19ddf2bfb38d7b3a0d487d2397ee55e4f3)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
chardev/char-socket.c | 4 ++--
include/io/channel.h | 6 ++++++
io/channel-buffer.c | 1 +
io/channel-command.c | 1 +
io/channel-file.c | 1 +
io/channel-null.c | 1 +
io/channel-socket.c | 19 ++++++++++++++++++-
io/channel-tls.c | 1 +
io/channel-websock.c | 1 +
io/channel.c | 16 ++++++++++++----
migration/channel-block.c | 1 +
migration/rdma.c | 1 +
scsi/qemu-pr-helper.c | 2 +-
tests/qtest/tpm-emu.c | 2 +-
tests/unit/test-io-channel-socket.c | 1 +
util/vhost-user-server.c | 2 +-
16 files changed, 50 insertions(+), 10 deletions(-)
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
index 879564aa8a..5afce9a464 100644
--- a/chardev/char-socket.c
+++ b/chardev/char-socket.c
@@ -283,11 +283,11 @@ static ssize_t tcp_chr_recv(Chardev *chr, char *buf, size_t len)
if (qio_channel_has_feature(s->ioc, QIO_CHANNEL_FEATURE_FD_PASS)) {
ret = qio_channel_readv_full(s->ioc, &iov, 1,
&msgfds, &msgfds_num,
- NULL);
+ 0, NULL);
} else {
ret = qio_channel_readv_full(s->ioc, &iov, 1,
NULL, NULL,
- NULL);
+ 0, NULL);
}
if (msgfds_num) {
diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..716235d496 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -34,6 +34,8 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+#define QIO_CHANNEL_READ_FLAG_MSG_PEEK 0x1
+
typedef enum QIOChannelFeature QIOChannelFeature;
enum QIOChannelFeature {
@@ -41,6 +43,7 @@ enum QIOChannelFeature {
QIO_CHANNEL_FEATURE_SHUTDOWN,
QIO_CHANNEL_FEATURE_LISTEN,
QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK,
};
@@ -114,6 +117,7 @@ struct QIOChannelClass {
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp);
int (*io_close)(QIOChannel *ioc,
Error **errp);
@@ -188,6 +192,7 @@ void qio_channel_set_name(QIOChannel *ioc,
* @niov: the length of the @iov array
* @fds: pointer to an array that will received file handles
* @nfds: pointer filled with number of elements in @fds on return
+ * @flags: read flags (QIO_CHANNEL_READ_FLAG_*)
* @errp: pointer to a NULL-initialized error object
*
* Read data from the IO channel, storing it in the
@@ -224,6 +229,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp);
diff --git a/io/channel-buffer.c b/io/channel-buffer.c
index bf52011be2..8096180f85 100644
--- a/io/channel-buffer.c
+++ b/io/channel-buffer.c
@@ -54,6 +54,7 @@ static ssize_t qio_channel_buffer_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelBuffer *bioc = QIO_CHANNEL_BUFFER(ioc);
diff --git a/io/channel-command.c b/io/channel-command.c
index 74516252ba..e7edd091af 100644
--- a/io/channel-command.c
+++ b/io/channel-command.c
@@ -203,6 +203,7 @@ static ssize_t qio_channel_command_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelCommand *cioc = QIO_CHANNEL_COMMAND(ioc);
diff --git a/io/channel-file.c b/io/channel-file.c
index b67687c2aa..d76663e6ae 100644
--- a/io/channel-file.c
+++ b/io/channel-file.c
@@ -86,6 +86,7 @@ static ssize_t qio_channel_file_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelFile *fioc = QIO_CHANNEL_FILE(ioc);
diff --git a/io/channel-null.c b/io/channel-null.c
index 75e3781507..4fafdb770d 100644
--- a/io/channel-null.c
+++ b/io/channel-null.c
@@ -60,6 +60,7 @@ qio_channel_null_readv(QIOChannel *ioc,
size_t niov,
int **fds G_GNUC_UNUSED,
size_t *nfds G_GNUC_UNUSED,
+ int flags,
Error **errp)
{
QIOChannelNull *nioc = QIO_CHANNEL_NULL(ioc);
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..7aca84f61a 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -173,6 +173,9 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
}
#endif
+ qio_channel_set_feature(QIO_CHANNEL(ioc),
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
return 0;
}
@@ -406,6 +409,9 @@ qio_channel_socket_accept(QIOChannelSocket *ioc,
}
#endif /* WIN32 */
+ qio_channel_set_feature(QIO_CHANNEL(cioc),
+ QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
+
trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
return cioc;
@@ -496,6 +502,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
@@ -517,6 +524,10 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
}
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+ sflags |= MSG_PEEK;
+ }
+
retry:
ret = recvmsg(sioc->fd, &msg, sflags);
if (ret < 0) {
@@ -624,11 +635,17 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
ssize_t done = 0;
ssize_t i;
+ int sflags = 0;
+
+ if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
+ sflags |= MSG_PEEK;
+ }
for (i = 0; i < niov; i++) {
ssize_t ret;
@@ -636,7 +653,7 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
ret = recv(sioc->fd,
iov[i].iov_base,
iov[i].iov_len,
- 0);
+ sflags);
if (ret < 0) {
if (errno == EAGAIN) {
if (done) {
diff --git a/io/channel-tls.c b/io/channel-tls.c
index 4ce890a538..c730cb8ec5 100644
--- a/io/channel-tls.c
+++ b/io/channel-tls.c
@@ -260,6 +260,7 @@ static ssize_t qio_channel_tls_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelTLS *tioc = QIO_CHANNEL_TLS(ioc);
diff --git a/io/channel-websock.c b/io/channel-websock.c
index fb4932ade7..a12acc27cf 100644
--- a/io/channel-websock.c
+++ b/io/channel-websock.c
@@ -1081,6 +1081,7 @@ static ssize_t qio_channel_websock_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelWebsock *wioc = QIO_CHANNEL_WEBSOCK(ioc);
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a8c7f11649 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -52,6 +52,7 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
@@ -63,7 +64,14 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
return -1;
}
- return klass->io_readv(ioc, iov, niov, fds, nfds, errp);
+ if ((flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) &&
+ !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ error_setg_errno(errp, EINVAL,
+ "Channel does not support peek read");
+ return -1;
+ }
+
+ return klass->io_readv(ioc, iov, niov, fds, nfds, flags, errp);
}
@@ -146,7 +154,7 @@ int qio_channel_readv_full_all_eof(QIOChannel *ioc,
while ((nlocal_iov > 0) || local_fds) {
ssize_t len;
len = qio_channel_readv_full(ioc, local_iov, nlocal_iov, local_fds,
- local_nfds, errp);
+ local_nfds, 0, errp);
if (len == QIO_CHANNEL_ERR_BLOCK) {
if (qemu_in_coroutine()) {
qio_channel_yield(ioc, G_IO_IN);
@@ -284,7 +292,7 @@ ssize_t qio_channel_readv(QIOChannel *ioc,
size_t niov,
Error **errp)
{
- return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, errp);
+ return qio_channel_readv_full(ioc, iov, niov, NULL, NULL, 0, errp);
}
@@ -303,7 +311,7 @@ ssize_t qio_channel_read(QIOChannel *ioc,
Error **errp)
{
struct iovec iov = { .iov_base = buf, .iov_len = buflen };
- return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, errp);
+ return qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, 0, errp);
}
diff --git a/migration/channel-block.c b/migration/channel-block.c
index f4ab53acdb..b7374363c3 100644
--- a/migration/channel-block.c
+++ b/migration/channel-block.c
@@ -53,6 +53,7 @@ qio_channel_block_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelBlock *bioc = QIO_CHANNEL_BLOCK(ioc);
diff --git a/migration/rdma.c b/migration/rdma.c
index 94a55dd95b..d8b4632094 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -2854,6 +2854,7 @@ static ssize_t qio_channel_rdma_readv(QIOChannel *ioc,
size_t niov,
int **fds,
size_t *nfds,
+ int flags,
Error **errp)
{
QIOChannelRDMA *rioc = QIO_CHANNEL_RDMA(ioc);
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 196b78c00d..199227a556 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -614,7 +614,7 @@ static int coroutine_fn prh_read(PRHelperClient *client, void *buf, int sz,
iov.iov_base = buf;
iov.iov_len = sz;
n_read = qio_channel_readv_full(QIO_CHANNEL(client->ioc), &iov, 1,
- &fds, &nfds, errp);
+ &fds, &nfds, 0, errp);
if (n_read == QIO_CHANNEL_ERR_BLOCK) {
qio_channel_yield(QIO_CHANNEL(client->ioc), G_IO_IN);
diff --git a/tests/qtest/tpm-emu.c b/tests/qtest/tpm-emu.c
index 2994d1cf42..3cf1acaf7d 100644
--- a/tests/qtest/tpm-emu.c
+++ b/tests/qtest/tpm-emu.c
@@ -106,7 +106,7 @@ void *tpm_emu_ctrl_thread(void *data)
int *pfd = NULL;
size_t nfd = 0;
- qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, &error_abort);
+ qio_channel_readv_full(ioc, &iov, 1, &pfd, &nfd, 0, &error_abort);
cmd = be32_to_cpu(cmd);
g_assert_cmpint(cmd, ==, CMD_SET_DATAFD);
g_assert_cmpint(nfd, ==, 1);
diff --git a/tests/unit/test-io-channel-socket.c b/tests/unit/test-io-channel-socket.c
index b36a5d972a..b964bb202d 100644
--- a/tests/unit/test-io-channel-socket.c
+++ b/tests/unit/test-io-channel-socket.c
@@ -460,6 +460,7 @@ static void test_io_channel_unix_fd_pass(void)
G_N_ELEMENTS(iorecv),
&fdrecv,
&nfdrecv,
+ 0,
&error_abort);
g_assert(nfdrecv == G_N_ELEMENTS(fdsend));
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
index 232984ace6..145eb17c08 100644
--- a/util/vhost-user-server.c
+++ b/util/vhost-user-server.c
@@ -116,7 +116,7 @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
* qio_channel_readv_full may have short reads, keeping calling it
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
*/
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, 0, &local_err);
if (rc < 0) {
if (rc == QIO_CHANNEL_ERR_BLOCK) {
assert(local_err == NULL);
--
2.31.1

View File

@ -0,0 +1,577 @@
From cbe35c6a4794107ea1ddecf0b381ba4b1c8799f5 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 7 Feb 2023 15:57:10 -0500
Subject: [PATCH 3/8] linux-headers: Update to v6.1
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 149: Support /dev/userfaultfd
RH-Bugzilla: 2158704
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [1/3] 15d97026e802a0f01b5f80f81fb4414dc69b2b2d (peterx/qemu-kvm)
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 93e0932b7be2498024cd6ba8446a0fa2cb1769bc)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/standard-headers/drm/drm_fourcc.h | 34 ++++-
include/standard-headers/linux/ethtool.h | 63 +++++++-
include/standard-headers/linux/fuse.h | 6 +-
.../linux/input-event-codes.h | 1 +
include/standard-headers/linux/virtio_blk.h | 19 +++
linux-headers/asm-generic/hugetlb_encode.h | 26 ++--
linux-headers/asm-generic/mman-common.h | 2 +
linux-headers/asm-mips/mman.h | 2 +
linux-headers/asm-riscv/kvm.h | 4 +
linux-headers/linux/kvm.h | 1 +
linux-headers/linux/psci.h | 14 ++
linux-headers/linux/userfaultfd.h | 4 +
linux-headers/linux/vfio.h | 142 ++++++++++++++++++
13 files changed, 298 insertions(+), 20 deletions(-)
diff --git a/include/standard-headers/drm/drm_fourcc.h b/include/standard-headers/drm/drm_fourcc.h
index 48b620cbef..b868488f93 100644
--- a/include/standard-headers/drm/drm_fourcc.h
+++ b/include/standard-headers/drm/drm_fourcc.h
@@ -98,18 +98,42 @@ extern "C" {
#define DRM_FORMAT_INVALID 0
/* color index */
+#define DRM_FORMAT_C1 fourcc_code('C', '1', ' ', ' ') /* [7:0] C0:C1:C2:C3:C4:C5:C6:C7 1:1:1:1:1:1:1:1 eight pixels/byte */
+#define DRM_FORMAT_C2 fourcc_code('C', '2', ' ', ' ') /* [7:0] C0:C1:C2:C3 2:2:2:2 four pixels/byte */
+#define DRM_FORMAT_C4 fourcc_code('C', '4', ' ', ' ') /* [7:0] C0:C1 4:4 two pixels/byte */
#define DRM_FORMAT_C8 fourcc_code('C', '8', ' ', ' ') /* [7:0] C */
-/* 8 bpp Red */
+/* 1 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D1 fourcc_code('D', '1', ' ', ' ') /* [7:0] D0:D1:D2:D3:D4:D5:D6:D7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D2 fourcc_code('D', '2', ' ', ' ') /* [7:0] D0:D1:D2:D3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D4 fourcc_code('D', '4', ' ', ' ') /* [7:0] D0:D1 4:4 two pixels/byte */
+
+/* 8 bpp Darkness (inverse relationship between channel value and brightness) */
+#define DRM_FORMAT_D8 fourcc_code('D', '8', ' ', ' ') /* [7:0] D */
+
+/* 1 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R1 fourcc_code('R', '1', ' ', ' ') /* [7:0] R0:R1:R2:R3:R4:R5:R6:R7 1:1:1:1:1:1:1:1 eight pixels/byte */
+
+/* 2 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R2 fourcc_code('R', '2', ' ', ' ') /* [7:0] R0:R1:R2:R3 2:2:2:2 four pixels/byte */
+
+/* 4 bpp Red (direct relationship between channel value and brightness) */
+#define DRM_FORMAT_R4 fourcc_code('R', '4', ' ', ' ') /* [7:0] R0:R1 4:4 two pixels/byte */
+
+/* 8 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R8 fourcc_code('R', '8', ' ', ' ') /* [7:0] R */
-/* 10 bpp Red */
+/* 10 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R10 fourcc_code('R', '1', '0', ' ') /* [15:0] x:R 6:10 little endian */
-/* 12 bpp Red */
+/* 12 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R12 fourcc_code('R', '1', '2', ' ') /* [15:0] x:R 4:12 little endian */
-/* 16 bpp Red */
+/* 16 bpp Red (direct relationship between channel value and brightness) */
#define DRM_FORMAT_R16 fourcc_code('R', '1', '6', ' ') /* [15:0] R little endian */
/* 16 bpp RG */
@@ -204,7 +228,9 @@ extern "C" {
#define DRM_FORMAT_VYUY fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */
#define DRM_FORMAT_AYUV fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_AVUY8888 fourcc_code('A', 'V', 'U', 'Y') /* [31:0] A:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_XYUV8888 fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XVUY8888 fourcc_code('X', 'V', 'U', 'Y') /* [31:0] X:Cr:Cb:Y 8:8:8:8 little endian */
#define DRM_FORMAT_VUY888 fourcc_code('V', 'U', '2', '4') /* [23:0] Cr:Cb:Y 8:8:8 little endian */
#define DRM_FORMAT_VUY101010 fourcc_code('V', 'U', '3', '0') /* Y followed by U then V, 10:10:10. Non-linear modifier only */
diff --git a/include/standard-headers/linux/ethtool.h b/include/standard-headers/linux/ethtool.h
index 4537da20cc..1dc56cdc0a 100644
--- a/include/standard-headers/linux/ethtool.h
+++ b/include/standard-headers/linux/ethtool.h
@@ -736,6 +736,51 @@ enum ethtool_module_power_mode {
ETHTOOL_MODULE_POWER_MODE_HIGH,
};
+/**
+ * enum ethtool_podl_pse_admin_state - operational state of the PoDL PSE
+ * functions. IEEE 802.3-2018 30.15.1.1.2 aPoDLPSEAdminState
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN: state of PoDL PSE functions are
+ * unknown
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED: PoDL PSE functions are disabled
+ * @ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED: PoDL PSE functions are enabled
+ */
+enum ethtool_podl_pse_admin_state {
+ ETHTOOL_PODL_PSE_ADMIN_STATE_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_DISABLED,
+ ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED,
+};
+
+/**
+ * enum ethtool_podl_pse_pw_d_status - power detection status of the PoDL PSE.
+ * IEEE 802.3-2018 30.15.1.1.3 aPoDLPSEPowerDetectionStatus:
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN: PoDL PSE
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED: "The enumeration “disabled” is
+ * asserted true when the PoDL PSE state diagram variable mr_pse_enable is
+ * false"
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING: "The enumeration “searching” is
+ * asserted true when either of the PSE state diagram variables
+ * pi_detecting or pi_classifying is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING: "The enumeration “deliveringPower”
+ * is asserted true when the PoDL PSE state diagram variable pi_powered is
+ * true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP: "The enumeration “sleep” is asserted
+ * true when the PoDL PSE state diagram variable pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE: "The enumeration “idle” is asserted true
+ * when the logical combination of the PoDL PSE state diagram variables
+ * pi_prebiased*!pi_sleeping is true."
+ * @ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR: "The enumeration “error” is asserted
+ * true when the PoDL PSE state diagram variable overload_held is true."
+ */
+enum ethtool_podl_pse_pw_d_status {
+ ETHTOOL_PODL_PSE_PW_D_STATUS_UNKNOWN = 1,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DISABLED,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SEARCHING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_DELIVERING,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_SLEEP,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_IDLE,
+ ETHTOOL_PODL_PSE_PW_D_STATUS_ERROR,
+};
+
/**
* struct ethtool_gstrings - string set for data tagging
* @cmd: Command number = %ETHTOOL_GSTRINGS
@@ -1840,6 +1885,20 @@ static inline int ethtool_validate_duplex(uint8_t duplex)
#define MASTER_SLAVE_STATE_SLAVE 3
#define MASTER_SLAVE_STATE_ERR 4
+/* These are used to throttle the rate of data on the phy interface when the
+ * native speed of the interface is higher than the link speed. These should
+ * not be used for phy interfaces which natively support multiple speeds (e.g.
+ * MII or SGMII).
+ */
+/* No rate matching performed. */
+#define RATE_MATCH_NONE 0
+/* The phy sends pause frames to throttle the MAC. */
+#define RATE_MATCH_PAUSE 1
+/* The phy asserts CRS to prevent the MAC from transmitting. */
+#define RATE_MATCH_CRS 2
+/* The MAC is programmed with a sufficiently-large IPG. */
+#define RATE_MATCH_OPEN_LOOP 3
+
/* Which connector port. */
#define PORT_TP 0x00
#define PORT_AUI 0x01
@@ -2033,8 +2092,8 @@ enum ethtool_reset_flags {
* reported consistently by PHYLIB. Read-only.
* @master_slave_cfg: Master/slave port mode.
* @master_slave_state: Master/slave port state.
+ * @rate_matching: Rate adaptation performed by the PHY
* @reserved: Reserved for future use; see the note on reserved space.
- * @reserved1: Reserved for future use; see the note on reserved space.
* @link_mode_masks: Variable length bitmaps.
*
* If autonegotiation is disabled, the speed and @duplex represent the
@@ -2085,7 +2144,7 @@ struct ethtool_link_settings {
uint8_t transceiver;
uint8_t master_slave_cfg;
uint8_t master_slave_state;
- uint8_t reserved1[1];
+ uint8_t rate_matching;
uint32_t reserved[7];
uint32_t link_mode_masks[];
/* layout of link_mode_masks fields:
diff --git a/include/standard-headers/linux/fuse.h b/include/standard-headers/linux/fuse.h
index bda06258be..713d259768 100644
--- a/include/standard-headers/linux/fuse.h
+++ b/include/standard-headers/linux/fuse.h
@@ -194,6 +194,9 @@
* - add FUSE_SECURITY_CTX init flag
* - add security context to create, mkdir, symlink, and mknod requests
* - add FUSE_HAS_INODE_DAX, FUSE_ATTR_DAX
+ *
+ * 7.37
+ * - add FUSE_TMPFILE
*/
#ifndef _LINUX_FUSE_H
@@ -225,7 +228,7 @@
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 36
+#define FUSE_KERNEL_MINOR_VERSION 37
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -533,6 +536,7 @@ enum fuse_opcode {
FUSE_SETUPMAPPING = 48,
FUSE_REMOVEMAPPING = 49,
FUSE_SYNCFS = 50,
+ FUSE_TMPFILE = 51,
/* CUSE specific operations */
CUSE_INIT = 4096,
diff --git a/include/standard-headers/linux/input-event-codes.h b/include/standard-headers/linux/input-event-codes.h
index 50790aee5a..815f7a1dff 100644
--- a/include/standard-headers/linux/input-event-codes.h
+++ b/include/standard-headers/linux/input-event-codes.h
@@ -862,6 +862,7 @@
#define ABS_TOOL_WIDTH 0x1c
#define ABS_VOLUME 0x20
+#define ABS_PROFILE 0x21
#define ABS_MISC 0x28
diff --git a/include/standard-headers/linux/virtio_blk.h b/include/standard-headers/linux/virtio_blk.h
index 2dcc90826a..e81715cd70 100644
--- a/include/standard-headers/linux/virtio_blk.h
+++ b/include/standard-headers/linux/virtio_blk.h
@@ -40,6 +40,7 @@
#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
#define VIRTIO_BLK_F_DISCARD 13 /* DISCARD is supported */
#define VIRTIO_BLK_F_WRITE_ZEROES 14 /* WRITE ZEROES is supported */
+#define VIRTIO_BLK_F_SECURE_ERASE 16 /* Secure Erase is supported */
/* Legacy feature bits */
#ifndef VIRTIO_BLK_NO_LEGACY
@@ -119,6 +120,21 @@ struct virtio_blk_config {
uint8_t write_zeroes_may_unmap;
uint8_t unused1[3];
+
+ /* the next 3 entries are guarded by VIRTIO_BLK_F_SECURE_ERASE */
+ /*
+ * The maximum secure erase sectors (in 512-byte sectors) for
+ * one segment.
+ */
+ __virtio32 max_secure_erase_sectors;
+ /*
+ * The maximum number of secure erase segments in a
+ * secure erase command.
+ */
+ __virtio32 max_secure_erase_seg;
+ /* Secure erase commands must be aligned to this number of sectors. */
+ __virtio32 secure_erase_sector_alignment;
+
} QEMU_PACKED;
/*
@@ -153,6 +169,9 @@ struct virtio_blk_config {
/* Write zeroes command */
#define VIRTIO_BLK_T_WRITE_ZEROES 13
+/* Secure erase command */
+#define VIRTIO_BLK_T_SECURE_ERASE 14
+
#ifndef VIRTIO_BLK_NO_LEGACY
/* Barrier before this op. */
#define VIRTIO_BLK_T_BARRIER 0x80000000
diff --git a/linux-headers/asm-generic/hugetlb_encode.h b/linux-headers/asm-generic/hugetlb_encode.h
index 4f3d5aaa11..de687009bf 100644
--- a/linux-headers/asm-generic/hugetlb_encode.h
+++ b/linux-headers/asm-generic/hugetlb_encode.h
@@ -20,18 +20,18 @@
#define HUGETLB_FLAG_ENCODE_SHIFT 26
#define HUGETLB_FLAG_ENCODE_MASK 0x3f
-#define HUGETLB_FLAG_ENCODE_16KB (14 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_32MB (25 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_512MB (29 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
-#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16KB (14U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_64KB (16U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB (19U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB (20U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB (21U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB (23U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB (24U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_32MB (25U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB (28U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512MB (29U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB (30U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB (31U << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB (34U << HUGETLB_FLAG_ENCODE_SHIFT)
#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
diff --git a/linux-headers/asm-generic/mman-common.h b/linux-headers/asm-generic/mman-common.h
index 6c1aa92a92..6ce1f1ceb4 100644
--- a/linux-headers/asm-generic/mman-common.h
+++ b/linux-headers/asm-generic/mman-common.h
@@ -77,6 +77,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-mips/mman.h b/linux-headers/asm-mips/mman.h
index 1be428663c..c6e1fc77c9 100644
--- a/linux-headers/asm-mips/mman.h
+++ b/linux-headers/asm-mips/mman.h
@@ -103,6 +103,8 @@
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
+#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
index 7351417afd..8985ff234c 100644
--- a/linux-headers/asm-riscv/kvm.h
+++ b/linux-headers/asm-riscv/kvm.h
@@ -48,6 +48,7 @@ struct kvm_sregs {
/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
struct kvm_riscv_config {
unsigned long isa;
+ unsigned long zicbom_block_size;
};
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -98,6 +99,9 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_M,
KVM_RISCV_ISA_EXT_SVPBMT,
KVM_RISCV_ISA_EXT_SSTC,
+ KVM_RISCV_ISA_EXT_SVINVAL,
+ KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
+ KVM_RISCV_ISA_EXT_ZICBOM,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ebdafa576d..b2783c5202 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1175,6 +1175,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_DISABLE_NX_HUGE_PAGES 220
#define KVM_CAP_S390_ZPCI_OP 221
#define KVM_CAP_S390_CPU_TOPOLOGY 222
+#define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/linux-headers/linux/psci.h b/linux-headers/linux/psci.h
index 213b2a0f70..e60dfd8907 100644
--- a/linux-headers/linux/psci.h
+++ b/linux-headers/linux/psci.h
@@ -48,12 +48,26 @@
#define PSCI_0_2_FN64_MIGRATE_INFO_UP_CPU PSCI_0_2_FN64(7)
#define PSCI_1_0_FN_PSCI_FEATURES PSCI_0_2_FN(10)
+#define PSCI_1_0_FN_CPU_FREEZE PSCI_0_2_FN(11)
+#define PSCI_1_0_FN_CPU_DEFAULT_SUSPEND PSCI_0_2_FN(12)
+#define PSCI_1_0_FN_NODE_HW_STATE PSCI_0_2_FN(13)
#define PSCI_1_0_FN_SYSTEM_SUSPEND PSCI_0_2_FN(14)
#define PSCI_1_0_FN_SET_SUSPEND_MODE PSCI_0_2_FN(15)
+#define PSCI_1_0_FN_STAT_RESIDENCY PSCI_0_2_FN(16)
+#define PSCI_1_0_FN_STAT_COUNT PSCI_0_2_FN(17)
+
#define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18)
+#define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19)
+#define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12)
+#define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13)
#define PSCI_1_0_FN64_SYSTEM_SUSPEND PSCI_0_2_FN64(14)
+#define PSCI_1_0_FN64_STAT_RESIDENCY PSCI_0_2_FN64(16)
+#define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17)
+
#define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19)
/* PSCI v0.2 power state encoding for CPU_SUSPEND function */
#define PSCI_0_2_POWER_STATE_ID_MASK 0xffff
diff --git a/linux-headers/linux/userfaultfd.h b/linux-headers/linux/userfaultfd.h
index a3a377cd44..ba5d0df52f 100644
--- a/linux-headers/linux/userfaultfd.h
+++ b/linux-headers/linux/userfaultfd.h
@@ -12,6 +12,10 @@
#include <linux/types.h>
+/* ioctls for /dev/userfaultfd */
+#define USERFAULTFD_IOC 0xAA
+#define USERFAULTFD_IOC_NEW _IO(USERFAULTFD_IOC, 0x00)
+
/*
* If the UFFDIO_API is upgraded someday, the UFFDIO_UNREGISTER and
* UFFDIO_WAKE ioctls should be defined as _IOW and not as _IOR. In
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index ede44b5572..bee7e42198 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -986,6 +986,148 @@ enum vfio_device_mig_state {
VFIO_DEVICE_STATE_RUNNING_P2P = 5,
};
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
+ * state with the platform-based power management. Device use of lower power
+ * states depends on factors managed by the runtime power management core,
+ * including system level support and coordinating support among dependent
+ * devices. Enabling device low power entry does not guarantee lower power
+ * usage by the device, nor is a mechanism provided through this feature to
+ * know the current power state of the device. If any device access happens
+ * (either from the host or through the vfio uAPI) when the device is in the
+ * low power state, then the host will move the device out of the low power
+ * state as necessary prior to the access. Once the access is completed, the
+ * device may re-enter the low power state. For single shot low power support
+ * with wake-up notification, see
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP below. Access to mmap'd
+ * device regions is disabled on LOW_POWER_ENTRY and may only be resumed after
+ * calling LOW_POWER_EXIT.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY 3
+
+/*
+ * This device feature has the same behavior as
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY with the exception that the user
+ * provides an eventfd for wake-up notification. When the device moves out of
+ * the low power state for the wake-up, the host will not allow the device to
+ * re-enter a low power state without a subsequent user call to one of the low
+ * power entry device feature IOCTLs. Access to mmap'd device regions is
+ * disabled on LOW_POWER_ENTRY_WITH_WAKEUP and may only be resumed after the
+ * low power exit. The low power exit can happen either through LOW_POWER_EXIT
+ * or through any other access (where the wake-up notification has been
+ * generated). The access to mmap'd device regions will not trigger low power
+ * exit.
+ *
+ * The notification through the provided eventfd will be generated only when
+ * the device has entered and is resumed from a low power state after
+ * calling this device feature IOCTL. A device that has not entered low power
+ * state, as managed through the runtime power management core, will not
+ * generate a notification through the provided eventfd on access. Calling the
+ * LOW_POWER_EXIT feature is optional in the case where notification has been
+ * signaled on the provided eventfd that a resume from low power has occurred.
+ */
+struct vfio_device_low_power_entry_with_wakeup {
+ __s32 wakeup_eventfd;
+ __u32 reserved;
+};
+
+#define VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP 4
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET, disallow use of device low power states as
+ * previously enabled via VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY or
+ * VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP device features.
+ * This device feature IOCTL may itself generate a wakeup eventfd notification
+ * in the latter case if the device had previously entered a low power state.
+ */
+#define VFIO_DEVICE_FEATURE_LOW_POWER_EXIT 5
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET start/stop device DMA logging.
+ * VFIO_DEVICE_FEATURE_PROBE can be used to detect if the device supports
+ * DMA logging.
+ *
+ * DMA logging allows a device to internally record what DMAs the device is
+ * initiating and report them back to userspace. It is part of the VFIO
+ * migration infrastructure that allows implementing dirty page tracking
+ * during the pre copy phase of live migration. Only DMA WRITEs are logged,
+ * and this API is not connected to VFIO_DEVICE_FEATURE_MIG_DEVICE_STATE.
+ *
+ * When DMA logging is started a range of IOVAs to monitor is provided and the
+ * device can optimize its logging to cover only the IOVA range given. Each
+ * DMA that the device initiates inside the range will be logged by the device
+ * for later retrieval.
+ *
+ * page_size is an input that hints what tracking granularity the device
+ * should try to achieve. If the device cannot do the hinted page size then
+ * it's the driver choice which page size to pick based on its support.
+ * On output the device will return the page size it selected.
+ *
+ * ranges is a pointer to an array of
+ * struct vfio_device_feature_dma_logging_range.
+ *
+ * The core kernel code guarantees to support by minimum num_ranges that fit
+ * into a single kernel page. User space can try higher values but should give
+ * up if the above can't be achieved as of some driver limitations.
+ *
+ * A single call to start device DMA logging can be issued and a matching stop
+ * should follow at the end. Another start is not allowed in the meantime.
+ */
+struct vfio_device_feature_dma_logging_control {
+ __aligned_u64 page_size;
+ __u32 num_ranges;
+ __u32 __reserved;
+ __aligned_u64 ranges;
+};
+
+struct vfio_device_feature_dma_logging_range {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_START 6
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_SET stop device DMA logging that was started
+ * by VFIO_DEVICE_FEATURE_DMA_LOGGING_START
+ */
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP 7
+
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back and clear the device DMA log
+ *
+ * Query the device's DMA log for written pages within the given IOVA range.
+ * During querying the log is cleared for the IOVA range.
+ *
+ * bitmap is a pointer to an array of u64s that will hold the output bitmap
+ * with 1 bit reporting a page_size unit of IOVA. The mapping of IOVA to bits
+ * is given by:
+ * bitmap[(addr - iova)/page_size] & (1ULL << (addr % 64))
+ *
+ * The input page_size can be any power of two value and does not have to
+ * match the value given to VFIO_DEVICE_FEATURE_DMA_LOGGING_START. The driver
+ * will format its internal logging to match the reporting page size, possibly
+ * by replicating bits if the internal page size is lower than requested.
+ *
+ * The LOGGING_REPORT will only set bits in the bitmap and never clear or
+ * perform any initialization of the user provided bitmap.
+ *
+ * If any error is returned userspace should assume that the dirty log is
+ * corrupted. Error recovery is to consider all memory dirty and try to
+ * restart the dirty tracking, or to abort/restart the whole migration.
+ *
+ * If DMA logging is not enabled, an error will be returned.
+ *
+ */
+struct vfio_device_feature_dma_logging_report {
+ __aligned_u64 iova;
+ __aligned_u64 length;
+ __aligned_u64 page_size;
+ __aligned_u64 bitmap;
+};
+
+#define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
+
/* -------- API for Type1 VFIO IOMMU -------- */
/**
--
2.31.1

View File

@ -0,0 +1,330 @@
From 29eee1fbb84c0e2f0ece9e6d996afa7238ed2912 Mon Sep 17 00:00:00 2001
From: "manish.mishra" <manish.mishra@nutanix.com>
Date: Tue, 20 Dec 2022 18:44:18 +0000
Subject: [PATCH 7/8] migration: check magic value for deciding the mapping of
channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 150: migration: Fix multifd crash on channel disorders
RH-Bugzilla: 2169732
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Leonardo Brás <leobras@redhat.com>
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Commit: [2/2] 4fb9408478923415a91fe0527bf4b1a0f022f329 (peterx/qemu-kvm)
Current logic assumes that channel connections on the destination side are
always established in the same order as the source and the first one will
always be the main channel followed by the multifid or post-copy
preemption channel. This may not be always true, as even if a channel has a
connection established on the source side it can be in the pending state on
the destination side and a newer connection can be established first.
Basically causing out of order mapping of channels on the destination side.
Currently, all channels except post-copy preempt send a magic number, this
patch uses that magic number to decide the type of channel. This logic is
applicable only for precopy(multifd) live migration, as mentioned, the
post-copy preempt channel does not send any magic number. Also, tls live
migrations already does tls handshake before creating other channels, so
this issue is not possible with tls, hence this logic is avoided for tls
live migrations. This patch uses read peek to check the magic number of
channels so that current data/control stream management remains
un-effected.
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Daniel P. Berrange <berrange@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Suggested-by: Daniel P. Berrange <berrange@redhat.com>
Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit 6720c2b32725e6ac404f22851a0ecd0a71d0cbe2)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
migration/channel.c | 45 +++++++++++++++++++++++++++++++++
migration/channel.h | 5 ++++
migration/migration.c | 54 ++++++++++++++++++++++++++++------------
migration/multifd.c | 19 +++++++-------
migration/multifd.h | 2 +-
migration/postcopy-ram.c | 5 +---
migration/postcopy-ram.h | 2 +-
7 files changed, 101 insertions(+), 31 deletions(-)
diff --git a/migration/channel.c b/migration/channel.c
index 1b0815039f..ca3319a309 100644
--- a/migration/channel.c
+++ b/migration/channel.c
@@ -92,3 +92,48 @@ void migration_channel_connect(MigrationState *s,
migrate_fd_connect(s, error);
error_free(error);
}
+
+
+/**
+ * @migration_channel_read_peek - Peek at migration channel, without
+ * actually removing it from channel buffer.
+ *
+ * @ioc: the channel object
+ * @buf: the memory region to read data into
+ * @buflen: the number of bytes to read in @buf
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Returns 0 if successful, returns -1 and sets @errp if fails.
+ */
+int migration_channel_read_peek(QIOChannel *ioc,
+ const char *buf,
+ const size_t buflen,
+ Error **errp)
+{
+ ssize_t len = 0;
+ struct iovec iov = { .iov_base = (char *)buf, .iov_len = buflen };
+
+ while (true) {
+ len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL,
+ QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp);
+
+ if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) {
+ error_setg(errp,
+ "Failed to peek at channel");
+ return -1;
+ }
+
+ if (len == buflen) {
+ break;
+ }
+
+ /* 1ms sleep. */
+ if (qemu_in_coroutine()) {
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000);
+ } else {
+ g_usleep(1000);
+ }
+ }
+
+ return 0;
+}
diff --git a/migration/channel.h b/migration/channel.h
index 67a461c28a..5bdb8208a7 100644
--- a/migration/channel.h
+++ b/migration/channel.h
@@ -24,4 +24,9 @@ void migration_channel_connect(MigrationState *s,
QIOChannel *ioc,
const char *hostname,
Error *error_in);
+
+int migration_channel_read_peek(QIOChannel *ioc,
+ const char *buf,
+ const size_t buflen,
+ Error **errp);
#endif
diff --git a/migration/migration.c b/migration/migration.c
index f485eea5fb..593dbd25de 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -31,6 +31,7 @@
#include "migration.h"
#include "savevm.h"
#include "qemu-file.h"
+#include "channel.h"
#include "migration/vmstate.h"
#include "block/block.h"
#include "qapi/error.h"
@@ -663,10 +664,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
- if (multifd_load_setup(errp) != 0) {
- return false;
- }
-
if (!mis->from_src_file) {
mis->from_src_file = f;
}
@@ -733,31 +730,56 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
MigrationIncomingState *mis = migration_incoming_get_current();
Error *local_err = NULL;
- bool start_migration;
QEMUFile *f;
+ bool default_channel = true;
+ uint32_t channel_magic = 0;
+ int ret = 0;
- if (!mis->from_src_file) {
- /* The first connection (multifd may have multiple) */
+ if (migrate_use_multifd() && !migrate_postcopy_ram() &&
+ qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_READ_MSG_PEEK)) {
+ /*
+ * With multiple channels, it is possible that we receive channels
+ * out of order on destination side, causing incorrect mapping of
+ * source channels on destination side. Check channel MAGIC to
+ * decide type of channel. Please note this is best effort, postcopy
+ * preempt channel does not send any magic number so avoid it for
+ * postcopy live migration. Also tls live migration already does
+ * tls handshake while initializing main channel so with tls this
+ * issue is not possible.
+ */
+ ret = migration_channel_read_peek(ioc, (void *)&channel_magic,
+ sizeof(channel_magic), &local_err);
+
+ if (ret != 0) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+ } else {
+ default_channel = !mis->from_src_file;
+ }
+
+ if (multifd_load_setup(errp) != 0) {
+ error_setg(errp, "Failed to setup multifd channels");
+ return;
+ }
+
+ if (default_channel) {
f = qemu_file_new_input(ioc);
if (!migration_incoming_setup(f, errp)) {
return;
}
-
- /*
- * Common migration only needs one channel, so we can start
- * right now. Some features need more than one channel, we wait.
- */
- start_migration = !migration_needs_multiple_sockets();
} else {
/* Multiple connections */
assert(migration_needs_multiple_sockets());
if (migrate_use_multifd()) {
- start_migration = multifd_recv_new_channel(ioc, &local_err);
+ multifd_recv_new_channel(ioc, &local_err);
} else {
assert(migrate_postcopy_preempt());
f = qemu_file_new_input(ioc);
- start_migration = postcopy_preempt_new_channel(mis, f);
+ postcopy_preempt_new_channel(mis, f);
}
if (local_err) {
error_propagate(errp, local_err);
@@ -765,7 +787,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
}
}
- if (start_migration) {
+ if (migration_has_all_channels()) {
/* If it's a recovery, we're done */
if (postcopy_try_recover()) {
return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 509bbbe3bf..c3385529cf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1167,9 +1167,14 @@ int multifd_load_setup(Error **errp)
uint32_t page_count = MULTIFD_PACKET_SIZE / qemu_target_page_size();
uint8_t i;
- if (!migrate_use_multifd()) {
+ /*
+ * Return successfully if multiFD recv state is already initialised
+ * or multiFD is not enabled.
+ */
+ if (multifd_recv_state || !migrate_use_multifd()) {
return 0;
}
+
if (!migrate_multi_channels_is_allowed()) {
error_setg(errp, "multifd is not supported by current protocol");
return -1;
@@ -1228,11 +1233,9 @@ bool multifd_recv_all_channels_created(void)
/*
* Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
*/
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
{
MultiFDRecvParams *p;
Error *local_err = NULL;
@@ -1245,7 +1248,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
"failed to receive packet"
" via multifd channel %d: ",
qatomic_read(&multifd_recv_state->count));
- return false;
+ return;
}
trace_multifd_recv_new_channel(id);
@@ -1255,7 +1258,7 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
id);
multifd_recv_terminate_threads(local_err);
error_propagate(errp, local_err);
- return false;
+ return;
}
p->c = ioc;
object_ref(OBJECT(ioc));
@@ -1266,6 +1269,4 @@ bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
QEMU_THREAD_JOINABLE);
qatomic_inc(&multifd_recv_state->count);
- return qatomic_read(&multifd_recv_state->count) ==
- migrate_multifd_channels();
}
diff --git a/migration/multifd.h b/migration/multifd.h
index 519f498643..913e4ba274 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,7 +18,7 @@ void multifd_save_cleanup(void);
int multifd_load_setup(Error **errp);
int multifd_load_cleanup(Error **errp);
bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
int multifd_send_sync_main(QEMUFile *f);
int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 0c55df0e52..b98e95dab0 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1538,7 +1538,7 @@ void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
}
}
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
{
/*
* The new loading channel has its own threads, so it needs to be
@@ -1547,9 +1547,6 @@ bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
qemu_file_set_blocking(file, true);
mis->postcopy_qemufile_dst = file;
trace_postcopy_preempt_new_channel();
-
- /* Start the migration immediately */
- return true;
}
/*
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 6147bf7d1d..25881c4127 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -190,7 +190,7 @@ enum PostcopyChannels {
RAM_CHANNEL_MAX,
};
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
int postcopy_preempt_setup(MigrationState *s, Error **errp);
int postcopy_preempt_wait_channel(MigrationState *s);
--
2.31.1

View File

@ -0,0 +1,325 @@
From e5834364958a3914d7b8b46b985a1b054728b466 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Thu, 19 Jan 2023 11:16:45 +0100
Subject: [PATCH 2/8] net: stream: add a new option to automatically reconnect
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect
RH-Bugzilla: 2169232
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: David Gibson (Red Hat) <dgibson@redhat.com>
RH-Commit: [2/2] 9b87647a9ed2e7c1b91bdfa9d0a736e091c892a5 (lvivier/qemu-kvm-centos)
In stream mode, if the server shuts down there is currently
no way to reconnect the client to a new server without removing
the NIC device and the netdev backend (or to reboot).
This patch introduces a reconnect option that specifies a delay
to try to reconnect with the same parameters.
Add a new test in qtest to test the reconnect option and the
connect/disconnect events.
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
(cherry picked from commit b95c0d4440950fba6dbef0f781962911fa42abdb)
---
net/stream.c | 53 ++++++++++++++++++-
qapi/net.json | 7 ++-
qemu-options.hx | 6 +--
tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++
4 files changed, 162 insertions(+), 5 deletions(-)
diff --git a/net/stream.c b/net/stream.c
index 37ff727e0c..9204b4c96e 100644
--- a/net/stream.c
+++ b/net/stream.c
@@ -39,6 +39,8 @@
#include "io/channel-socket.h"
#include "io/net-listener.h"
#include "qapi/qapi-events-net.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/clone-visitor.h"
typedef struct NetStreamState {
NetClientState nc;
@@ -49,11 +51,15 @@ typedef struct NetStreamState {
guint ioc_write_tag;
SocketReadState rs;
unsigned int send_index; /* number of bytes sent*/
+ uint32_t reconnect;
+ guint timer_tag;
+ SocketAddress *addr;
} NetStreamState;
static void net_stream_listen(QIONetListener *listener,
QIOChannelSocket *cioc,
void *opaque);
+static void net_stream_arm_reconnect(NetStreamState *s);
static gboolean net_stream_writable(QIOChannel *ioc,
GIOCondition condition,
@@ -170,6 +176,7 @@ static gboolean net_stream_send(QIOChannel *ioc,
qemu_set_info_str(&s->nc, "%s", "");
qapi_event_send_netdev_stream_disconnected(s->nc.name);
+ net_stream_arm_reconnect(s);
return G_SOURCE_REMOVE;
}
@@ -187,6 +194,14 @@ static gboolean net_stream_send(QIOChannel *ioc,
static void net_stream_cleanup(NetClientState *nc)
{
NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc);
+ if (s->timer_tag) {
+ g_source_remove(s->timer_tag);
+ s->timer_tag = 0;
+ }
+ if (s->addr) {
+ qapi_free_SocketAddress(s->addr);
+ s->addr = NULL;
+ }
if (s->ioc) {
if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) {
if (s->ioc_read_tag) {
@@ -346,12 +361,37 @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque)
error:
object_unref(OBJECT(s->ioc));
s->ioc = NULL;
+ net_stream_arm_reconnect(s);
+}
+
+static gboolean net_stream_reconnect(gpointer data)
+{
+ NetStreamState *s = data;
+ QIOChannelSocket *sioc;
+
+ s->timer_tag = 0;
+
+ sioc = qio_channel_socket_new();
+ s->ioc = QIO_CHANNEL(sioc);
+ qio_channel_socket_connect_async(sioc, s->addr,
+ net_stream_client_connected, s,
+ NULL, NULL);
+ return G_SOURCE_REMOVE;
+}
+
+static void net_stream_arm_reconnect(NetStreamState *s)
+{
+ if (s->reconnect && s->timer_tag == 0) {
+ s->timer_tag = g_timeout_add_seconds(s->reconnect,
+ net_stream_reconnect, s);
+ }
}
static int net_stream_client_init(NetClientState *peer,
const char *model,
const char *name,
SocketAddress *addr,
+ uint32_t reconnect,
Error **errp)
{
NetStreamState *s;
@@ -364,6 +404,10 @@ static int net_stream_client_init(NetClientState *peer,
s->ioc = QIO_CHANNEL(sioc);
s->nc.link_down = true;
+ s->reconnect = reconnect;
+ if (reconnect) {
+ s->addr = QAPI_CLONE(SocketAddress, addr);
+ }
qio_channel_socket_connect_async(sioc, addr,
net_stream_client_connected, s,
NULL, NULL);
@@ -380,7 +424,14 @@ int net_init_stream(const Netdev *netdev, const char *name,
sock = &netdev->u.stream;
if (!sock->has_server || !sock->server) {
- return net_stream_client_init(peer, "stream", name, sock->addr, errp);
+ return net_stream_client_init(peer, "stream", name, sock->addr,
+ sock->has_reconnect ? sock->reconnect : 0,
+ errp);
+ }
+ if (sock->has_reconnect) {
+ error_setg(errp, "'reconnect' option is incompatible with "
+ "socket in server mode");
+ return -1;
}
return net_stream_server_init(peer, "stream", name, sock->addr, errp);
}
diff --git a/qapi/net.json b/qapi/net.json
index 522ac582ed..d6eb30008b 100644
--- a/qapi/net.json
+++ b/qapi/net.json
@@ -585,6 +585,10 @@
# @addr: socket address to listen on (server=true)
# or connect to (server=false)
# @server: create server socket (default: false)
+# @reconnect: For a client socket, if a socket is disconnected,
+# then attempt a reconnect after the given number of seconds.
+# Setting this to zero disables this function. (default: 0)
+# (since 8.0)
#
# Only SocketAddress types 'unix', 'inet' and 'fd' are supported.
#
@@ -593,7 +597,8 @@
{ 'struct': 'NetdevStreamOptions',
'data': {
'addr': 'SocketAddress',
- '*server': 'bool' } }
+ '*server': 'bool',
+ '*reconnect': 'uint32' } }
##
# @NetdevDgramOptions:
diff --git a/qemu-options.hx b/qemu-options.hx
index ea02ca3a45..48eef4aa2c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -2766,9 +2766,9 @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
"-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n"
" configure a network backend to connect to another network\n"
" using an UDP tunnel\n"
- "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n"
- "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n"
+ "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n"
" configure a network backend to connect to another network\n"
" using a socket connection in stream mode.\n"
"-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n"
diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c
index 6ba256e173..acc32c378b 100644
--- a/tests/qtest/netdev-socket.c
+++ b/tests/qtest/netdev-socket.c
@@ -11,6 +11,10 @@
#include <glib/gstdio.h>
#include "../unit/socket-helpers.h"
#include "libqtest.h"
+#include "qapi/qmp/qstring.h"
+#include "qemu/sockets.h"
+#include "qapi/qobject-input-visitor.h"
+#include "qapi/qapi-visit-sockets.h"
#define CONNECTION_TIMEOUT 5
@@ -142,6 +146,101 @@ static void test_stream_inet_ipv4(void)
qtest_quit(qts0);
}
+static void wait_stream_connected(QTestState *qts, const char *id,
+ SocketAddress **addr)
+{
+ QDict *resp, *data;
+ QString *qstr;
+ QObject *obj;
+ Visitor *v = NULL;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+ g_assert_nonnull(data);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+
+ obj = qdict_get(data, "addr");
+
+ v = qobject_input_visitor_new(obj);
+ visit_type_SocketAddress(v, NULL, addr, NULL);
+ visit_free(v);
+ qobject_unref(resp);
+}
+
+static void wait_stream_disconnected(QTestState *qts, const char *id)
+{
+ QDict *resp, *data;
+ QString *qstr;
+
+ resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED");
+ g_assert_nonnull(resp);
+ data = qdict_get_qdict(resp, "data");
+ g_assert_nonnull(data);
+
+ qstr = qobject_to(QString, qdict_get(data, "netdev-id"));
+ g_assert_nonnull(data);
+
+ g_assert(!strcmp(qstring_get_str(qstr), id));
+ qobject_unref(resp);
+}
+
+static void test_stream_inet_reconnect(void)
+{
+ QTestState *qts0, *qts1;
+ int port;
+ SocketAddress *addr;
+
+ port = inet_get_free_port(false);
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,server=false,id=st0,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,reconnect=1,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ /* kill server */
+ qtest_quit(qts0);
+
+ /* check client has been disconnected */
+ wait_stream_disconnected(qts1, "st0");
+
+ /* restart server */
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ /* wait connection events*/
+ wait_stream_connected(qts0, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ qapi_free_SocketAddress(addr);
+
+ wait_stream_connected(qts1, "st0", &addr);
+ g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET);
+ g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1");
+ g_assert_cmpint(atoi(addr->u.inet.port), ==, port);
+ qapi_free_SocketAddress(addr);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
static void test_stream_inet_ipv6(void)
{
QTestState *qts0, *qts1;
@@ -418,6 +517,8 @@ int main(int argc, char **argv)
#ifndef _WIN32
qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast);
#endif
+ qtest_add_func("/netdev/stream/inet/reconnect",
+ test_stream_inet_reconnect);
}
if (has_ipv6) {
qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6);
--
2.31.1

View File

@ -0,0 +1,50 @@
From b330bf0a2ad5af73d3c62997f7f0fa5b61f1796b Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@redhat.com>
Date: Tue, 14 Feb 2023 14:48:37 +0100
Subject: [PATCH 8/8] target/s390x/arch_dump: Fix memory corruption in
s390x_write_elf64_notes()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Thomas Huth <thuth@redhat.com>
RH-MergeRequest: 152: Fix memory corruption in s390x_write_elf64_notes()
RH-Bugzilla: 2168172
RH-Acked-by: Cornelia Huck <cohuck@redhat.com>
RH-Acked-by: David Hildenbrand <david@redhat.com>
RH-Acked-by: Cédric Le Goater <clg@redhat.com>
RH-Commit: [1/1] 37a2c997b2c8b7524e0b6299891bf3ea7c9a46d0 (thuth/qemu-kvm-cs9)
Bugzilla: https://bugzilla.redhat.com/2168172
Upstream-Status: Posted (and reviewed, but not merged yet)
"note_size" can be smaller than sizeof(note), so unconditionally calling
memset(notep, 0, sizeof(note)) could cause a memory corruption here in
case notep has been allocated dynamically, thus let's use note_size as
length argument for memset() instead.
Fixes: 113d8f4e95 ("s390x: pv: Add dump support")
Message-Id: <20230214141056.680969-1-thuth@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Thomas Huth <thuth@redhat.com>
---
target/s390x/arch_dump.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/s390x/arch_dump.c b/target/s390x/arch_dump.c
index a2329141e8..a7c44ba49d 100644
--- a/target/s390x/arch_dump.c
+++ b/target/s390x/arch_dump.c
@@ -248,7 +248,7 @@ static int s390x_write_elf64_notes(const char *note_name,
notep = g_malloc(note_size);
}
- memset(notep, 0, sizeof(note));
+ memset(notep, 0, note_size);
/* Setup note header data */
notep->hdr.n_descsz = cpu_to_be32(content_size);
--
2.31.1

View File

@ -0,0 +1,505 @@
From 39d5761fe1f546e764dedf2ea32c55d8f5222696 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <lvivier@redhat.com>
Date: Wed, 18 Jan 2023 13:04:05 +0100
Subject: [PATCH 1/8] tests/qtest: netdev: test stream and dgram backends
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Laurent Vivier <lvivier@redhat.com>
RH-MergeRequest: 148: net: stream: add a new option to automatically reconnect
RH-Bugzilla: 2169232
RH-Acked-by: Eugenio Pérez <eperezma@redhat.com>
RH-Acked-by: Cindy Lu <lulu@redhat.com>
RH-Acked-by: MST <mst@redhat.com>
RH-Acked-by: David Gibson (Red Hat) <dgibson@redhat.com>
RH-Commit: [1/2] 75c71b47eea072e14651a96612d402b50d2b8f1e (lvivier/qemu-kvm-centos)
Signed-off-by: Laurent Vivier <lvivier@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Message-Id: <20230118120405.1876329-1-lvivier@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
(cherry picked from commit c95031a19f0d7f418a597243f6f84b031a858997)
---
tests/qtest/meson.build | 2 +
tests/qtest/netdev-socket.c | 448 ++++++++++++++++++++++++++++++++++++
2 files changed, 450 insertions(+)
create mode 100644 tests/qtest/netdev-socket.c
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
index 9df3f9f8b9..2e7c6fe5e3 100644
--- a/tests/qtest/meson.build
+++ b/tests/qtest/meson.build
@@ -27,6 +27,7 @@ qtests_generic = [
'test-hmp',
'qos-test',
'readconfig-test',
+ 'netdev-socket',
]
if config_host.has_key('CONFIG_MODULES')
qtests_generic += [ 'modules-test' ]
@@ -299,6 +300,7 @@ qtests = {
'tpm-tis-device-swtpm-test': [io, tpmemu_files, 'tpm-tis-util.c'],
'tpm-tis-device-test': [io, tpmemu_files, 'tpm-tis-util.c'],
'vmgenid-test': files('boot-sector.c', 'acpi-utils.c'),
+ 'netdev-socket': files('netdev-socket.c', '../unit/socket-helpers.c'),
}
gvnc = dependency('gvnc-1.0', required: false)
diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c
new file mode 100644
index 0000000000..6ba256e173
--- /dev/null
+++ b/tests/qtest/netdev-socket.c
@@ -0,0 +1,448 @@
+/*
+ * QTest testcase for netdev stream and dgram
+ *
+ * Copyright (c) 2022 Red Hat, Inc.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/sockets.h"
+#include <glib/gstdio.h>
+#include "../unit/socket-helpers.h"
+#include "libqtest.h"
+
+#define CONNECTION_TIMEOUT 5
+
+#define EXPECT_STATE(q, e, t) \
+do { \
+ char *resp = NULL; \
+ g_test_timer_start(); \
+ do { \
+ g_free(resp); \
+ resp = qtest_hmp(q, "info network"); \
+ if (t) { \
+ strrchr(resp, t)[0] = 0; \
+ } \
+ if (g_str_equal(resp, e)) { \
+ break; \
+ } \
+ } while (g_test_timer_elapsed() < CONNECTION_TIMEOUT); \
+ g_assert_cmpstr(resp, ==, e); \
+ g_free(resp); \
+} while (0)
+
+static gchar *tmpdir;
+
+static int inet_get_free_port_socket_ipv4(int sock)
+{
+ struct sockaddr_in addr;
+ socklen_t len;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = INADDR_ANY;
+ addr.sin_port = 0;
+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ return -1;
+ }
+
+ len = sizeof(addr);
+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) {
+ return -1;
+ }
+
+ return ntohs(addr.sin_port);
+}
+
+static int inet_get_free_port_socket_ipv6(int sock)
+{
+ struct sockaddr_in6 addr;
+ socklen_t len;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_any;
+ addr.sin6_port = 0;
+ if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+ return -1;
+ }
+
+ len = sizeof(addr);
+ if (getsockname(sock, (struct sockaddr *)&addr, &len) < 0) {
+ return -1;
+ }
+
+ return ntohs(addr.sin6_port);
+}
+
+static int inet_get_free_port_multiple(int nb, int *port, bool ipv6)
+{
+ int sock[nb];
+ int i;
+
+ for (i = 0; i < nb; i++) {
+ sock[i] = socket(ipv6 ? AF_INET6 : AF_INET, SOCK_STREAM, 0);
+ if (sock[i] < 0) {
+ break;
+ }
+ port[i] = ipv6 ? inet_get_free_port_socket_ipv6(sock[i]) :
+ inet_get_free_port_socket_ipv4(sock[i]);
+ if (port[i] == -1) {
+ break;
+ }
+ }
+
+ nb = i;
+ for (i = 0; i < nb; i++) {
+ closesocket(sock[i]);
+ }
+
+ return nb;
+}
+
+static int inet_get_free_port(bool ipv6)
+{
+ int nb, port;
+
+ nb = inet_get_free_port_multiple(1, &port, ipv6);
+ g_assert_cmpint(nb, ==, 1);
+
+ return port;
+}
+
+static void test_stream_inet_ipv4(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ int port;
+
+ port = inet_get_free_port(false);
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,server=false,id=st0,addr.type=inet,"
+ "addr.ipv4=on,addr.ipv6=off,"
+ "addr.host=127.0.0.1,addr.port=%d", port);
+
+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:127.0.0.1:%d\r\n",
+ port);
+ EXPECT_STATE(qts1, expect, 0);
+ g_free(expect);
+
+ /* the port is unknown, check only the address */
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:127.0.0.1", ':');
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
+static void test_stream_inet_ipv6(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ int port;
+
+ port = inet_get_free_port(true);
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,addr.type=inet,"
+ "addr.ipv4=off,addr.ipv6=on,"
+ "addr.host=::1,addr.port=%d", port);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,server=false,id=st0,addr.type=inet,"
+ "addr.ipv4=off,addr.ipv6=on,"
+ "addr.host=::1,addr.port=%d", port);
+
+ expect = g_strdup_printf("st0: index=0,type=stream,tcp:::1:%d\r\n",
+ port);
+ EXPECT_STATE(qts1, expect, 0);
+ g_free(expect);
+
+ /* the port is unknown, check only the address */
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,tcp:::1", ':');
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
+static void test_stream_unix(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ gchar *path;
+
+ path = g_strconcat(tmpdir, "/stream_unix", NULL);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,"
+ "addr.type=unix,addr.path=%s,",
+ path);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=false,"
+ "addr.type=unix,addr.path=%s",
+ path);
+
+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path);
+ EXPECT_STATE(qts1, expect, 0);
+ EXPECT_STATE(qts0, expect, 0);
+ g_free(expect);
+ g_free(path);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
+#ifdef CONFIG_LINUX
+static void test_stream_unix_abstract(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ gchar *path;
+
+ path = g_strconcat(tmpdir, "/stream_unix_abstract", NULL);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=true,"
+ "addr.type=unix,addr.path=%s,"
+ "addr.abstract=on",
+ path);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,server=false,"
+ "addr.type=unix,addr.path=%s,addr.abstract=on",
+ path);
+
+ expect = g_strdup_printf("st0: index=0,type=stream,unix:%s\r\n", path);
+ EXPECT_STATE(qts1, expect, 0);
+ EXPECT_STATE(qts0, expect, 0);
+ g_free(expect);
+ g_free(path);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+#endif
+
+#ifndef _WIN32
+static void test_stream_fd(void)
+{
+ QTestState *qts0, *qts1;
+ int sock[2];
+ int ret;
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM, 0, sock);
+ g_assert_true(ret == 0);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d",
+ sock[0]);
+
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev stream,id=st0,addr.type=fd,addr.str=%d",
+ sock[1]);
+
+ EXPECT_STATE(qts1, "st0: index=0,type=stream,unix:\r\n", 0);
+ EXPECT_STATE(qts0, "st0: index=0,type=stream,unix:\r\n", 0);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+
+ closesocket(sock[0]);
+ closesocket(sock[1]);
+}
+#endif
+
+static void test_dgram_inet(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ int port[2];
+ int nb;
+
+ nb = inet_get_free_port_multiple(2, port, false);
+ g_assert_cmpint(nb, ==, 2);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,"
+ "local.type=inet,local.host=127.0.0.1,local.port=%d,"
+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d",
+ port[0], port[1]);
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,"
+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n",
+ port[0], port[1]);
+ EXPECT_STATE(qts0, expect, 0);
+ g_free(expect);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,"
+ "local.type=inet,local.host=127.0.0.1,local.port=%d,"
+ "remote.type=inet,remote.host=127.0.0.1,remote.port=%d",
+ port[1], port[0]);
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,"
+ "udp=127.0.0.1:%d/127.0.0.1:%d\r\n",
+ port[1], port[0]);
+ EXPECT_STATE(qts1, expect, 0);
+ g_free(expect);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
+#ifndef _WIN32
+static void test_dgram_mcast(void)
+{
+ QTestState *qts;
+
+ qts = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,"
+ "remote.type=inet,remote.host=230.0.0.1,remote.port=1234");
+
+ EXPECT_STATE(qts, "st0: index=0,type=dgram,mcast=230.0.0.1:1234\r\n", 0);
+
+ qtest_quit(qts);
+}
+
+static void test_dgram_unix(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ gchar *path0, *path1;
+
+ path0 = g_strconcat(tmpdir, "/dgram_unix0", NULL);
+ path1 = g_strconcat(tmpdir, "/dgram_unix1", NULL);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,local.type=unix,local.path=%s,"
+ "remote.type=unix,remote.path=%s",
+ path0, path1);
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n",
+ path0, path1);
+ EXPECT_STATE(qts0, expect, 0);
+ g_free(expect);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,local.type=unix,local.path=%s,"
+ "remote.type=unix,remote.path=%s",
+ path1, path0);
+
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,udp=%s:%s\r\n",
+ path1, path0);
+ EXPECT_STATE(qts1, expect, 0);
+ g_free(expect);
+
+ unlink(path0);
+ g_free(path0);
+ unlink(path1);
+ g_free(path1);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+}
+
+static void test_dgram_fd(void)
+{
+ QTestState *qts0, *qts1;
+ char *expect;
+ int ret;
+ int sv[2];
+
+ ret = socketpair(PF_UNIX, SOCK_DGRAM, 0, sv);
+ g_assert_cmpint(ret, !=, -1);
+
+ qts0 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,local.type=fd,local.str=%d",
+ sv[0]);
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[0]);
+ EXPECT_STATE(qts0, expect, 0);
+ g_free(expect);
+
+ qts1 = qtest_initf("-nodefaults -M none "
+ "-netdev dgram,id=st0,local.type=fd,local.str=%d",
+ sv[1]);
+
+
+ expect = g_strdup_printf("st0: index=0,type=dgram,fd=%d unix\r\n", sv[1]);
+ EXPECT_STATE(qts1, expect, 0);
+ g_free(expect);
+
+ qtest_quit(qts1);
+ qtest_quit(qts0);
+
+ closesocket(sv[0]);
+ closesocket(sv[1]);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int ret;
+ bool has_ipv4, has_ipv6, has_afunix;
+ g_autoptr(GError) err = NULL;
+
+ socket_init();
+ g_test_init(&argc, &argv, NULL);
+
+ if (socket_check_protocol_support(&has_ipv4, &has_ipv6) < 0) {
+ g_error("socket_check_protocol_support() failed\n");
+ }
+
+ tmpdir = g_dir_make_tmp("netdev-socket.XXXXXX", &err);
+ if (tmpdir == NULL) {
+ g_error("Can't create temporary directory in %s: %s",
+ g_get_tmp_dir(), err->message);
+ }
+
+ if (has_ipv4) {
+ qtest_add_func("/netdev/stream/inet/ipv4", test_stream_inet_ipv4);
+ qtest_add_func("/netdev/dgram/inet", test_dgram_inet);
+#ifndef _WIN32
+ qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast);
+#endif
+ }
+ if (has_ipv6) {
+ qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6);
+ }
+
+ socket_check_afunix_support(&has_afunix);
+ if (has_afunix) {
+#ifndef _WIN32
+ qtest_add_func("/netdev/dgram/unix", test_dgram_unix);
+#endif
+ qtest_add_func("/netdev/stream/unix", test_stream_unix);
+#ifdef CONFIG_LINUX
+ qtest_add_func("/netdev/stream/unix/abstract",
+ test_stream_unix_abstract);
+#endif
+#ifndef _WIN32
+ qtest_add_func("/netdev/stream/fd", test_stream_fd);
+ qtest_add_func("/netdev/dgram/fd", test_dgram_fd);
+#endif
+ }
+
+ ret = g_test_run();
+
+ g_rmdir(tmpdir);
+ g_free(tmpdir);
+
+ return ret;
+}
--
2.31.1

View File

@ -0,0 +1,169 @@
From 80445fed73a7d1a87e8ce96f6cb7d505e437f845 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Wed, 1 Feb 2023 16:10:54 -0500
Subject: [PATCH 4/8] util/userfaultfd: Add uffd_open()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 149: Support /dev/userfaultfd
RH-Bugzilla: 2158704
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [2/3] 4c81696314ab26db47c3415fa2c2501c6a572b5c (peterx/qemu-kvm)
Add a helper to create the uffd handle.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit d5890ea0722831eea76a0efd23a496b3e8815fe8)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/qemu/userfaultfd.h | 12 ++++++++++++
migration/postcopy-ram.c | 11 +++++------
tests/qtest/migration-test.c | 4 ++--
util/userfaultfd.c | 13 +++++++++++--
4 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/include/qemu/userfaultfd.h b/include/qemu/userfaultfd.h
index 6b74f92792..d764496f0b 100644
--- a/include/qemu/userfaultfd.h
+++ b/include/qemu/userfaultfd.h
@@ -13,10 +13,20 @@
#ifndef USERFAULTFD_H
#define USERFAULTFD_H
+#ifdef CONFIG_LINUX
+
#include "qemu/osdep.h"
#include "exec/hwaddr.h"
#include <linux/userfaultfd.h>
+/**
+ * uffd_open(): Open an userfaultfd handle for current context.
+ *
+ * @flags: The flags we want to pass in when creating the handle.
+ *
+ * Returns: the uffd handle if >=0, or <0 if error happens.
+ */
+int uffd_open(int flags);
int uffd_query_features(uint64_t *features);
int uffd_create_fd(uint64_t features, bool non_blocking);
void uffd_close_fd(int uffd_fd);
@@ -32,4 +42,6 @@ int uffd_wakeup(int uffd_fd, void *addr, uint64_t length);
int uffd_read_events(int uffd_fd, struct uffd_msg *msgs, int count);
bool uffd_poll_events(int uffd_fd, int tmo);
+#endif /* CONFIG_LINUX */
+
#endif /* USERFAULTFD_H */
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index b9a37ef255..0c55df0e52 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -37,6 +37,7 @@
#include "qemu-file.h"
#include "yank_functions.h"
#include "tls.h"
+#include "qemu/userfaultfd.h"
/* Arbitrary limit on size of each discard command,
* keeps them around ~200 bytes
@@ -226,11 +227,9 @@ static bool receive_ufd_features(uint64_t *features)
int ufd;
bool ret = true;
- /* if we are here __NR_userfaultfd should exists */
- ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ ufd = uffd_open(O_CLOEXEC);
if (ufd == -1) {
- error_report("%s: syscall __NR_userfaultfd failed: %s", __func__,
- strerror(errno));
+ error_report("%s: uffd_open() failed: %s", __func__, strerror(errno));
return false;
}
@@ -375,7 +374,7 @@ bool postcopy_ram_supported_by_host(MigrationIncomingState *mis)
goto out;
}
- ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ ufd = uffd_open(O_CLOEXEC);
if (ufd == -1) {
error_report("%s: userfaultfd not available: %s", __func__,
strerror(errno));
@@ -1160,7 +1159,7 @@ static int postcopy_temp_pages_setup(MigrationIncomingState *mis)
int postcopy_ram_incoming_setup(MigrationIncomingState *mis)
{
/* Open the fd for the kernel to give us userfaults */
- mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ mis->userfault_fd = uffd_open(O_CLOEXEC | O_NONBLOCK);
if (mis->userfault_fd == -1) {
error_report("%s: Failed to open userfault fd: %s", __func__,
strerror(errno));
diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
index dbde726adf..0100e1bdbc 100644
--- a/tests/qtest/migration-test.c
+++ b/tests/qtest/migration-test.c
@@ -61,14 +61,14 @@ static bool uffd_feature_thread_id;
#if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
#include <sys/eventfd.h>
#include <sys/ioctl.h>
-#include <linux/userfaultfd.h>
+#include "qemu/userfaultfd.h"
static bool ufd_version_check(void)
{
struct uffdio_api api_struct;
uint64_t ioctl_mask;
- int ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ int ufd = uffd_open(O_CLOEXEC);
if (ufd == -1) {
g_test_message("Skipping test: userfaultfd not available");
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
index f1cd6af2b1..4953b3137d 100644
--- a/util/userfaultfd.c
+++ b/util/userfaultfd.c
@@ -19,6 +19,15 @@
#include <sys/syscall.h>
#include <sys/ioctl.h>
+int uffd_open(int flags)
+{
+#if defined(__NR_userfaultfd)
+ return syscall(__NR_userfaultfd, flags);
+#else
+ return -EINVAL;
+#endif
+}
+
/**
* uffd_query_features: query UFFD features
*
@@ -32,7 +41,7 @@ int uffd_query_features(uint64_t *features)
struct uffdio_api api_struct = { 0 };
int ret = -1;
- uffd_fd = syscall(__NR_userfaultfd, O_CLOEXEC);
+ uffd_fd = uffd_open(O_CLOEXEC);
if (uffd_fd < 0) {
trace_uffd_query_features_nosys(errno);
return -1;
@@ -69,7 +78,7 @@ int uffd_create_fd(uint64_t features, bool non_blocking)
uint64_t ioctl_mask = BIT(_UFFDIO_REGISTER) | BIT(_UFFDIO_UNREGISTER);
flags = O_CLOEXEC | (non_blocking ? O_NONBLOCK : 0);
- uffd_fd = syscall(__NR_userfaultfd, flags);
+ uffd_fd = uffd_open(flags);
if (uffd_fd < 0) {
trace_uffd_create_fd_nosys(errno);
return -1;
--
2.31.1

View File

@ -0,0 +1,94 @@
From a91da7741464dadeb306a741b4fb562e49ffea57 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 7 Feb 2023 15:57:11 -0500
Subject: [PATCH 5/8] util/userfaultfd: Support /dev/userfaultfd
RH-Author: Peter Xu <peterx@redhat.com>
RH-MergeRequest: 149: Support /dev/userfaultfd
RH-Bugzilla: 2158704
RH-Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
RH-Acked-by: quintela1 <quintela@redhat.com>
RH-Acked-by: Miroslav Rezanina <mrezanin@redhat.com>
RH-Commit: [3/3] 5f427d8c18c210ff8f66724c9e358a7120619e69 (peterx/qemu-kvm)
Teach QEMU to use /dev/userfaultfd when it existed and fallback to the
system call if either it's not there or doesn't have enough permission.
Firstly, as long as the app has permission to access /dev/userfaultfd, it
always have the ability to trap kernel faults which QEMU mostly wants.
Meanwhile, in some context (e.g. containers) the userfaultfd syscall can be
forbidden, so it can be the major way to use postcopy in a restricted
environment with strict seccomp setup.
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
(cherry picked from commit c40c0463413b941c13fe5f99a90c02d7d6584828)
Signed-off-by: Peter Xu <peterx@redhat.com>
---
util/trace-events | 1 +
util/userfaultfd.c | 32 ++++++++++++++++++++++++++++++++
2 files changed, 33 insertions(+)
diff --git a/util/trace-events b/util/trace-events
index c8f53d7d9f..16f78d8fe5 100644
--- a/util/trace-events
+++ b/util/trace-events
@@ -93,6 +93,7 @@ qemu_vfio_region_info(const char *desc, uint64_t region_ofs, uint64_t region_siz
qemu_vfio_pci_map_bar(int index, uint64_t region_ofs, uint64_t region_size, int ofs, void *host) "map region bar#%d addr 0x%"PRIx64" size 0x%"PRIx64" ofs 0x%x host %p"
#userfaultfd.c
+uffd_detect_open_mode(int mode) "%d"
uffd_query_features_nosys(int err) "errno: %i"
uffd_query_features_api_failed(int err) "errno: %i"
uffd_create_fd_nosys(int err) "errno: %i"
diff --git a/util/userfaultfd.c b/util/userfaultfd.c
index 4953b3137d..fdff4867e8 100644
--- a/util/userfaultfd.c
+++ b/util/userfaultfd.c
@@ -18,10 +18,42 @@
#include <poll.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
+#include <fcntl.h>
+
+typedef enum {
+ UFFD_UNINITIALIZED = 0,
+ UFFD_USE_DEV_PATH,
+ UFFD_USE_SYSCALL,
+} uffd_open_mode;
int uffd_open(int flags)
{
#if defined(__NR_userfaultfd)
+ static uffd_open_mode open_mode;
+ static int uffd_dev;
+
+ /* Detect how to generate uffd desc when run the 1st time */
+ if (open_mode == UFFD_UNINITIALIZED) {
+ /*
+ * Make /dev/userfaultfd the default approach because it has better
+ * permission controls, meanwhile allows kernel faults without any
+ * privilege requirement (e.g. SYS_CAP_PTRACE).
+ */
+ uffd_dev = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (uffd_dev >= 0) {
+ open_mode = UFFD_USE_DEV_PATH;
+ } else {
+ /* Fallback to the system call */
+ open_mode = UFFD_USE_SYSCALL;
+ }
+ trace_uffd_detect_open_mode(open_mode);
+ }
+
+ if (open_mode == UFFD_USE_DEV_PATH) {
+ assert(uffd_dev >= 0);
+ return ioctl(uffd_dev, USERFAULTFD_IOC_NEW, flags);
+ }
+
return syscall(__NR_userfaultfd, flags);
#else
return -EINVAL;
--
2.31.1

View File

@ -148,7 +148,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \
Summary: QEMU is a machine emulator and virtualizer
Name: qemu-kvm
Version: 7.2.0
Release: 8%{?rcrel}%{?dist}%{?cc_suffix}
Release: 9%{?rcrel}%{?dist}%{?cc_suffix}
# Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped
# Epoch 15 used for RHEL 8
# Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5)
@ -356,6 +356,22 @@ Patch103: kvm-Revert-vhost-user-Monitor-slave-channel-in-vhost_use.patch
Patch104: kvm-Revert-vhost-user-Introduce-nested-event-loop-in-vho.patch
# For bz#2162569 - [transitional device][virtio-rng-pci-transitional]Stable Guest ABI failed between RHEL 8.6 to RHEL 9.2
Patch105: kvm-virtio-rng-pci-fix-transitional-migration-compat-for.patch
# For bz#2169232 - RFE: reconnect option for stream socket back-end
Patch106: kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch
# For bz#2169232 - RFE: reconnect option for stream socket back-end
Patch107: kvm-net-stream-add-a-new-option-to-automatically-reconne.patch
# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall
Patch108: kvm-linux-headers-Update-to-v6.1.patch
# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall
Patch109: kvm-util-userfaultfd-Add-uffd_open.patch
# For bz#2158704 - RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall
Patch110: kvm-util-userfaultfd-Support-dev-userfaultfd.patch
# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race
Patch111: kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch
# For bz#2169732 - Multifd migration fails under a weak network/socket ordering race
Patch112: kvm-migration-check-magic-value-for-deciding-the-mapping.patch
# For bz#2168172 - [s390x] qemu-kvm coredumps when SE crashes
Patch113: kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch
%if %{have_clang}
BuildRequires: clang
@ -1386,6 +1402,24 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \
%endif
%changelog
* Fri Feb 17 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-9
- kvm-tests-qtest-netdev-test-stream-and-dgram-backends.patch [bz#2169232]
- kvm-net-stream-add-a-new-option-to-automatically-reconne.patch [bz#2169232]
- kvm-linux-headers-Update-to-v6.1.patch [bz#2158704]
- kvm-util-userfaultfd-Add-uffd_open.patch [bz#2158704]
- kvm-util-userfaultfd-Support-dev-userfaultfd.patch [bz#2158704]
- kvm-io-Add-support-for-MSG_PEEK-for-socket-channel.patch [bz#2169732]
- kvm-migration-check-magic-value-for-deciding-the-mapping.patch [bz#2169732]
- kvm-target-s390x-arch_dump-Fix-memory-corruption-in-s390.patch [bz#2168172]
- Resolves: bz#2169232
(RFE: reconnect option for stream socket back-end)
- Resolves: bz#2158704
(RFE: Prefer /dev/userfaultfd over userfaultfd(2) syscall)
- Resolves: bz#2169732
(Multifd migration fails under a weak network/socket ordering race)
- Resolves: bz#2168172
([s390x] qemu-kvm coredumps when SE crashes)
* Thu Feb 09 2023 Miroslav Rezanina <mrezanin@redhat.com> - 7.2.0-8
- kvm-qcow2-Fix-theoretical-corruption-in-store_bitmap-err.patch [bz#2150180]
- kvm-qemu-img-commit-Report-errors-while-closing-the-imag.patch [bz#2150180]