diff --git a/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch b/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch new file mode 100644 index 0000000..3cfb6a7 --- /dev/null +++ b/kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch @@ -0,0 +1,42 @@ +From 42fc4705817fc4d16f2ba785fd29777ed2b7355a Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Tue, 29 Apr 2025 17:05:41 +0200 +Subject: [PATCH 1/9] file-posix: Define DM_MPATH_PROBE_PATHS + +RH-Author: Kevin Wolf +RH-MergeRequest: 370: file-posix: Fix multipath failover with SCSI passthrough +RH-Jira: RHEL-65852 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [1/2] 6680f4a3f15a768b1ca51aafea452c3d0886f12e (kmwolf/centos-qemu-kvm) + +While the kernel side isn't merged yet and we're still using old kernel +headers, just define DM_MPATH_PROBE_PATHS manually. + +This is a downstream-only patch that can be removed after the next minor +release. + +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/block/file-posix.c b/block/file-posix.c +index dea7b09b6c..52cc25db84 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -134,6 +134,11 @@ + #define RAW_LOCK_PERM_BASE 100 + #define RAW_LOCK_SHARED_BASE 200 + ++/* TODO Remove this when the kernel side is merged */ ++#if !defined(DM_MPATH_PROBE_PATHS) && defined(DM_GET_TARGET_VERSION) ++#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1) ++#endif ++ + typedef struct BDRVRawState { + int fd; + bool use_lock; +-- +2.39.3 + diff --git a/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch b/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch new file mode 100644 index 0000000..23a25f7 --- /dev/null +++ b/kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch @@ -0,0 +1,215 @@ +From 35a2470f67cb38c52246974a853d843dbb80b84d Mon Sep 17 00:00:00 2001 +From: Kevin Wolf +Date: Thu, 22 May 2025 15:08:03 +0200 +Subject: [PATCH 2/9] file-posix: Probe 
paths and retry SG_IO on potential path + errors + +RH-Author: Kevin Wolf +RH-MergeRequest: 370: file-posix: Fix multipath failover with SCSI passthrough +RH-Jira: RHEL-65852 +RH-Acked-by: Hanna Czenczek +RH-Acked-by: Stefan Hajnoczi +RH-Commit: [2/2] bbaa1d4de0675a2c67dafed74eacc0d1103aab18 (kmwolf/centos-qemu-kvm) + +When scsi-block is used on a host multipath device, it runs into the +problem that the kernel dm-mpath doesn't know anything about SCSI or +SG_IO and therefore can't decide if a SG_IO request returned an error +and needs to be retried on a different path. Instead of getting working +failover, an error is returned to scsi-block and handled according to +the configured error policy. Obviously, this is not what users want; +they want working failover. + +QEMU can parse the SG_IO result and determine whether this could have +been a path error, but just retrying the same request could just send it +to the same failing path again and result in the same error. + +With a kernel that supports the DM_MPATH_PROBE_PATHS ioctl on dm-mpath +block devices (queued in the device mapper tree for Linux 6.16), we can +tell the kernel to probe all paths and tell us if any usable paths +remained. If so, we can now retry the SG_IO ioctl and expect it to be +sent to a working path. 
+ +Signed-off-by: Kevin Wolf +Message-ID: <20250522130803.34738-1-kwolf@redhat.com> +Reviewed-by: Stefan Hajnoczi +Reviewed-by: Hanna Czenczek +Signed-off-by: Kevin Wolf +(cherry picked from commit bf627788ef17721955bfcfba84209a07ae5f54ea) +Signed-off-by: Kevin Wolf +--- + block/file-posix.c | 115 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 114 insertions(+), 1 deletion(-) + +diff --git a/block/file-posix.c b/block/file-posix.c +index 52cc25db84..77a35d9ae9 100644 +--- a/block/file-posix.c ++++ b/block/file-posix.c +@@ -41,6 +41,7 @@ + + #include "scsi/pr-manager.h" + #include "scsi/constants.h" ++#include "scsi/utils.h" + + #if defined(__APPLE__) && (__MACH__) + #include +@@ -72,6 +73,7 @@ + #include + #endif + #include ++#include + #include + #include + #include +@@ -139,6 +141,22 @@ + #define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_GET_TARGET_VERSION_CMD + 1) + #endif + ++/* ++ * Multiple retries are mostly meant for two separate scenarios: ++ * ++ * - DM_MPATH_PROBE_PATHS returns success, but before SG_IO completes, another ++ * path goes down. ++ * ++ * - DM_MPATH_PROBE_PATHS failed all paths in the current path group, so we have ++ * to send another SG_IO to switch to another path group to probe the paths in ++ * it. ++ * ++ * Even if each path is in a separate path group (path_grouping_policy set to ++ * failover), it's rare to have more than eight path groups - and even then ++ * pretty unlikely that only bad path groups would be chosen in eight retries. 
++ */ ++#define SG_IO_MAX_RETRIES 8 ++ + typedef struct BDRVRawState { + int fd; + bool use_lock; +@@ -166,6 +184,7 @@ typedef struct BDRVRawState { + bool use_linux_aio:1; + bool has_laio_fdsync:1; + bool use_linux_io_uring:1; ++ bool use_mpath:1; + int page_cache_inconsistent; /* errno from fdatasync failure */ + bool has_fallocate; + bool needs_alignment; +@@ -4262,15 +4281,105 @@ hdev_open_Mac_error: + /* Since this does ioctl the device must be already opened */ + bs->sg = hdev_is_sg(bs); + ++ /* sg devices aren't even block devices and can't use dm-mpath */ ++ s->use_mpath = !bs->sg; ++ + return ret; + } + + #if defined(__linux__) ++#if defined(DM_MPATH_PROBE_PATHS) ++static bool coroutine_fn sgio_path_error(int ret, sg_io_hdr_t *io_hdr) ++{ ++ if (ret < 0) { ++ switch (ret) { ++ case -ENODEV: ++ return true; ++ case -EAGAIN: ++ /* ++ * The device is probably suspended. This happens while the dm table ++ * is reloaded, e.g. because a path is added or removed. This is an ++ * operation that should complete within 1ms, so just wait a bit and ++ * retry. ++ * ++ * If the device was suspended for another reason, we'll wait and ++ * retry SG_IO_MAX_RETRIES times. This is a tolerable delay before ++ * we return an error and potentially stop the VM. 
++ */ ++ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000); ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ if (io_hdr->host_status != SCSI_HOST_OK) { ++ return true; ++ } ++ ++ switch (io_hdr->status) { ++ case GOOD: ++ case CONDITION_GOOD: ++ case INTERMEDIATE_GOOD: ++ case INTERMEDIATE_C_GOOD: ++ case RESERVATION_CONFLICT: ++ case COMMAND_TERMINATED: ++ return false; ++ case CHECK_CONDITION: ++ return !scsi_sense_buf_is_guest_recoverable(io_hdr->sbp, ++ io_hdr->mx_sb_len); ++ default: ++ return true; ++ } ++} ++ ++static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) ++{ ++ BDRVRawState *s = acb->bs->opaque; ++ RawPosixAIOData probe_acb; ++ ++ if (!s->use_mpath) { ++ return false; ++ } ++ ++ if (!sgio_path_error(ret, acb->ioctl.buf)) { ++ return false; ++ } ++ ++ probe_acb = (RawPosixAIOData) { ++ .bs = acb->bs, ++ .aio_type = QEMU_AIO_IOCTL, ++ .aio_fildes = s->fd, ++ .aio_offset = 0, ++ .ioctl = { ++ .buf = NULL, ++ .cmd = DM_MPATH_PROBE_PATHS, ++ }, ++ }; ++ ++ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &probe_acb); ++ if (ret == -ENOTTY) { ++ s->use_mpath = false; ++ } else if (ret == -EAGAIN) { ++ /* The device might be suspended for a table reload, worth retrying */ ++ return true; ++ } ++ ++ return ret == 0; ++} ++#else ++static bool coroutine_fn hdev_co_ioctl_sgio_retry(RawPosixAIOData *acb, int ret) ++{ ++ return false; ++} ++#endif /* DM_MPATH_PROBE_PATHS */ ++ + static int coroutine_fn + hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) + { + BDRVRawState *s = bs->opaque; + RawPosixAIOData acb; ++ int retries = SG_IO_MAX_RETRIES; + int ret; + + ret = fd_open(bs); +@@ -4298,7 +4407,11 @@ hdev_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) + }, + }; + +- return raw_thread_pool_submit(handle_aiocb_ioctl, &acb); ++ do { ++ ret = raw_thread_pool_submit(handle_aiocb_ioctl, &acb); ++ } while (req == SG_IO && retries-- && hdev_co_ioctl_sgio_retry(&acb, ret)); ++ ++ return ret; + 
} + #endif /* linux */ + +-- +2.39.3 + diff --git a/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch b/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch new file mode 100644 index 0000000..d22cb95 --- /dev/null +++ b/kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch @@ -0,0 +1,73 @@ +From dcf18bc367aac87def0b03e2f4450d14b6dd53a5 Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:30 +0200 +Subject: [PATCH 3/9] io: Fix partial struct copy in + qio_dns_resolver_lookup_sync_inet() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [1/7] 157425de9a5bcab4a63f84cceb35eb4954e7ed8b (JurajMarcin/centos-src-qemu-kvm) + +Commit aec21d3175 (qapi: Add InetSocketAddress member keep-alive) +introduces the keep-alive flag, but this flag is not copied together +with other options in qio_dns_resolver_lookup_sync_inet(). + +This patch fixes this issue and also prevents future ones by copying the +entire structure first and only then overriding a few attributes that +need to be different. + +Fixes: aec21d31756c (qapi: Add InetSocketAddress member keep-alive) +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. 
Berrangé + +(cherry picked from commit 0dc051aa85e1bd68d5c5110fa8af69204e6dbd3d) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + io/dns-resolver.c | 21 +++++---------------- + 1 file changed, 5 insertions(+), 16 deletions(-) + +diff --git a/io/dns-resolver.c b/io/dns-resolver.c +index 53b0e8407a..3712438f82 100644 +--- a/io/dns-resolver.c ++++ b/io/dns-resolver.c +@@ -111,22 +111,11 @@ static int qio_dns_resolver_lookup_sync_inet(QIODNSResolver *resolver, + uaddr, INET6_ADDRSTRLEN, uport, 32, + NI_NUMERICHOST | NI_NUMERICSERV); + +- newaddr->u.inet = (InetSocketAddress){ +- .host = g_strdup(uaddr), +- .port = g_strdup(uport), +- .has_numeric = true, +- .numeric = true, +- .has_to = iaddr->has_to, +- .to = iaddr->to, +- .has_ipv4 = iaddr->has_ipv4, +- .ipv4 = iaddr->ipv4, +- .has_ipv6 = iaddr->has_ipv6, +- .ipv6 = iaddr->ipv6, +-#ifdef HAVE_IPPROTO_MPTCP +- .has_mptcp = iaddr->has_mptcp, +- .mptcp = iaddr->mptcp, +-#endif +- }; ++ newaddr->u.inet = *iaddr; ++ newaddr->u.inet.host = g_strdup(uaddr), ++ newaddr->u.inet.port = g_strdup(uport), ++ newaddr->u.inet.has_numeric = true, ++ newaddr->u.inet.numeric = true, + + (*addrs)[i] = newaddr; + } +-- +2.39.3 + diff --git a/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch b/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch new file mode 100644 index 0000000..74a0b67 --- /dev/null +++ b/kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch @@ -0,0 +1,53 @@ +From 450ca242645a6a2c68ea8aacbedfafec11c75fec Mon Sep 17 00:00:00 2001 +From: Matheus Tavares Bernardino +Date: Mon, 26 May 2025 10:20:55 -0700 +Subject: [PATCH 9/9] tests/unit/test-util-sockets: fix mem-leak on error + object + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [7/7] ed714ea8693fe69166e7f991904bebd20636804a 
(JurajMarcin/centos-src-qemu-kvm) + +The test fails with --enable-asan as the error struct is never freed. +In the case where the test expects a success but it fails, let's also +report the error for debugging (it will be freed internally). + +Fixes 316e8ee8d6 ("util/qemu-sockets: Refactor inet_parse() to use QemuOpts") + +Signed-off-by: Matheus Tavares Bernardino +Reviewed-by: Juraj Marcin +Message-ID: <518d94c7db20060b2a086cf55ee9bffab992a907.1748280011.git.matheus.bernardino@oss.qualcomm.com> +Signed-off-by: Thomas Huth + +(cherry picked from commit 5c54a367265ec19ed94a535cd15d178c16b8cae0) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + tests/unit/test-util-sockets.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c +index 8492f4d68f..ee66d727c3 100644 +--- a/tests/unit/test-util-sockets.c ++++ b/tests/unit/test-util-sockets.c +@@ -341,8 +341,12 @@ static void inet_parse_test_helper(const char *str, + int rc = inet_parse(&addr, str, &error); + + if (success) { ++ if (error) { ++ error_report_err(error); ++ } + g_assert_cmpint(rc, ==, 0); + } else { ++ error_free(error); + g_assert_cmpint(rc, <, 0); + } + if (exp_addr != NULL) { +-- +2.39.3 + diff --git a/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch b/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch new file mode 100644 index 0000000..32d7358 --- /dev/null +++ b/kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch @@ -0,0 +1,86 @@ +From df0a8441d5352faaa1e2a3a27a48cafd6fa737e4 Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:33 +0200 +Subject: [PATCH 6/9] util/qemu-sockets: Add support for keep-alive flag to + passive sockets +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP 
keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [4/7] af13774c325fc152814885c858813bdc2f1d62e7 (JurajMarcin/centos-src-qemu-kvm) + +Commit aec21d3175 (qapi: Add InetSocketAddress member keep-alive) +introduces the keep-alive flag, which enables the SO_KEEPALIVE socket +option, but only on client-side sockets. However, this option is also +useful for server-side sockets, so they can check if a client is still +reachable or drop the connection otherwise. + +This patch enables the SO_KEEPALIVE socket option on passive server-side +sockets if the keep-alive flag is enabled. This socket option is then +inherited by active server-side sockets communicating with connected +clients. + +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. Berrangé + +(cherry picked from commit 00064705ed1f3943d3634be25da434466c87e7d5) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + qapi/sockets.json | 4 ++-- + util/qemu-sockets.c | 9 +++------ + 2 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/qapi/sockets.json b/qapi/sockets.json +index 6a95023315..62797cd027 100644 +--- a/qapi/sockets.json ++++ b/qapi/sockets.json +@@ -56,8 +56,8 @@ + # @ipv6: whether to accept IPv6 addresses, default try both IPv4 and + # IPv6 + # +-# @keep-alive: enable keep-alive when connecting to this socket. Not +-# supported for passive sockets. (Since 4.2) ++# @keep-alive: enable keep-alive when connecting to/listening on this socket. ++# (Since 4.2, not supported for listening sockets until 10.1) + # + # @mptcp: enable multi-path TCP. 
(Since 6.1) + # +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 329fdbfd97..4fbf1ed5bf 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -236,12 +236,6 @@ static int inet_listen_saddr(InetSocketAddress *saddr, + int saved_errno = 0; + bool socket_created = false; + +- if (saddr->keep_alive) { +- error_setg(errp, "keep-alive option is not supported for passive " +- "sockets"); +- return -1; +- } +- + memset(&ai,0, sizeof(ai)); + ai.ai_flags = AI_PASSIVE; + if (saddr->has_numeric && saddr->numeric) { +@@ -349,6 +343,9 @@ static int inet_listen_saddr(InetSocketAddress *saddr, + goto fail; + } + /* We have a listening socket */ ++ if (inet_set_sockopts(slisten, saddr, errp) < 0) { ++ goto fail; ++ } + freeaddrinfo(res); + return slisten; + } +-- +2.39.3 + diff --git a/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch b/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch new file mode 100644 index 0000000..a4b1629 --- /dev/null +++ b/kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch @@ -0,0 +1,314 @@ +From 51becf11d7727e6f6e224ca2a1d56e26fd3d0fb2 Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:35 +0200 +Subject: [PATCH 8/9] util/qemu-sockets: Introduce inet socket options + controlling TCP keep-alive +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [6/7] 4c8bac67f739c923fa1590e1cc34cd84c08653c1 (JurajMarcin/centos-src-qemu-kvm) + +With the default TCP stack configuration, it could be even 2 hours +before the connection times out due to the other side not being +reachable. However, in some cases, the application needs to be aware of +a connection issue much sooner. 
+ +This is the case, for example, for postcopy live migration. If there is +no traffic from the migration destination guest (server-side) to the +migration source guest (client-side), the destination keeps waiting for +pages indefinitely and does not switch to the postcopy-paused state. +This can happen, for example, if the destination QEMU instance is +started with the '-S' command line option and the machine is not started +yet, or if the machine is idle and produces no new page faults for +not-yet-migrated pages. + +This patch introduces new inet socket parameters that control count, +idle period, and interval of TCP keep-alive packets before the +connection is considered broken. These parameters are available on +systems where the respective TCP socket options are defined; this +includes Linux, Windows, macOS, but not OpenBSD. Additionally, macOS +defines TCP_KEEPIDLE as TCP_KEEPALIVE instead, so the patch supplies its +own definition. + +The default value for all is 0, which means the system configuration is +used. + +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. 
Berrangé + +(cherry picked from commit 1bd4237cb1095d71c16afad3ce93b4a1e453173e) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + meson.build | 30 +++++++++++++ + qapi/sockets.json | 19 ++++++++ + tests/unit/test-util-sockets.c | 39 +++++++++++++++++ + util/qemu-sockets.c | 80 ++++++++++++++++++++++++++++++++++ + 4 files changed, 168 insertions(+) + +diff --git a/meson.build b/meson.build +index dadd47d362..23b56175d5 100644 +--- a/meson.build ++++ b/meson.build +@@ -2745,6 +2745,36 @@ if linux_io_uring.found() + config_host_data.set('HAVE_IO_URING_PREP_WRITEV2', + cc.has_header_symbol('liburing.h', 'io_uring_prep_writev2')) + endif ++config_host_data.set('HAVE_TCP_KEEPCNT', ++ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPCNT') or ++ cc.compiles(''' ++ #include ++ #ifndef TCP_KEEPCNT ++ #error ++ #endif ++ int main(void) { return 0; }''', ++ name: 'Win32 TCP_KEEPCNT')) ++# On Darwin TCP_KEEPIDLE is available under different name, TCP_KEEPALIVE. ++# https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 ++config_host_data.set('HAVE_TCP_KEEPIDLE', ++ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPIDLE') or ++ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPALIVE') or ++ cc.compiles(''' ++ #include ++ #ifndef TCP_KEEPIDLE ++ #error ++ #endif ++ int main(void) { return 0; }''', ++ name: 'Win32 TCP_KEEPIDLE')) ++config_host_data.set('HAVE_TCP_KEEPINTVL', ++ cc.has_header_symbol('netinet/tcp.h', 'TCP_KEEPINTVL') or ++ cc.compiles(''' ++ #include ++ #ifndef TCP_KEEPINTVL ++ #error ++ #endif ++ int main(void) { return 0; }''', ++ name: 'Win32 TCP_KEEPINTVL')) + + # has_member + config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID', +diff --git a/qapi/sockets.json b/qapi/sockets.json +index 62797cd027..f9f559daba 100644 +--- a/qapi/sockets.json ++++ b/qapi/sockets.json +@@ -59,6 +59,22 @@ + # @keep-alive: enable keep-alive when connecting to/listening on this socket. 
+ # (Since 4.2, not supported for listening sockets until 10.1) + # ++# @keep-alive-count: number of keep-alive packets sent before the connection is ++# closed. Only supported for TCP sockets on systems where TCP_KEEPCNT ++# socket option is defined (this includes Linux, Windows, macOS, FreeBSD, ++# but not OpenBSD). When set to 0, system setting is used. (Since 10.1) ++# ++# @keep-alive-idle: time in seconds the connection needs to be idle before ++# sending a keepalive packet. Only supported for TCP sockets on systems ++# where TCP_KEEPIDLE socket option is defined (this includes Linux, ++# Windows, macOS, FreeBSD, but not OpenBSD). When set to 0, system setting ++# is used. (Since 10.1) ++# ++# @keep-alive-interval: time in seconds between keep-alive packets. Only ++# supported for TCP sockets on systems where TCP_KEEPINTVL is defined (this ++# includes Linux, Windows, macOS, FreeBSD, but not OpenBSD). When set to ++# 0, system setting is used. (Since 10.1) ++# + # @mptcp: enable multi-path TCP. 
(Since 6.1) + # + # Since: 1.3 +@@ -71,6 +87,9 @@ + '*ipv4': 'bool', + '*ipv6': 'bool', + '*keep-alive': 'bool', ++ '*keep-alive-count': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPCNT' }, ++ '*keep-alive-idle': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPIDLE' }, ++ '*keep-alive-interval': { 'type': 'uint32', 'if': 'HAVE_TCP_KEEPINTVL' }, + '*mptcp': { 'type': 'bool', 'if': 'HAVE_IPPROTO_MPTCP' } } } + + ## +diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c +index 9e39b92e7c..8492f4d68f 100644 +--- a/tests/unit/test-util-sockets.c ++++ b/tests/unit/test-util-sockets.c +@@ -359,6 +359,24 @@ static void inet_parse_test_helper(const char *str, + g_assert_cmpint(addr.ipv6, ==, exp_addr->ipv6); + g_assert_cmpint(addr.has_keep_alive, ==, exp_addr->has_keep_alive); + g_assert_cmpint(addr.keep_alive, ==, exp_addr->keep_alive); ++#ifdef HAVE_TCP_KEEPCNT ++ g_assert_cmpint(addr.has_keep_alive_count, ==, ++ exp_addr->has_keep_alive_count); ++ g_assert_cmpint(addr.keep_alive_count, ==, ++ exp_addr->keep_alive_count); ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ g_assert_cmpint(addr.has_keep_alive_idle, ==, ++ exp_addr->has_keep_alive_idle); ++ g_assert_cmpint(addr.keep_alive_idle, ==, ++ exp_addr->keep_alive_idle); ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ g_assert_cmpint(addr.has_keep_alive_interval, ==, ++ exp_addr->has_keep_alive_interval); ++ g_assert_cmpint(addr.keep_alive_interval, ==, ++ exp_addr->keep_alive_interval); ++#endif + #ifdef HAVE_IPPROTO_MPTCP + g_assert_cmpint(addr.has_mptcp, ==, exp_addr->has_mptcp); + g_assert_cmpint(addr.mptcp, ==, exp_addr->mptcp); +@@ -460,6 +478,18 @@ static void test_inet_parse_all_options_good(void) + .ipv6 = true, + .has_keep_alive = true, + .keep_alive = true, ++#ifdef HAVE_TCP_KEEPCNT ++ .has_keep_alive_count = true, ++ .keep_alive_count = 10, ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ .has_keep_alive_idle = true, ++ .keep_alive_idle = 60, ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ .has_keep_alive_interval = true, ++ 
.keep_alive_interval = 30, ++#endif + #ifdef HAVE_IPPROTO_MPTCP + .has_mptcp = true, + .mptcp = false, +@@ -467,6 +497,15 @@ static void test_inet_parse_all_options_good(void) + }; + inet_parse_test_helper( + "[::1]:5000,numeric=on,to=5006,ipv4=off,ipv6=on,keep-alive=on" ++#ifdef HAVE_TCP_KEEPCNT ++ ",keep-alive-count=10" ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ ",keep-alive-idle=60" ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ ",keep-alive-interval=30" ++#endif + #ifdef HAVE_IPPROTO_MPTCP + ",mptcp=off" + #endif +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 403dc26b36..4773755fd5 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -45,6 +45,14 @@ + # define AI_NUMERICSERV 0 + #endif + ++/* ++ * On macOS TCP_KEEPIDLE is available under a different name, TCP_KEEPALIVE. ++ * https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/bsd/man/man4/tcp.4#L172 ++ */ ++#if defined(TCP_KEEPALIVE) && !defined(TCP_KEEPIDLE) ++# define TCP_KEEPIDLE TCP_KEEPALIVE ++#endif ++ + + static int inet_getport(struct addrinfo *e) + { +@@ -218,6 +226,42 @@ static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) + "Unable to set keep-alive option on socket"); + return -1; + } ++#ifdef HAVE_TCP_KEEPCNT ++ if (saddr->has_keep_alive_count && saddr->keep_alive_count) { ++ int keep_count = saddr->keep_alive_count; ++ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT, &keep_count, ++ sizeof(keep_count)); ++ if (ret < 0) { ++ error_setg_errno(errp, errno, ++ "Unable to set TCP keep-alive count option on socket"); ++ return -1; ++ } ++ } ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ if (saddr->has_keep_alive_idle && saddr->keep_alive_idle) { ++ int keep_idle = saddr->keep_alive_idle; ++ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE, &keep_idle, ++ sizeof(keep_idle)); ++ if (ret < 0) { ++ error_setg_errno(errp, errno, ++ "Unable to set TCP keep-alive idle option on socket"); ++ return -1; ++ } ++ } ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ if 
(saddr->has_keep_alive_interval && saddr->keep_alive_interval) { ++ int keep_interval = saddr->keep_alive_interval; ++ ret = setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL, &keep_interval, ++ sizeof(keep_interval)); ++ if (ret < 0) { ++ error_setg_errno(errp, errno, ++ "Unable to set TCP keep-alive interval option on socket"); ++ return -1; ++ } ++ } ++#endif + } + return 0; + } +@@ -630,6 +674,24 @@ static QemuOptsList inet_opts = { + .name = "keep-alive", + .type = QEMU_OPT_BOOL, + }, ++#ifdef HAVE_TCP_KEEPCNT ++ { ++ .name = "keep-alive-count", ++ .type = QEMU_OPT_NUMBER, ++ }, ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ { ++ .name = "keep-alive-idle", ++ .type = QEMU_OPT_NUMBER, ++ }, ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ { ++ .name = "keep-alive-interval", ++ .type = QEMU_OPT_NUMBER, ++ }, ++#endif + #ifdef HAVE_IPPROTO_MPTCP + { + .name = "mptcp", +@@ -695,6 +757,24 @@ int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) + addr->has_keep_alive = true; + addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); + } ++#ifdef HAVE_TCP_KEEPCNT ++ if (qemu_opt_find(opts, "keep-alive-count")) { ++ addr->has_keep_alive_count = true; ++ addr->keep_alive_count = qemu_opt_get_number(opts, "keep-alive-count", 0); ++ } ++#endif ++#ifdef HAVE_TCP_KEEPIDLE ++ if (qemu_opt_find(opts, "keep-alive-idle")) { ++ addr->has_keep_alive_idle = true; ++ addr->keep_alive_idle = qemu_opt_get_number(opts, "keep-alive-idle", 0); ++ } ++#endif ++#ifdef HAVE_TCP_KEEPINTVL ++ if (qemu_opt_find(opts, "keep-alive-interval")) { ++ addr->has_keep_alive_interval = true; ++ addr->keep_alive_interval = qemu_opt_get_number(opts, "keep-alive-interval", 0); ++ } ++#endif + #ifdef HAVE_IPPROTO_MPTCP + if (qemu_opt_find(opts, "mptcp")) { + addr->has_mptcp = true; +-- +2.39.3 + diff --git a/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch b/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch new file mode 100644 index 0000000..036c406 --- /dev/null +++ 
b/kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch @@ -0,0 +1,460 @@ +From 56ed06502da893f9fd756cbe683917c64f4af0a6 Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:34 +0200 +Subject: [PATCH 7/9] util/qemu-sockets: Refactor inet_parse() to use QemuOpts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [5/7] 49ea5df8850a7518eb546c27878551cdb1aaa9ff (JurajMarcin/centos-src-qemu-kvm) + +Currently, the inet address parser cannot handle multiple options where +one is prefixed with the name of the other. For example, with the +'keep-alive-idle' option added, the current parser cannot parse +'127.0.0.1:5000,keep-alive-idle=60,keep-alive' correctly. Instead, it +fails with "error parsing 'keep-alive' flag '-idle=60,keep-alive'". + +To resolve these issues, this patch rewrites the inet address parsing +using the QemuOpts parser, which the inet_parse_flag() function tries to +mimic. This new parser supports all previously supported options and on +top of that the 'numeric' flag is now also supported. The only +difference is, the new parser produces an error if an unknown option is +passed, instead of silently ignoring it. + +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. 
Berrangé + +(cherry picked from commit 316e8ee8d614f049bfae697570a5e62af450491c) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + tests/unit/test-util-sockets.c | 196 +++++++++++++++++++++++++++++++++ + util/qemu-sockets.c | 158 +++++++++++++------------- + 2 files changed, 270 insertions(+), 84 deletions(-) + +diff --git a/tests/unit/test-util-sockets.c b/tests/unit/test-util-sockets.c +index 4c9dd0b271..9e39b92e7c 100644 +--- a/tests/unit/test-util-sockets.c ++++ b/tests/unit/test-util-sockets.c +@@ -332,6 +332,177 @@ static void test_socket_unix_abstract(void) + + #endif /* CONFIG_LINUX */ + ++static void inet_parse_test_helper(const char *str, ++ InetSocketAddress *exp_addr, bool success) ++{ ++ InetSocketAddress addr; ++ Error *error = NULL; ++ ++ int rc = inet_parse(&addr, str, &error); ++ ++ if (success) { ++ g_assert_cmpint(rc, ==, 0); ++ } else { ++ g_assert_cmpint(rc, <, 0); ++ } ++ if (exp_addr != NULL) { ++ g_assert_cmpstr(addr.host, ==, exp_addr->host); ++ g_assert_cmpstr(addr.port, ==, exp_addr->port); ++ /* Own members: */ ++ g_assert_cmpint(addr.has_numeric, ==, exp_addr->has_numeric); ++ g_assert_cmpint(addr.numeric, ==, exp_addr->numeric); ++ g_assert_cmpint(addr.has_to, ==, exp_addr->has_to); ++ g_assert_cmpint(addr.to, ==, exp_addr->to); ++ g_assert_cmpint(addr.has_ipv4, ==, exp_addr->has_ipv4); ++ g_assert_cmpint(addr.ipv4, ==, exp_addr->ipv4); ++ g_assert_cmpint(addr.has_ipv6, ==, exp_addr->has_ipv6); ++ g_assert_cmpint(addr.ipv6, ==, exp_addr->ipv6); ++ g_assert_cmpint(addr.has_keep_alive, ==, exp_addr->has_keep_alive); ++ g_assert_cmpint(addr.keep_alive, ==, exp_addr->keep_alive); ++#ifdef HAVE_IPPROTO_MPTCP ++ g_assert_cmpint(addr.has_mptcp, ==, exp_addr->has_mptcp); ++ g_assert_cmpint(addr.mptcp, ==, exp_addr->mptcp); ++#endif ++ } ++ ++ g_free(addr.host); ++ g_free(addr.port); ++} ++ ++static void test_inet_parse_nohost_good(void) ++{ ++ char host[] = ""; ++ char port[] = "5000"; ++ 
InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ }; ++ inet_parse_test_helper(":5000", &exp_addr, true); ++} ++ ++static void test_inet_parse_empty_bad(void) ++{ ++ inet_parse_test_helper("", NULL, false); ++} ++ ++static void test_inet_parse_only_colon_bad(void) ++{ ++ inet_parse_test_helper(":", NULL, false); ++} ++ ++static void test_inet_parse_ipv4_good(void) ++{ ++ char host[] = "127.0.0.1"; ++ char port[] = "5000"; ++ InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ }; ++ inet_parse_test_helper("127.0.0.1:5000", &exp_addr, true); ++} ++ ++static void test_inet_parse_ipv4_noport_bad(void) ++{ ++ inet_parse_test_helper("127.0.0.1", NULL, false); ++} ++ ++static void test_inet_parse_ipv6_good(void) ++{ ++ char host[] = "::1"; ++ char port[] = "5000"; ++ InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ }; ++ inet_parse_test_helper("[::1]:5000", &exp_addr, true); ++} ++ ++static void test_inet_parse_ipv6_noend_bad(void) ++{ ++ inet_parse_test_helper("[::1", NULL, false); ++} ++ ++static void test_inet_parse_ipv6_noport_bad(void) ++{ ++ inet_parse_test_helper("[::1]:", NULL, false); ++} ++ ++static void test_inet_parse_ipv6_empty_bad(void) ++{ ++ inet_parse_test_helper("[]:5000", NULL, false); ++} ++ ++static void test_inet_parse_hostname_good(void) ++{ ++ char host[] = "localhost"; ++ char port[] = "5000"; ++ InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ }; ++ inet_parse_test_helper("localhost:5000", &exp_addr, true); ++} ++ ++static void test_inet_parse_all_options_good(void) ++{ ++ char host[] = "::1"; ++ char port[] = "5000"; ++ InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ .has_numeric = true, ++ .numeric = true, ++ .has_to = true, ++ .to = 5006, ++ .has_ipv4 = true, ++ .ipv4 = false, ++ .has_ipv6 = true, ++ .ipv6 = true, ++ .has_keep_alive = true, ++ .keep_alive = true, ++#ifdef HAVE_IPPROTO_MPTCP ++ .has_mptcp = true, ++ .mptcp = false, ++#endif ++ }; ++ 
inet_parse_test_helper( ++ "[::1]:5000,numeric=on,to=5006,ipv4=off,ipv6=on,keep-alive=on" ++#ifdef HAVE_IPPROTO_MPTCP ++ ",mptcp=off" ++#endif ++ , &exp_addr, true); ++} ++ ++static void test_inet_parse_all_implicit_bool_good(void) ++{ ++ char host[] = "::1"; ++ char port[] = "5000"; ++ InetSocketAddress exp_addr = { ++ .host = host, ++ .port = port, ++ .has_numeric = true, ++ .numeric = true, ++ .has_to = true, ++ .to = 5006, ++ .has_ipv4 = true, ++ .ipv4 = true, ++ .has_ipv6 = true, ++ .ipv6 = true, ++ .has_keep_alive = true, ++ .keep_alive = true, ++#ifdef HAVE_IPPROTO_MPTCP ++ .has_mptcp = true, ++ .mptcp = true, ++#endif ++ }; ++ inet_parse_test_helper( ++ "[::1]:5000,numeric,to=5006,ipv4,ipv6,keep-alive" ++#ifdef HAVE_IPPROTO_MPTCP ++ ",mptcp" ++#endif ++ , &exp_addr, true); ++} ++ + int main(int argc, char **argv) + { + bool has_ipv4, has_ipv6; +@@ -377,6 +548,31 @@ int main(int argc, char **argv) + test_socket_unix_abstract); + #endif + ++ g_test_add_func("/util/socket/inet-parse/nohost-good", ++ test_inet_parse_nohost_good); ++ g_test_add_func("/util/socket/inet-parse/empty-bad", ++ test_inet_parse_empty_bad); ++ g_test_add_func("/util/socket/inet-parse/only-colon-bad", ++ test_inet_parse_only_colon_bad); ++ g_test_add_func("/util/socket/inet-parse/ipv4-good", ++ test_inet_parse_ipv4_good); ++ g_test_add_func("/util/socket/inet-parse/ipv4-noport-bad", ++ test_inet_parse_ipv4_noport_bad); ++ g_test_add_func("/util/socket/inet-parse/ipv6-good", ++ test_inet_parse_ipv6_good); ++ g_test_add_func("/util/socket/inet-parse/ipv6-noend-bad", ++ test_inet_parse_ipv6_noend_bad); ++ g_test_add_func("/util/socket/inet-parse/ipv6-noport-bad", ++ test_inet_parse_ipv6_noport_bad); ++ g_test_add_func("/util/socket/inet-parse/ipv6-empty-bad", ++ test_inet_parse_ipv6_empty_bad); ++ g_test_add_func("/util/socket/inet-parse/hostname-good", ++ test_inet_parse_hostname_good); ++ g_test_add_func("/util/socket/inet-parse/all-options-good", ++ test_inet_parse_all_options_good); ++ 
g_test_add_func("/util/socket/inet-parse/all-bare-bool-good", ++ test_inet_parse_all_implicit_bool_good); ++ + end: + return g_test_run(); + } +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 4fbf1ed5bf..403dc26b36 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -30,6 +30,7 @@ + #include "qapi/qobject-input-visitor.h" + #include "qapi/qobject-output-visitor.h" + #include "qemu/cutils.h" ++#include "qemu/option.h" + #include "trace.h" + + #ifndef AI_ADDRCONFIG +@@ -600,115 +601,104 @@ err: + return -1; + } + +-/* compatibility wrapper */ +-static int inet_parse_flag(const char *flagname, const char *optstr, bool *val, +- Error **errp) +-{ +- char *end; +- size_t len; +- +- end = strstr(optstr, ","); +- if (end) { +- if (end[1] == ',') { /* Reject 'ipv6=on,,foo' */ +- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); +- return -1; +- } +- len = end - optstr; +- } else { +- len = strlen(optstr); +- } +- if (len == 0 || (len == 3 && strncmp(optstr, "=on", len) == 0)) { +- *val = true; +- } else if (len == 4 && strncmp(optstr, "=off", len) == 0) { +- *val = false; +- } else { +- error_setg(errp, "error parsing '%s' flag '%s'", flagname, optstr); +- return -1; +- } +- return 0; +-} ++static QemuOptsList inet_opts = { ++ .name = "InetSocketAddress", ++ .head = QTAILQ_HEAD_INITIALIZER(inet_opts.head), ++ .implied_opt_name = "addr", ++ .desc = { ++ { ++ .name = "addr", ++ .type = QEMU_OPT_STRING, ++ }, ++ { ++ .name = "numeric", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { ++ .name = "to", ++ .type = QEMU_OPT_NUMBER, ++ }, ++ { ++ .name = "ipv4", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { ++ .name = "ipv6", ++ .type = QEMU_OPT_BOOL, ++ }, ++ { ++ .name = "keep-alive", ++ .type = QEMU_OPT_BOOL, ++ }, ++#ifdef HAVE_IPPROTO_MPTCP ++ { ++ .name = "mptcp", ++ .type = QEMU_OPT_BOOL, ++ }, ++#endif ++ { /* end of list */ } ++ }, ++}; + + int inet_parse(InetSocketAddress *addr, const char *str, Error **errp) + { +- const char *optstr, *h; +- char 
host[65]; +- char port[33]; +- int to; +- int pos; +- char *begin; +- ++ QemuOpts *opts = qemu_opts_parse(&inet_opts, str, true, errp); ++ if (!opts) { ++ return -1; ++ } + memset(addr, 0, sizeof(*addr)); + + /* parse address */ +- if (str[0] == ':') { +- /* no host given */ +- host[0] = '\0'; +- if (sscanf(str, ":%32[^,]%n", port, &pos) != 1) { +- error_setg(errp, "error parsing port in address '%s'", str); +- return -1; +- } +- } else if (str[0] == '[') { ++ const char *addr_str = qemu_opt_get(opts, "addr"); ++ if (!addr_str) { ++ error_setg(errp, "error parsing address ''"); ++ return -1; ++ } ++ if (str[0] == '[') { + /* IPv6 addr */ +- if (sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos) != 2) { +- error_setg(errp, "error parsing IPv6 address '%s'", str); ++ const char *ip_end = strstr(addr_str, "]:"); ++ if (!ip_end || ip_end - addr_str < 2 || strlen(ip_end) < 3) { ++ error_setg(errp, "error parsing IPv6 address '%s'", addr_str); + return -1; + } ++ addr->host = g_strndup(addr_str + 1, ip_end - addr_str - 1); ++ addr->port = g_strdup(ip_end + 2); + } else { +- /* hostname or IPv4 addr */ +- if (sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos) != 2) { +- error_setg(errp, "error parsing address '%s'", str); ++ /* no host, hostname or IPv4 addr */ ++ const char *port = strchr(addr_str, ':'); ++ if (!port || strlen(port) < 2) { ++ error_setg(errp, "error parsing address '%s'", addr_str); + return -1; + } ++ addr->host = g_strndup(addr_str, port - addr_str); ++ addr->port = g_strdup(port + 1); + } + +- addr->host = g_strdup(host); +- addr->port = g_strdup(port); +- + /* parse options */ +- optstr = str + pos; +- h = strstr(optstr, ",to="); +- if (h) { +- h += 4; +- if (sscanf(h, "%d%n", &to, &pos) != 1 || +- (h[pos] != '\0' && h[pos] != ',')) { +- error_setg(errp, "error parsing to= argument"); +- return -1; +- } ++ if (qemu_opt_find(opts, "numeric")) { ++ addr->has_numeric = true, ++ addr->numeric = qemu_opt_get_bool(opts, "numeric", false); ++ } ++ if 
(qemu_opt_find(opts, "to")) { + addr->has_to = true; +- addr->to = to; ++ addr->to = qemu_opt_get_number(opts, "to", 0); + } +- begin = strstr(optstr, ",ipv4"); +- if (begin) { +- if (inet_parse_flag("ipv4", begin + 5, &addr->ipv4, errp) < 0) { +- return -1; +- } ++ if (qemu_opt_find(opts, "ipv4")) { + addr->has_ipv4 = true; ++ addr->ipv4 = qemu_opt_get_bool(opts, "ipv4", false); + } +- begin = strstr(optstr, ",ipv6"); +- if (begin) { +- if (inet_parse_flag("ipv6", begin + 5, &addr->ipv6, errp) < 0) { +- return -1; +- } ++ if (qemu_opt_find(opts, "ipv6")) { + addr->has_ipv6 = true; ++ addr->ipv6 = qemu_opt_get_bool(opts, "ipv6", false); + } +- begin = strstr(optstr, ",keep-alive"); +- if (begin) { +- if (inet_parse_flag("keep-alive", begin + strlen(",keep-alive"), +- &addr->keep_alive, errp) < 0) +- { +- return -1; +- } ++ if (qemu_opt_find(opts, "keep-alive")) { + addr->has_keep_alive = true; ++ addr->keep_alive = qemu_opt_get_bool(opts, "keep-alive", false); + } + #ifdef HAVE_IPPROTO_MPTCP +- begin = strstr(optstr, ",mptcp"); +- if (begin) { +- if (inet_parse_flag("mptcp", begin + strlen(",mptcp"), +- &addr->mptcp, errp) < 0) +- { +- return -1; +- } ++ if (qemu_opt_find(opts, "mptcp")) { + addr->has_mptcp = true; ++ addr->mptcp = qemu_opt_get_bool(opts, "mptcp", 0); + } + #endif + return 0; +-- +2.39.3 + diff --git a/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch b/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch new file mode 100644 index 0000000..b9329d6 --- /dev/null +++ b/kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch @@ -0,0 +1,83 @@ +From e6348c4dd343f1a367a30f08c01a0f25c764a93c Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:31 +0200 +Subject: [PATCH 4/9] util/qemu-sockets: Refactor setting client sockopts into + a separate function +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: 
util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [2/7] c06976f96dbdf70b15079aa81dd1ac87abb0f1ab (JurajMarcin/centos-src-qemu-kvm) + +This is done in preparation for enabling the SO_KEEPALIVE support for +server sockets and adding settings for more TCP keep-alive socket +options. + +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. Berrangé + +(cherry picked from commit b8b5278aca78be4a1c2e7cbb11c6be176f63706d) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + util/qemu-sockets.c | 29 +++++++++++++++++++---------- + 1 file changed, 19 insertions(+), 10 deletions(-) + +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 77477c1cd5..4a878e0527 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -205,6 +205,22 @@ static int try_bind(int socket, InetSocketAddress *saddr, struct addrinfo *e) + #endif + } + ++static int inet_set_sockopts(int sock, InetSocketAddress *saddr, Error **errp) ++{ ++ if (saddr->keep_alive) { ++ int keep_alive = 1; ++ int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, ++ &keep_alive, sizeof(keep_alive)); ++ ++ if (ret < 0) { ++ error_setg_errno(errp, errno, ++ "Unable to set keep-alive option on socket"); ++ return -1; ++ } ++ } ++ return 0; ++} ++ + static int inet_listen_saddr(InetSocketAddress *saddr, + int port_offset, + int num, +@@ -475,16 +491,9 @@ int inet_connect_saddr(InetSocketAddress *saddr, Error **errp) + return sock; + } + +- if (saddr->keep_alive) { +- int val = 1; +- int ret = setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, +- &val, sizeof(val)); +- +- if (ret < 0) { +- error_setg_errno(errp, errno, "Unable to set KEEPALIVE"); +- close(sock); +- return -1; +- } ++ if (inet_set_sockopts(sock, saddr, errp) < 0) { ++ close(sock); ++ return -1; + } + + return sock; +-- +2.39.3 + diff --git 
a/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch b/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch new file mode 100644 index 0000000..ff7ecb4 --- /dev/null +++ b/kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch @@ -0,0 +1,141 @@ +From 5001e77f9a8b91bee86e5c8ecbc417d0d2551855 Mon Sep 17 00:00:00 2001 +From: Juraj Marcin +Date: Wed, 21 May 2025 15:52:32 +0200 +Subject: [PATCH 5/9] util/qemu-sockets: Refactor success and failure paths in + inet_listen_saddr() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +RH-Author: Juraj Marcin +RH-MergeRequest: 368: util/qemu-sockets: Introduce inet socket options controlling TCP keep-alive +RH-Jira: RHEL-67706 +RH-Acked-by: Peter Xu +RH-Acked-by: Miroslav Rezanina +RH-Commit: [3/7] 2e18c6f6eeadf99d6a526a089009bb89cd2fd6a8 (JurajMarcin/centos-src-qemu-kvm) + +To get a listening socket, we need to first create a socket, try binding +it to a certain port, and lastly starting listening to it. Each of these +operations can fail due to various reasons, one of them being that the +requested address/port is already in use. In such case, the function +tries the same process with a new port number. + +This patch refactors the port number loop, so the success path is no +longer buried inside the 'if' statements in the middle of the loop. Now, +the success path is not nested and ends at the end of the iteration +after successful socket creation, binding, and listening. In case any of +the operations fails, it either continues to the next iteration (and the +next port) or jumps out of the loop to handle the error and exits the +function. + +Signed-off-by: Juraj Marcin +Reviewed-by: Daniel P. Berrangé +Signed-off-by: Daniel P. 
Berrangé + +(cherry picked from commit 911e0f2c6e2d00c985affa75ec188c8edcf480f2) + +JIRA: https://issues.redhat.com/browse/RHEL-67706 + +Signed-off-by: Juraj Marcin +--- + util/qemu-sockets.c | 51 ++++++++++++++++++++++++--------------------- + 1 file changed, 27 insertions(+), 24 deletions(-) + +diff --git a/util/qemu-sockets.c b/util/qemu-sockets.c +index 4a878e0527..329fdbfd97 100644 +--- a/util/qemu-sockets.c ++++ b/util/qemu-sockets.c +@@ -303,11 +303,20 @@ static int inet_listen_saddr(InetSocketAddress *saddr, + port_min = inet_getport(e); + port_max = saddr->has_to ? saddr->to + port_offset : port_min; + for (p = port_min; p <= port_max; p++) { ++ if (slisten >= 0) { ++ /* ++ * We have a socket we tried with the previous port. It cannot ++ * be rebound, we need to close it and create a new one. ++ */ ++ close(slisten); ++ slisten = -1; ++ } + inet_setport(e, p); + + slisten = create_fast_reuse_socket(e); + if (slisten < 0) { +- /* First time we expect we might fail to create the socket ++ /* ++ * First time we expect we might fail to create the socket + * eg if 'e' has AF_INET6 but ipv6 kmod is not loaded. + * Later iterations should always succeed if first iteration + * worked though, so treat that as fatal. 
+@@ -317,40 +326,38 @@ static int inet_listen_saddr(InetSocketAddress *saddr, + } else { + error_setg_errno(errp, errno, + "Failed to recreate failed listening socket"); +- goto listen_failed; ++ goto fail; + } + } + socket_created = true; + + rc = try_bind(slisten, saddr, e); + if (rc < 0) { +- if (errno != EADDRINUSE) { +- error_setg_errno(errp, errno, "Failed to bind socket"); +- goto listen_failed; +- } +- } else { +- if (!listen(slisten, num)) { +- goto listen_ok; ++ if (errno == EADDRINUSE) { ++ /* This port is already used, try the next one */ ++ continue; + } +- if (errno != EADDRINUSE) { +- error_setg_errno(errp, errno, "Failed to listen on socket"); +- goto listen_failed; ++ error_setg_errno(errp, errno, "Failed to bind socket"); ++ goto fail; ++ } ++ if (listen(slisten, num)) { ++ if (errno == EADDRINUSE) { ++ /* This port is already used, try the next one */ ++ continue; + } ++ error_setg_errno(errp, errno, "Failed to listen on socket"); ++ goto fail; + } +- /* Someone else managed to bind to the same port and beat us +- * to listen on it! Socket semantics does not allow us to +- * recover from this situation, so we need to recreate the +- * socket to allow bind attempts for subsequent ports: +- */ +- close(slisten); +- slisten = -1; ++ /* We have a listening socket */ ++ freeaddrinfo(res); ++ return slisten; + } + } + error_setg_errno(errp, errno, + socket_created ? 
+ "Failed to find an available port" : + "Failed to create a socket"); +-listen_failed: ++fail: + saved_errno = errno; + if (slisten >= 0) { + close(slisten); +@@ -358,10 +365,6 @@ listen_failed: + freeaddrinfo(res); + errno = saved_errno; + return -1; +- +-listen_ok: +- freeaddrinfo(res); +- return slisten; + } + + #ifdef _WIN32 +-- +2.39.3 + diff --git a/qemu-kvm.spec b/qemu-kvm.spec index a29cba7..3a68772 100644 --- a/qemu-kvm.spec +++ b/qemu-kvm.spec @@ -147,7 +147,7 @@ Obsoletes: %{name}-block-ssh <= %{epoch}:%{version} \ Summary: QEMU is a machine emulator and virtualizer Name: qemu-kvm Version: 10.0.0 -Release: 4%{?rcrel}%{?dist}%{?cc_suffix} +Release: 5%{?rcrel}%{?dist}%{?cc_suffix} # Epoch because we pushed a qemu-1.0 package. AIUI this can't ever be dropped # Epoch 15 used for RHEL 8 # Epoch 17 used for RHEL 9 (due to release versioning offset in RHEL 8.5) @@ -244,6 +244,24 @@ Patch39: kvm-tests-Add-iotest-mirror-sparse-for-recent-patches.patch # For RHEL-88435 - --migrate-disks-detect-zeroes doesn't take effect for disk migration [rhel-10.1] # For RHEL-88437 - Disk size of target raw image is full allocated when doing mirror with default discard value [rhel-10.1] Patch40: kvm-mirror-Reduce-I-O-when-destination-is-detect-zeroes-.patch +# For RHEL-65852 - Support multipath failover with scsi-block +Patch41: kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch +# For RHEL-65852 - Support multipath failover with scsi-block +Patch42: kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch43: kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch44: kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch +# For RHEL-67706 - postcopy on the destination host can't switch into 
pause status under the network issue if boot VM with '-S' +Patch45: kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch46: kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch47: kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch48: kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch +# For RHEL-67706 - postcopy on the destination host can't switch into pause status under the network issue if boot VM with '-S' +Patch49: kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch %if %{have_clang} BuildRequires: clang @@ -1326,6 +1344,21 @@ useradd -r -u 107 -g qemu -G kvm -d / -s /sbin/nologin \ %endif %changelog +* Mon Jun 09 2025 Miroslav Rezanina - 10.0.0-5 +- kvm-file-posix-Define-DM_MPATH_PROBE_PATHS.patch [RHEL-65852] +- kvm-file-posix-Probe-paths-and-retry-SG_IO-on-potential-.patch [RHEL-65852] +- kvm-io-Fix-partial-struct-copy-in-qio_dns_resolver_looku.patch [RHEL-67706] +- kvm-util-qemu-sockets-Refactor-setting-client-sockopts-i.patch [RHEL-67706] +- kvm-util-qemu-sockets-Refactor-success-and-failure-paths.patch [RHEL-67706] +- kvm-util-qemu-sockets-Add-support-for-keep-alive-flag-to.patch [RHEL-67706] +- kvm-util-qemu-sockets-Refactor-inet_parse-to-use-QemuOpt.patch [RHEL-67706] +- kvm-util-qemu-sockets-Introduce-inet-socket-options-cont.patch [RHEL-67706] +- kvm-tests-unit-test-util-sockets-fix-mem-leak-on-error-o.patch [RHEL-67706] +- Resolves: RHEL-65852 + (Support multipath failover with scsi-block) +- Resolves: RHEL-67706 + (postcopy on the destination host can't switch into pause status under the 
network issue if boot VM with '-S') + * Mon May 26 2025 Miroslav Rezanina - 10.0.0-4 - kvm-block-Expand-block-status-mode-from-bool-to-flags.patch [RHEL-88435 RHEL-88437] - kvm-file-posix-gluster-Handle-zero-block-status-hint-bet.patch [RHEL-88435 RHEL-88437]