From 5e5694f6ad8ec8650de404e8e69c3d7ee32485bf Mon Sep 17 00:00:00 2001 From: Stefano Brivio Date: Tue, 29 Jul 2025 16:18:08 -0400 Subject: [PATCH] passt-0^20250512.g8ec1341-2.el9 Resolves: RHEL-106326 --- ...ult-don-t-quit-source-after-migratio.patch | 264 ++++++++++++++++++ passt.spec | 6 +- 2 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 0002-treewide-By-default-don-t-quit-source-after-migratio.patch diff --git a/0002-treewide-By-default-don-t-quit-source-after-migratio.patch b/0002-treewide-By-default-don-t-quit-source-after-migratio.patch new file mode 100644 index 0000000..95e79e7 --- /dev/null +++ b/0002-treewide-By-default-don-t-quit-source-after-migratio.patch @@ -0,0 +1,264 @@ +From b0b5ce0a76cf7fec0b00405732fd94e0b34e8d84 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Thu, 17 Jul 2025 10:38:17 +0200 +Subject: [PATCH] treewide: By default, don't quit source after migration, keep + sockets open + +We are hitting an issue in the KubeVirt integration where some data is +still sent to the source instance even after migration is complete. As +we exit, the kernel closes our sockets and resets connections. The +resulting RST segments are sent to peers, effectively terminating +connections that were meanwhile migrated. + +At the moment, this is not done intentionally, but in the future +KubeVirt might enable OVN-Kubernetes features where source and +destination nodes are explicitly getting mirrored traffic for a while, +in order to decrease migration downtime. + +By default, don't quit after migration is completed on the source: the +previous behaviour can be enabled with the new, but deprecated, +--migrate-exit option. After migration (as source), the -1 / --one-off +option has no effect. + +Also, by default, keep migrated TCP sockets open (in repair mode) as +long as we're running, and ignore events on any epoll descriptor +representing data channels. 
The previous behaviour can be enabled with +the new, equally deprecated, --migrate-no-linger option. + +By keeping sockets open, and not exiting, we prevent the kernel +running on the source node from sending out RST segments if further data +reaches us. + +Reported-by: Nir Dothan +Signed-off-by: Stefano Brivio +(cherry picked from commit a8782865c342eb2682cca292d5bf92b567344351) +--- + conf.c | 22 ++++++++++++++++++++++ + flow.c | 2 +- + passt.1 | 29 +++++++++++++++++++++++++++++ + passt.h | 4 ++++ + tcp.c | 9 +++++++-- + tcp_conn.h | 3 ++- + test/lib/setup | 4 ++-- + vhost_user.c | 9 +++++++-- + 8 files changed, 74 insertions(+), 8 deletions(-) + +diff --git a/conf.c b/conf.c +index a6d7e22..1295d89 100644 +--- a/conf.c ++++ b/conf.c +@@ -864,6 +864,14 @@ static void usage(const char *name, FILE *f, int status) + FPRINTF(f, + " --repair-path PATH path for passt-repair(1)\n" + " default: append '.repair' to UNIX domain path\n"); ++ FPRINTF(f, ++ " --migrate-exit DEPRECATED:\n" ++ " source quits after migration\n" ++ " default: source keeps running after migration\n"); ++ FPRINTF(f, ++ " --migrate-no-linger DEPRECATED:\n" ++ " close sockets on migration\n" ++ " default: keep sockets open, ignore events\n"); + } + + FPRINTF(f, +@@ -1468,6 +1476,8 @@ void conf(struct ctx *c, int argc, char **argv) + {"socket-path", required_argument, NULL, 's' }, + {"fqdn", required_argument, NULL, 27 }, + {"repair-path", required_argument, NULL, 28 }, ++ {"migrate-exit", no_argument, NULL, 29 }, ++ {"migrate-no-linger", no_argument, NULL, 30 }, + { 0 }, + }; + const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:"; +@@ -1683,6 +1693,18 @@ void conf(struct ctx *c, int argc, char **argv) + optarg)) + die("Invalid passt-repair path: %s", optarg); + ++ break; ++ case 29: ++ if (c->mode != MODE_VU) ++ die("--migrate-exit is for vhost-user mode only"); ++ c->migrate_exit = true; ++ ++ break; ++ case 30: ++ if (c->mode != MODE_VU) ++ die("--migrate-no-linger is for 
vhost-user mode only"); ++ c->migrate_no_linger = true; ++ + break; + case 'd': + c->debug = 1; +diff --git a/flow.c b/flow.c +index 6a5c8aa..a4b65ea 100644 +--- a/flow.c ++++ b/flow.c +@@ -1089,7 +1089,7 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage, + * as EIO). + */ + foreach_established_tcp_flow(flow) { +- rc = tcp_flow_migrate_source_ext(fd, &flow->tcp); ++ rc = tcp_flow_migrate_source_ext(c, fd, &flow->tcp); + if (rc) { + flow_err(flow, "Can't send extended data: %s", + strerror_(-rc)); +diff --git a/passt.1 b/passt.1 +index 60066c2..cef98b2 100644 +--- a/passt.1 ++++ b/passt.1 +@@ -439,6 +439,30 @@ Default, for \-\-vhost-user mode only, is to append \fI.repair\fR to the path + chosen for the hypervisor UNIX domain socket. No socket is created if not in + \-\-vhost-user mode. + ++.TP ++.BR \-\-migrate-exit " (DEPRECATED)" ++Exit after a completed migration as source. By default, \fBpasst\fR keeps ++running and the migrated guest can continue using its connection, or a new guest ++can connect. ++ ++Note that this configuration option is \fBdeprecated\fR and will be removed in a ++future version. It is not expected to be of any use, and it simply reflects a ++legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR ++below. ++ ++.TP ++.BR \-\-migrate-no-linger " (DEPRECATED)" ++Close TCP sockets on the source instance once migration completes. ++ ++By default, sockets are kept open, and events on data sockets are ignored, so ++that any further message reaching sockets after the source migrated is silently ++ignored, to avoid connection resets in case data is received after migration. ++ ++Note that this configuration option is \fBdeprecated\fR and will be removed in a ++future version. It is not expected to be of any use, and it simply reflects a ++legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR ++below. 
++ + .TP + .BR \-F ", " \-\-fd " " \fIFD + Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened +@@ -454,6 +478,11 @@ is closed. + Quit after handling a single client connection, that is, once the client closes + the socket, or once we get a socket error. + ++\fBNote\fR: this option has no effect after \fBpasst\fR completes a migration as ++source, because, in that case, exiting would close sockets for active ++connections, which would in turn cause connection resets if any further data is ++received. See also the description of \fI\-\-migrate-no-linger\fR. ++ + .TP + .BR \-t ", " \-\-tcp-ports " " \fIspec + Configure TCP port forwarding to guest. \fIspec\fR can be one of: +diff --git a/passt.h b/passt.h +index 8693794..4cfd6eb 100644 +--- a/passt.h ++++ b/passt.h +@@ -241,6 +241,8 @@ struct ip6_ctx { + * @device_state_fd: Device state migration channel + * @device_state_result: Device state migration result + * @migrate_target: Are we the target, on the next migration request? 
++ * @migrate_no_linger: Close sockets as we migrate them ++ * @migrate_exit: Exit (on source) once migration is complete + */ + struct ctx { + enum passt_modes mode; +@@ -318,6 +320,8 @@ struct ctx { + int device_state_fd; + int device_state_result; + bool migrate_target; ++ bool migrate_no_linger; ++ bool migrate_exit; + }; + + void proto_update_l2_buf(const unsigned char *eth_d, +diff --git a/tcp.c b/tcp.c +index 0ac298a..1b22f70 100644 +--- a/tcp.c ++++ b/tcp.c +@@ -3284,12 +3284,14 @@ int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn) + + /** + * tcp_flow_migrate_source_ext() - Dump queues, close sockets, send final data ++ * @c: Execution context + * @fd: Descriptor for state migration + * @conn: Pointer to the TCP connection structure + * + * Return: 0 on success, negative (not -EIO) on failure, -EIO on sending failure + */ +-int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn) ++int tcp_flow_migrate_source_ext(const struct ctx *c, ++ int fd, const struct tcp_tap_conn *conn) + { + uint32_t peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap; + struct tcp_tap_transfer_ext *t = &migrate_ext[FLOW_IDX(conn)]; +@@ -3334,7 +3336,10 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn) + if ((rc = tcp_flow_dump_seq(conn, &t->seq_rcv))) + goto fail; + +- close(s); ++ if (c->migrate_no_linger) ++ close(s); ++ else ++ epoll_del(c, s); + + /* Adjustments unrelated to FIN segments: sequence numbers we dumped are + * based on the end of the queues. 
+diff --git a/tcp_conn.h b/tcp_conn.h +index 35d813d..38b5c54 100644 +--- a/tcp_conn.h ++++ b/tcp_conn.h +@@ -236,7 +236,8 @@ int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn); + int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn); + + int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn); +-int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn); ++int tcp_flow_migrate_source_ext(const struct ctx *c, int fd, ++ const struct tcp_tap_conn *conn); + + int tcp_flow_migrate_target(struct ctx *c, int fd); + int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd); +diff --git a/test/lib/setup b/test/lib/setup +index 575bc21..5994598 100755 +--- a/test/lib/setup ++++ b/test/lib/setup +@@ -350,7 +350,7 @@ setup_migrate() { + + sleep 1 + +- __opts="--vhost-user" ++ __opts="--vhost-user --migrate-exit --migrate-no-linger" + [ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_1.pcap" + [ ${DEBUG} -eq 1 ] && __opts="${__opts} -d" + [ ${TRACE} -eq 1 ] && __opts="${__opts} --trace" +@@ -360,7 +360,7 @@ setup_migrate() { + + context_run_bg passt_repair_1 "./passt-repair ${STATESETUP}/passt_1.socket.repair" + +- __opts="--vhost-user" ++ __opts="--vhost-user --migrate-exit --migrate-no-linger" + [ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_2.pcap" + [ ${DEBUG} -eq 1 ] && __opts="${__opts} -d" + [ ${TRACE} -eq 1 ] && __opts="${__opts} --trace" +diff --git a/vhost_user.c b/vhost_user.c +index 105f77a..c4d3a52 100644 +--- a/vhost_user.c ++++ b/vhost_user.c +@@ -1208,7 +1208,12 @@ void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events) + if (msg.hdr.request == VHOST_USER_CHECK_DEVICE_STATE && + vdev->context->device_state_result == 0 && + !vdev->context->migrate_target) { +- info("Migration complete, exiting"); +- _exit(EXIT_SUCCESS); ++ if (vdev->context->migrate_exit) { ++ info("Migration complete, exiting"); ++ _exit(EXIT_SUCCESS); ++ } ++ ++ info("Migration 
complete"); ++ vdev->context->one_off = false; + } + } +-- +2.47.1 + diff --git a/passt.spec b/passt.spec index f9b2a13..d827c71 100644 --- a/passt.spec +++ b/passt.spec @@ -12,7 +12,7 @@ Name: passt Version: 0^20250512.g8ec1341 -Release: 1%{?dist} +Release: 2%{?dist} Summary: User-mode networking daemons for virtual machines and namespaces License: GPL-2.0-or-later AND BSD-3-Clause Group: System Environment/Daemons @@ -20,6 +20,7 @@ URL: https://passt.top/ Source: https://passt.top/passt/snapshot/passt-%{git_hash}.tar.xz Patch1: 0001-selinux-Drop-user_namespace-create-allow-rules.patch +Patch2: 0002-treewide-By-default-don-t-quit-source-after-migratio.patch BuildRequires: gcc, make, git, checkpolicy, selinux-policy-devel Requires: (%{name}-selinux = %{version}-%{release} if selinux-policy-%{selinuxtype}) @@ -132,6 +133,9 @@ fi %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp %changelog +* Tue Jul 29 2025 Stefano Brivio - 0^20250512.g8ec1341-2 +- Resolves: RHEL-106326 + * Tue May 13 2025 Stefano Brivio - 0^20250512.g8ec1341-1 - Resolves: RHEL-84283