From 87e7d167fe9522217bd86bece801af8fed97e69a Mon Sep 17 00:00:00 2001
From: AlmaLinux RelEng Bot <eabdullin@almalinux.org>
Date: Tue, 19 May 2026 19:26:12 -0400
Subject: [PATCH] import UBI passt-0^20251210.gd04c480-4.el10_2

---
 .gitignore                                    |   2 +-
 ...ult-don-t-quit-source-after-migratio.patch | 264 ----------
 ...s-of-sequence-comparison-macros-to-u.patch |  48 --
 ...er-FIN-flags-with-mismatching-sequen.patch |  76 ---
 ...n-MSS-window-on-no-queued-data-or-no.patch | 110 ++++
 ...le-matching-IP-version-if-not-suppor.patch |  90 ++++
 ...ove-sockets-from-epoll-loop-when-con.patch |  49 --
 ...ead-and-watch-permissions-on-netns-d.patch |  58 +++
 ...pen-permissions-on-netns-directory-o.patch |  68 +++
 ...-issue-in-check-for-approximating-wi.patch |  74 +++
 ...emove-unneeded-epoll_ref-indirection.patch |  48 ++
 ...-socket-to-flow-inside-udp_flow_sock.patch |  47 ++
 ...tor-tcp_splice_conn_epoll_events-to-.patch |  94 ++++
 ...low_epoll_set-to-centralize-epoll-op.patch | 489 ++++++++++++++++++
 ...ropagate-tap-side-RST-to-socket-side.patch |  99 ++++
 ...plit-activity-timeouts-for-UDP-flows.patch | 239 +++++++++
 ...n-working-activity-timeout-mechanism.patch |  22 +-
 ...-inactivity-timeouts-based-on-a-cloc.patch |  88 ++--
 ...end_flag-to-send-TCP-keepalive-segme.patch |  22 +-
 ...palive-segments-after-a-period-of-ta.patch |  46 +-
 ...-buffer-boost-with-EPOLLOUT-monitori.patch | 132 +++++
 passt.spec                                    |  83 +--
 sources                                       |   2 +-
 23 files changed, 1697 insertions(+), 553 deletions(-)
 delete mode 100644 0001-treewide-By-default-don-t-quit-source-after-migratio.patch
 delete mode 100644 0002-tcp-Cast-operands-of-sequence-comparison-macros-to-u.patch
 delete mode 100644 0003-tcp-Don-t-consider-FIN-flags-with-mismatching-sequen.patch
 create mode 100644 0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch
 create mode 100644 0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch
 delete mode 100644 0004-tcp-Properly-remove-sockets-from-epoll-loop-when-con.patch
 create mode 100644 0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch
 create mode 100644 0006-selinux-Enable-open-permissions-on-netns-directory-o.patch
 create mode 100644 0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch
 create mode 100644 0008-udp_flow-remove-unneeded-epoll_ref-indirection.patch
 create mode 100644 0009-udp_flow-Assign-socket-to-flow-inside-udp_flow_sock.patch
 create mode 100644 0010-tcp_splice-Refactor-tcp_splice_conn_epoll_events-to-.patch
 create mode 100644 0011-flow-Introduce-flow_epoll_set-to-centralize-epoll-op.patch
 create mode 100644 0012-tcp-Properly-propagate-tap-side-RST-to-socket-side.patch
 create mode 100644 0013-udp-Split-activity-timeouts-for-UDP-flows.patch
 rename 0005-tcp-Remove-non-working-activity-timeout-mechanism.patch => 0014-tcp-Remove-non-working-activity-timeout-mechanism.patch (83%)
 rename 0006-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch => 0015-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch (66%)
 rename 0007-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch => 0016-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch (75%)
 rename 0008-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch => 0017-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch (82%)
 create mode 100644 0018-tcp-Replace-send-buffer-boost-with-EPOLLOUT-monitori.patch

diff --git a/.gitignore b/.gitignore
index 9706623..20ac24f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-passt-8ec134109eb136432a29bdf5a14f8b1fd4e46208.tar.xz
+passt-d04c48032bcf724550d0b8f652fd00efcd2dfad0.tar.xz
diff --git a/0001-treewide-By-default-don-t-quit-source-after-migratio.patch b/0001-treewide-By-default-don-t-quit-source-after-migratio.patch
deleted file mode 100644
index 95e79e7..0000000
--- a/0001-treewide-By-default-don-t-quit-source-after-migratio.patch
+++ /dev/null
@@ -1,264 +0,0 @@
-From b0b5ce0a76cf7fec0b00405732fd94e0b34e8d84 Mon Sep 17 00:00:00 2001
-From: Stefano Brivio <sbrivio@redhat.com>
-Date: Thu, 17 Jul 2025 10:38:17 +0200
-Subject: [PATCH] treewide: By default, don't quit source after migration, keep
- sockets open
-
-We are hitting an issue in the KubeVirt integration where some data is
-still sent to the source instance even after migration is complete. As
-we exit, the kernel closes our sockets and resets connections. The
-resulting RST segments are sent to peers, effectively terminating
-connections that were meanwhile migrated.
-
-At the moment, this is not done intentionally, but in the future
-KubeVirt might enable OVN-Kubernetes features where source and
-destination nodes are explicitly getting mirrored traffic for a while,
-in order to decrease migration downtime.
-
-By default, don't quit after migration is completed on the source: the
-previous behaviour can be enabled with the new, but deprecated,
---migrate-exit option. After migration (as source), the -1 / --one-off
-option has no effect.
-
-Also, by default, keep migrated TCP sockets open (in repair mode) as
-long as we're running, and ignore events on any epoll descriptor
-representing data channels. The previous behaviour can be enabled with
-the new, equally deprecated, --migrate-no-linger option.
-
-By keeping sockets open, and not exiting, we prevent the kernel
-running on the source node to send out RST segments if further data
-reaches us.
-
-Reported-by: Nir Dothan <ndothan@redhat.com>
-Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-(cherry picked from commit a8782865c342eb2682cca292d5bf92b567344351)
----
- conf.c         | 22 ++++++++++++++++++++++
- flow.c         |  2 +-
- passt.1        | 29 +++++++++++++++++++++++++++++
- passt.h        |  4 ++++
- tcp.c          |  9 +++++++--
- tcp_conn.h     |  3 ++-
- test/lib/setup |  4 ++--
- vhost_user.c   |  9 +++++++--
- 8 files changed, 74 insertions(+), 8 deletions(-)
-
-diff --git a/conf.c b/conf.c
-index a6d7e22..1295d89 100644
---- a/conf.c
-+++ b/conf.c
-@@ -864,6 +864,14 @@ static void usage(const char *name, FILE *f, int status)
- 		FPRINTF(f,
- 			"  --repair-path PATH	path for passt-repair(1)\n"
- 			"    default: append '.repair' to UNIX domain path\n");
-+		FPRINTF(f,
-+			"  --migrate-exit	DEPRECATED:\n"
-+			"			source quits after migration\n"
-+			"    default: source keeps running after migration\n");
-+		FPRINTF(f,
-+			"  --migrate-no-linger	DEPRECATED:\n"
-+			"			close sockets on migration\n"
-+			"    default: keep sockets open, ignore events\n");
- 	}
- 
- 	FPRINTF(f,
-@@ -1468,6 +1476,8 @@ void conf(struct ctx *c, int argc, char **argv)
- 		{"socket-path",	required_argument,	NULL,		's' },
- 		{"fqdn",	required_argument,	NULL,		27 },
- 		{"repair-path",	required_argument,	NULL,		28 },
-+		{"migrate-exit", no_argument,		NULL,		29 },
-+		{"migrate-no-linger", no_argument,	NULL,		30 },
- 		{ 0 },
- 	};
- 	const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:";
-@@ -1683,6 +1693,18 @@ void conf(struct ctx *c, int argc, char **argv)
- 					   optarg))
- 				die("Invalid passt-repair path: %s", optarg);
- 
-+			break;
-+		case 29:
-+			if (c->mode != MODE_VU)
-+				die("--migrate-exit is for vhost-user mode only");
-+			c->migrate_exit = true;
-+
-+			break;
-+		case 30:
-+			if (c->mode != MODE_VU)
-+				die("--migrate-no-linger is for vhost-user mode only");
-+			c->migrate_no_linger = true;
-+
- 			break;
- 		case 'd':
- 			c->debug = 1;
-diff --git a/flow.c b/flow.c
-index 6a5c8aa..a4b65ea 100644
---- a/flow.c
-+++ b/flow.c
-@@ -1089,7 +1089,7 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
- 	 * as EIO).
- 	 */
- 	foreach_established_tcp_flow(flow) {
--		rc = tcp_flow_migrate_source_ext(fd, &flow->tcp);
-+		rc = tcp_flow_migrate_source_ext(c, fd, &flow->tcp);
- 		if (rc) {
- 			flow_err(flow, "Can't send extended data: %s",
- 				 strerror_(-rc));
-diff --git a/passt.1 b/passt.1
-index 60066c2..cef98b2 100644
---- a/passt.1
-+++ b/passt.1
-@@ -439,6 +439,30 @@ Default, for \-\-vhost-user mode only, is to append \fI.repair\fR to the path
- chosen for the hypervisor UNIX domain socket. No socket is created if not in
- \-\-vhost-user mode.
- 
-+.TP
-+.BR \-\-migrate-exit (DEPRECATED)
-+Exit after a completed migration as source. By default, \fBpasst\fR keeps
-+running and the migrated guest can continue using its connection, or a new guest
-+can connect.
-+
-+Note that this configuration option is \fBdeprecated\fR and will be removed in a
-+future version. It is not expected to be of any use, and it simply reflects a
-+legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR
-+below.
-+
-+.TP
-+.BR \-\-migrate-no-linger (DEPRECATED)
-+Close TCP sockets on the source instance once migration completes.
-+
-+By default, sockets are kept open, and events on data sockets are ignored, so
-+that any further message reaching sockets after the source migrated is silently
-+ignored, to avoid connection resets in case data is received after migration.
-+
-+Note that this configuration option is \fBdeprecated\fR and will be removed in a
-+future version. It is not expected to be of any use, and it simply reflects a
-+legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR
-+below.
-+
- .TP
- .BR \-F ", " \-\-fd " " \fIFD
- Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened
-@@ -454,6 +478,11 @@ is closed.
- Quit after handling a single client connection, that is, once the client closes
- the socket, or once we get a socket error.
- 
-+\fBNote\fR: this option has no effect after \fBpasst\fR completes a migration as
-+source, because, in that case, exiting would close sockets for active
-+connections, which would in turn cause connection resets if any further data is
-+received. See also the description of \fI\-\-migrate-no-linger\fR.
-+
- .TP
- .BR \-t ", " \-\-tcp-ports " " \fIspec
- Configure TCP port forwarding to guest. \fIspec\fR can be one of:
-diff --git a/passt.h b/passt.h
-index 8693794..4cfd6eb 100644
---- a/passt.h
-+++ b/passt.h
-@@ -241,6 +241,8 @@ struct ip6_ctx {
-  * @device_state_fd:	Device state migration channel
-  * @device_state_result: Device state migration result
-  * @migrate_target:	Are we the target, on the next migration request?
-+ * @migrate_no_linger:	Close sockets as we migrate them
-+ * @migrate_exit:	Exit (on source) once migration is complete
-  */
- struct ctx {
- 	enum passt_modes mode;
-@@ -318,6 +320,8 @@ struct ctx {
- 	int device_state_fd;
- 	int device_state_result;
- 	bool migrate_target;
-+	bool migrate_no_linger;
-+	bool migrate_exit;
- };
- 
- void proto_update_l2_buf(const unsigned char *eth_d,
-diff --git a/tcp.c b/tcp.c
-index 0ac298a..1b22f70 100644
---- a/tcp.c
-+++ b/tcp.c
-@@ -3284,12 +3284,14 @@ int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn)
- 
- /**
-  * tcp_flow_migrate_source_ext() - Dump queues, close sockets, send final data
-+ * @c:		Execution context
-  * @fd:		Descriptor for state migration
-  * @conn:	Pointer to the TCP connection structure
-  *
-  * Return: 0 on success, negative (not -EIO) on failure, -EIO on sending failure
-  */
--int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
-+int tcp_flow_migrate_source_ext(const struct ctx *c,
-+				int fd, const struct tcp_tap_conn *conn)
- {
- 	uint32_t peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap;
- 	struct tcp_tap_transfer_ext *t = &migrate_ext[FLOW_IDX(conn)];
-@@ -3334,7 +3336,10 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn)
- 	if ((rc = tcp_flow_dump_seq(conn, &t->seq_rcv)))
- 		goto fail;
- 
--	close(s);
-+	if (c->migrate_no_linger)
-+		close(s);
-+	else
-+		epoll_del(c, s);
- 
- 	/* Adjustments unrelated to FIN segments: sequence numbers we dumped are
- 	 * based on the end of the queues.
-diff --git a/tcp_conn.h b/tcp_conn.h
-index 35d813d..38b5c54 100644
---- a/tcp_conn.h
-+++ b/tcp_conn.h
-@@ -236,7 +236,8 @@ int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn);
- int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn);
- 
- int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn);
--int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn);
-+int tcp_flow_migrate_source_ext(const struct ctx *c, int fd,
-+				const struct tcp_tap_conn *conn);
- 
- int tcp_flow_migrate_target(struct ctx *c, int fd);
- int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd);
-diff --git a/test/lib/setup b/test/lib/setup
-index 575bc21..5994598 100755
---- a/test/lib/setup
-+++ b/test/lib/setup
-@@ -350,7 +350,7 @@ setup_migrate() {
- 
- 	sleep 1
- 
--	__opts="--vhost-user"
-+	__opts="--vhost-user --migrate-exit --migrate-no-linger"
- 	[ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_1.pcap"
- 	[ ${DEBUG} -eq 1 ] && __opts="${__opts} -d"
- 	[ ${TRACE} -eq 1 ] && __opts="${__opts} --trace"
-@@ -360,7 +360,7 @@ setup_migrate() {
- 
- 	context_run_bg passt_repair_1 "./passt-repair ${STATESETUP}/passt_1.socket.repair"
- 
--	__opts="--vhost-user"
-+	__opts="--vhost-user --migrate-exit --migrate-no-linger"
- 	[ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_2.pcap"
- 	[ ${DEBUG} -eq 1 ] && __opts="${__opts} -d"
- 	[ ${TRACE} -eq 1 ] && __opts="${__opts} --trace"
-diff --git a/vhost_user.c b/vhost_user.c
-index 105f77a..c4d3a52 100644
---- a/vhost_user.c
-+++ b/vhost_user.c
-@@ -1208,7 +1208,12 @@ void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events)
- 	if (msg.hdr.request == VHOST_USER_CHECK_DEVICE_STATE &&
- 	    vdev->context->device_state_result == 0 &&
- 	    !vdev->context->migrate_target) {
--		info("Migration complete, exiting");
--		_exit(EXIT_SUCCESS);
-+		if (vdev->context->migrate_exit) {
-+			info("Migration complete, exiting");
-+			_exit(EXIT_SUCCESS);
-+		}
-+
-+		info("Migration complete");
-+		vdev->context->one_off = false;
- 	}
- }
--- 
-2.47.1
-
diff --git a/0002-tcp-Cast-operands-of-sequence-comparison-macros-to-u.patch b/0002-tcp-Cast-operands-of-sequence-comparison-macros-to-u.patch
deleted file mode 100644
index 2f18cda..0000000
--- a/0002-tcp-Cast-operands-of-sequence-comparison-macros-to-u.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From bd90a820852ff8966aeb83231c29e48849db3493 Mon Sep 17 00:00:00 2001
-From: Stefano Brivio <sbrivio@redhat.com>
-Date: Fri, 29 Aug 2025 22:11:31 +0200
-Subject: [PATCH 2/3] tcp: Cast operands of sequence comparison macros to
- uint32_t before using them
-
-Otherwise, passing signed types causes automatic promotion of the
-result of the subtractions as well, which is not what we want, as
-these macros rely on unsigned 32-bit arithmetic.
-
-The next patch introduces a ssize_t operand for SEQ_LE, illustrating
-the issue.
-
-Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
-Tested-by: Paul Holzinger <pholzing@redhat.com>
-Reviewed-by: Jon Maloy <jmaloy@redhat.com>
-(cherry picked from commit 660cd6907e14a41ad9bc77d317140c70ab416fce)
----
- tcp_internal.h | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/tcp_internal.h b/tcp_internal.h
-index 36c6533..c80ba40 100644
---- a/tcp_internal.h
-+++ b/tcp_internal.h
-@@ -18,10 +18,14 @@
- 						   sizeof(struct ipv6hdr), \
- 						   sizeof(uint32_t))
- 
--#define SEQ_LE(a, b)			((b) - (a) < MAX_WINDOW)
--#define SEQ_LT(a, b)			((b) - (a) - 1 < MAX_WINDOW)
--#define SEQ_GE(a, b)			((a) - (b) < MAX_WINDOW)
--#define SEQ_GT(a, b)			((a) - (b) - 1 < MAX_WINDOW)
-+#define SEQ_LE(a, b)			\
-+	((uint32_t)(b) - (uint32_t)(a) < MAX_WINDOW)
-+#define SEQ_LT(a, b)			\
-+	((uint32_t)(b) - (uint32_t)(a) - 1 < MAX_WINDOW)
-+#define SEQ_GE(a, b)			\
-+	((uint32_t)(a) - (uint32_t)(b) < MAX_WINDOW)
-+#define SEQ_GT(a, b)			\
-+	((uint32_t)(a) - (uint32_t)(b) - 1 < MAX_WINDOW)
- 
- #define FIN		(1 << 0)
- #define SYN		(1 << 1)
--- 
-2.47.1
-
diff --git a/0003-tcp-Don-t-consider-FIN-flags-with-mismatching-sequen.patch b/0003-tcp-Don-t-consider-FIN-flags-with-mismatching-sequen.patch
deleted file mode 100644
index 0940651..0000000
--- a/0003-tcp-Don-t-consider-FIN-flags-with-mismatching-sequen.patch
+++ /dev/null
@@ -1,76 +0,0 @@
-From f9278aab878ef58cf8502ea8f904dbb40fbbb16a Mon Sep 17 00:00:00 2001
-From: Stefano Brivio <sbrivio@redhat.com>
-Date: Thu, 2 Oct 2025 00:41:54 +0200
-Subject: [PATCH 3/3] tcp: Don't consider FIN flags with mismatching sequence
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-If a guest or container sends us a FIN segment but its sequence number
-doesn't match the highest sequence of data we *accepted* (not
-necessarily the highest sequence we received), that is,
-conn->seq_from_tap, plus any data we're accepting in the current
-batch, we should discard the flag (not necessarily the segment),
-because there's still data we need to receive (again) before the end
-of the stream.
-
-If we consider those FIN flags as such, we'll end up in the
-situation described below.
-
-Here, 192.168.10.102 is a HTTP server in a Podman container, and
-192.168.10.44 is a client fetching approximately 121 KB of data from
-it:
-
-   82   2.026811 192.168.10.102 → 192.168.10.44 54 TCP 55414 → 44992 [FIN, ACK] Seq=121441 Ack=143 Win=65536 Len=0
-
-the server is done sending
-
-   83   2.026898 192.168.10.44 → 192.168.10.102 54 TCP 44992 → 55414 [ACK] Seq=143 Ack=114394 Win=216192 Len=0
-
-pasta (client) acknowledges a previous sequence, because of
-a short sendmsg()
-
-   84   2.027324 192.168.10.44 → 192.168.10.102 54 TCP 44992 → 55414 [FIN, ACK] Seq=143 Ack=114394 Win=216192 Len=0
-
-pasta (client) sends FIN, ACK as the client has no more data to
-send (a single GET request), while still acknowledging a previous
-sequence, because the retransmission didn't happen yet
-
-   85   2.027349 192.168.10.102 → 192.168.10.44 54 TCP 55414 → 44992 [ACK] Seq=121442 Ack=144 Win=65536 Len=0
-
-the server acknowledges the FIN, ACK
-
-   86   2.224125 192.168.10.102 → 192.168.10.44 4150 TCP [TCP Retransmission] 55414 → 44992 [ACK] Seq=114394 Ack=144 Win=65536 Len=4096 [TCP segment of a reassembled PDU]
-
-and finally a retransmission comes, but as we wrongly switched to
-the CLOSE-WAIT state,
-
-   87   2.224202 192.168.10.44 → 192.168.10.102 54 TCP 44992 → 55414 [RST] Seq=144 Win=0 Len=0
-
-we consider frame #86 as an acknowledgement for the FIN segment we
-sent, and close the connection, while we still had to re-receive
-(and finally send) the missing data segment, instead.
-
-Link: https://github.com/containers/podman/issues/27179
-Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
-(cherry picked from commit b145441913eef6f8885b6b84531e944ff593790c)
----
- tcp.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tcp.c b/tcp.c
-index 0ac298a..4428305 100644
---- a/tcp.c
-+++ b/tcp.c
-@@ -1696,7 +1696,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
- 			}
- 		}
- 
--		if (th->fin)
-+		if (th->fin && seq == seq_from_tap)
- 			fin = 1;
- 
- 		if (!len)
--- 
-2.47.1
-
diff --git a/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch b/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch
new file mode 100644
index 0000000..50f80a3
--- /dev/null
+++ b/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch
@@ -0,0 +1,110 @@
+From b40f5cd8c8e16c6eceb1f26eb895527fda84068b Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Sat, 13 Dec 2025 14:19:13 +0100
+Subject: [PATCH] tcp: Use less-than-MSS window on no queued data, or no data
+ sent recently
+
+We limit the advertised window to guests and containers to the
+available length of the sending buffer, and if it's less than the MSS,
+since commit cf1925fb7b77 ("tcp: Don't limit window to less-than-MSS
+values, use zero instead"), we approximate that limit to zero.
+
+This way, we'll trigger a window update as soon as we realise that we
+can advertise a larger value, just like we do in all other cases where
+we advertise a zero-sized window.
+
+By doing that, we don't wait for the peer to send us data before we
+update the window. This matters because the guest or container might
+be trying to aggregate more data and won't send us anything at all if
+the advertised window is too small.
+
+However, this might be problematic in two situations:
+
+1. one, reported by Tyler, where the remote (receiving) peer
+   advertises a window that's smaller than what we usually get and
+   very close to the MSS, causing the kernel to give us a starting
+   size of the buffer that's less than the MSS we advertise to the
+   guest or container.
+
+   If this happens, we'll never advertise a non-zero window after
+   the handshake, and the container or guest will never send us any
+   data at all.
+
+   With a simple 'curl https://cloudflare.com/', we get, with default
+   TCP memory parameters, a 65535-byte window from the peer, and 46080
+   bytes of initial sending buffer from the kernel. But we advertised
+   a 65480-byte MSS, and we'll never actually receive the client
+   request.
+
+   This seems to be specific to Cloudflare for some reason, probably
+   deriving from a particular tuning of TCP parameters on their
+   servers.
+
+2. another one, hypothesised by David, where the peer might only be
+   willing to process (and acknowledge) data in batches.
+
+   We might have queued outbound data which is, at the same time, not
+   enough to fill one of these batches and be acknowledged and removed
+   from the sending queue, but enough to make our available buffer
+   smaller than the MSS, and the connection will hang.
+
+Take care of both cases by:
+
+a. not approximating the sending buffer to zero if we have no outbound
+   queued data at all, because in that case we don't expect the
+   available buffer to increase if we don't send any data, so there's
+   no point in waiting for it to grow larger than the MSS.
+
+   This fixes problem 1. above.
+
+b. also using the full sending buffer size if we haven't sent data to
+   the socket for a while (reported by tcpi_last_data_sent). This part
+   was already suggested by David in:
+
+     https://archives.passt.top/passt-dev/aTZzgtcKWLb28zrf@zatzit/
+
+   and I'm now picking ten times the RTT as a somewhat arbitrary
+   threshold.
+
+   This is meant to take care of potential problem 2. above, but it
+   also happens to fix 1.
+
+Reported-by: Tyler Cloud <tcloud@redhat.com>
+Link: https://bugs.passt.top/show_bug.cgi?id=183
+Suggested-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+---
+ tcp.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/tcp.c b/tcp.c
+index 81bc114..b179e39 100644
+--- a/tcp.c
++++ b/tcp.c
+@@ -1211,8 +1211,21 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
+ 		 * the MSS to zero, as we already have mechanisms in place to
+ 		 * force updates after the window becomes zero. This matches the
+ 		 * suggestion from RFC 813, Section 4.
++		 *
++		 * But don't do this if, either:
++		 *
++		 * - there's nothing in the outbound queue: the size of the
++		 *   sending buffer is limiting us, and it won't increase if we
++		 *   don't send data, so there's no point in waiting, or
++		 *
++		 * - we haven't sent data in a while (somewhat arbitrarily, ten
++		 *   times the RTT), as that might indicate that the receiver
++		 *   will only process data in batches that are large enough,
++		 *   but we won't send enough to fill one because we're stuck
++		 *   with pending data in the outbound queue
+ 		 */
+-		if (limit < MSS_GET(conn))
++		if (limit < MSS_GET(conn) && sendq &&
++		    tinfo->tcpi_last_data_sent < tinfo->tcpi_rtt / 1000 * 10)
+ 			limit = 0;
+ 
+ 		new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit);
+-- 
+2.47.1
+
diff --git a/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch b/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch
new file mode 100644
index 0000000..d85c03d
--- /dev/null
+++ b/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch
@@ -0,0 +1,90 @@
+From 75dcbc300bf09c3649823b12d30c4f24de7271d4 Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Tue, 23 Dec 2025 13:39:17 +0100
+Subject: [PATCH] pasta: Warn, disable matching IP version if not supported, in
+ local mode
+
+...instead of exiting, but only if local mode is enabled, that is, if
+we couldn't find a template interface or if the user didn't specify
+one.
+
+With IPv4, we always try to set or copy an address, so check if that
+fails.
+
+With IPv6, in local mode, we rely on the link-local address that's
+automatically generated inside the target namespace, and only fail
+later, as we try to set up routes. Check if that fails, instead.
+
+Otherwise, we'll fail to start if IPv6 support is not built in or
+disabled by the kernel ("ipv6.disable=1" on the command line),
+because, in that case, we'll try to enable local mode by default, and
+then fail to set any address or route.
+
+It would probably be more elegant to check for IP version support in
+conf_ip4_local() and conf_ip6_local(), and not even try to enable
+connectivity for unsupported versions, but it looks less robust than
+trying and failing, as there might be other ways to disable a given
+IP version.
+
+Note that there's currently no way to disable IPv4 support on the
+kernel command line, that is, there's no such thing as an
+ipv4.disable boot parameter. But I guess that's due to be eventually
+implemented, one day, so let's cover that case as well, also for
+consistency.
+
+Reported-by: Iyan <iyanmv@gmail.com>
+Link: https://bugzilla.redhat.com/show_bug.cgi?id=2424192
+Fixes: 4ddd59bc6085 ("conf: Separate local mode for each IP version, don't enable disabled IP version")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+---
+ pasta.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/pasta.c b/pasta.c
+index c307b8a..0ddd6b0 100644
+--- a/pasta.c
++++ b/pasta.c
+@@ -348,6 +348,12 @@ void pasta_ns_conf(struct ctx *c)
+ 						 AF_INET);
+ 			}
+ 
++			if (c->ifi4 == -1 && rc == -ENOTSUP) {
++				warn("IPv4 not supported, disabling");
++				c->ifi4 = 0;
++				goto ipv4_done;
++			}
++
+ 			if (rc < 0) {
+ 				die("Couldn't set IPv4 address(es) in namespace: %s",
+ 				    strerror_(-rc));
+@@ -367,6 +373,7 @@ void pasta_ns_conf(struct ctx *c)
+ 				    strerror_(-rc));
+ 			}
+ 		}
++ipv4_done:
+ 
+ 		if (c->ifi6) {
+ 			rc = nl_addr_get_ll(nl_sock_ns, c->pasta_ifi,
+@@ -413,12 +420,19 @@ void pasta_ns_conf(struct ctx *c)
+ 						  AF_INET6);
+ 			}
+ 
++			if (c->ifi6 == -1 && rc == -ENOTSUP) {
++				warn("IPv6 not supported, disabling");
++				c->ifi6 = 0;
++				goto ipv6_done;
++			}
++
+ 			if (rc < 0) {
+ 				die("Couldn't set IPv6 route(s) in guest: %s",
+ 				    strerror_(-rc));
+ 			}
+ 		}
+ 	}
++ipv6_done:
+ 
+ 	proto_update_l2_buf(c->guest_mac);
+ }
+-- 
+2.47.1
+
diff --git a/0004-tcp-Properly-remove-sockets-from-epoll-loop-when-con.patch b/0004-tcp-Properly-remove-sockets-from-epoll-loop-when-con.patch
deleted file mode 100644
index d8c48d9..0000000
--- a/0004-tcp-Properly-remove-sockets-from-epoll-loop-when-con.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From a7d9ce6cacf8d62ca78fa98d469902c900659cb9 Mon Sep 17 00:00:00 2001
-From: David Gibson <david@gibson.dropbear.id.au>
-Date: Tue, 4 Nov 2025 16:40:43 +1100
-Subject: [PATCH 4/8] tcp: Properly remove sockets from epoll loop when
- connection is closed
-
-Most of the handling for closing a TCP connectin is in conn_event_do() when
-it receives a 'CLOSED' event.  We specifically check for this case and,
-correctly, remove the connection from the flow hash table.  However, we
-also bypass the call tp tcp_epoll_ctl() which is not correct.  By skipping
-tcp_epoll_ctl() we skip it's specific handling of the CLOSED event, which
-includes removing the TCP socket from epoll.
-
-If we somehow get an event on such a stale socket, we'll get a stale flow
-reference.  That flow slot might have been re-used, leading to to a crash
-in conn_at_sidx().
-
-Fixes: b86afe3559c0 ("tcp: Don't defer hash table removal")
-Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
-(cherry picked from commit 60ade6cae70dc48f7f777c38e7c70fc3696784c4)
----
- tcp.c | 9 +++++----
- 1 file changed, 5 insertions(+), 4 deletions(-)
-
-diff --git a/tcp.c b/tcp.c
-index 0ac298a..17e99af 100644
---- a/tcp.c
-+++ b/tcp.c
-@@ -681,12 +681,13 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
- 		flow_dbg(conn, "%s",
- 			 num == -1 	       ? "CLOSED" : tcp_event_str[num]);
- 
--	if (event == CLOSED)
--		flow_hash_remove(c, TAP_SIDX(conn));
--	else if ((event == TAP_FIN_RCVD) && !(conn->events & SOCK_FIN_RCVD))
-+	if ((event == TAP_FIN_RCVD) && !(conn->events & SOCK_FIN_RCVD)) {
- 		conn_flag(c, conn, ACTIVE_CLOSE);
--	else
-+	} else {
-+		if (event == CLOSED)
-+			flow_hash_remove(c, TAP_SIDX(conn));
- 		tcp_epoll_ctl(c, conn);
-+	}
- 
- 	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
- 		tcp_timer_ctl(c, conn);
--- 
-2.47.1
-
diff --git a/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch b/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch
new file mode 100644
index 0000000..f3d94ad
--- /dev/null
+++ b/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch
@@ -0,0 +1,58 @@
+From d2c5133990a7758bfa567fc73216393498949e9b Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Tue, 23 Dec 2025 01:59:34 +0100
+Subject: [PATCH] selinux: Enable read and watch permissions on netns directory
+ as well
+
+With commit 7aeda16a7818 ("selinux: Transition to pasta_t in
+containers"), we need to make sure that pasta can access the target
+namespace directory passed by Podman, and, in a general case, we have
+all the permissions we need.
+
+But if we now start a container without the Podman changes referenced
+by commit fd1bcc30af07 ("selinux: add container_var_run_t type
+transition"), or with them, but with the container being created
+before those and without a reboot in between, we'll additionally need
+'read' and 'watch' permissions on user_tmp_t directory as well, as
+user_tmp_t is still the (inconsistent) context of the namespace entry.
+
+Otherwise, on a container start/restart, we'll get SELinux denials:
+
+  type=AVC msg=audit(1766451401.296:184): avc:  denied  { read } for  pid=2159 comm="pasta.avx2" name="netns" dev="tmpfs" ino=60 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:user_tmp_t:s0
+  tclass=dir permissive=1
+  type=AVC msg=audit(1766451401.298:185): avc:  denied  { watch } for  pid=2159 comm="pasta.avx2" path="/run/user/1001/netns" dev="tmpfs" ino=60 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:user_tmp_t:s0 tclass=dir permissive=1
+
+This can be reproduced quite simply:
+
+  $ podman create -q --name hello hello
+  6c4eaf15a03edf799673a97d84d0331f3a3f34a11015b58c69318101a3232770
+
+  [upgrade passt's SELinux policy to a version including 7aeda16a7818]
+
+  $ podman start hello
+  Error: unable to start container "6c4eaf15a03edf799673a97d84d0331f3a3f34a11015b58c69318101a3232770": pasta failed with exit code 1:
+  netns dir open: Permission denied, exiting
+
+Reported-by: Tuomo Soini <tis@foobar.fi>
+Fixes: 7aeda16a7818 ("selinux: Transition to pasta_t in containers")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+---
+ contrib/selinux/pasta.te | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te
+index 95fe42a..3eb58f6 100644
+--- a/contrib/selinux/pasta.te
++++ b/contrib/selinux/pasta.te
+@@ -149,7 +149,7 @@ allow pasta_t root_t:dir mounton;
+ manage_files_pattern(pasta_t, pasta_pid_t, pasta_pid_t)
+ files_pid_filetrans(pasta_t, pasta_pid_t, file)
+ 
+-allow pasta_t user_tmp_t:dir { add_name remove_name search write };
++allow pasta_t user_tmp_t:dir { add_name read remove_name search watch write };
+ allow pasta_t user_tmp_t:fifo_file append;
+ allow pasta_t user_tmp_t:file { create open write };
+ allow pasta_t user_tmp_t:sock_file { create unlink };
+-- 
+2.47.1
+
diff --git a/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch b/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch
new file mode 100644
index 0000000..fa997ea
--- /dev/null
+++ b/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch
@@ -0,0 +1,68 @@
+From 5f9c51e34e5ff9c78f4b295666fa438402103e84 Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 16 Jan 2026 16:48:46 +0100
+Subject: [PATCH 6/7] selinux: Enable open permissions on netns directory,
+ operations on container_var_run_t
+
+Tuomo reports two further SELinux denials after upgrading to a
+passt-selinux version that includes the transition to pasta_t for
+containers, one I could reproduce:
+
+  denied  { open } for  pid=3343050 comm="pasta.avx2" path="/run/user/1000/netns" dev="tmpfs" ino=51 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:user_tmp_t:s0 tclass=dir permissive=1
+
+which I didn't take care of in the previous commit, d2c5133990a7
+("selinux: Enable read and watch permissions on netns directory as
+well"), as it didn't appear in my quick test. But I can make pasta use
+"open" on the network namespace entry by simply using it to make
+connections.
+
+So, for that, add "open" to the existing rule for user_tmp_t:dir.
+
+Then, another one I couldn't reproduce instead:
+
+  denied  { write } for  pid=3589324 comm="pasta.avx2" name="rootless-netns" dev="tmpfs" ino=36 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:container_var_run_t:s0 tclass=dir permissive=0
+
+which, I think, comes from a specific combination of versions of
+container-selinux, Podman, and passt-selinux packages, which
+prevents the expected type transition on container_var_run_t unless
+restorecon is invoked manually, or until a reboot.
+
+Allowing the same permissions on container_var_run_t as we do on
+ifconfig_var_run_t is harmless, so do that to prevent this further
+denial.
+
+Reported-by: Tuomo Soini <tis@foobar.fi>
+Fixes: d2c5133990a7 ("selinux: Enable read and watch permissions on netns directory as well")
+Fixes: 7aeda16a7818 ("selinux: Transition to pasta_t in containers")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit a6d92ca82c9ea0b395aa56c568ee6b6e6d4ac81e)
+---
+ contrib/selinux/pasta.te | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te
+index 3eb58f6..fb51416 100644
+--- a/contrib/selinux/pasta.te
++++ b/contrib/selinux/pasta.te
+@@ -149,7 +149,7 @@ allow pasta_t root_t:dir mounton;
+ manage_files_pattern(pasta_t, pasta_pid_t, pasta_pid_t)
+ files_pid_filetrans(pasta_t, pasta_pid_t, file)
+ 
+-allow pasta_t user_tmp_t:dir { add_name read remove_name search watch write };
++allow pasta_t user_tmp_t:dir { add_name open read remove_name search watch write };
+ allow pasta_t user_tmp_t:fifo_file append;
+ allow pasta_t user_tmp_t:file { create open write };
+ allow pasta_t user_tmp_t:sock_file { create unlink };
+@@ -249,7 +249,9 @@ type_transition container_runtime_t user_tmp_t : dir ifconfig_var_run_t "netns";
+ type_transition container_runtime_t container_var_run_t : dir ifconfig_var_run_t "netns";
+ type_transition container_runtime_t user_tmp_t : dir ifconfig_var_run_t "rootless-netns";
+ type_transition container_runtime_t container_var_run_t : dir ifconfig_var_run_t "rootless-netns";
++allow pasta_t container_var_run_t:dir { add_name open rmdir write };
+ allow pasta_t ifconfig_var_run_t:dir { add_name open rmdir write };
++allow pasta_t container_var_run_t:file { create open write };
+ allow pasta_t ifconfig_var_run_t:file { create open write };
+ allow systemd_user_runtimedir_t ifconfig_var_run_t:dir rmdir;
+ 
+-- 
+2.47.1
+
diff --git a/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch b/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch
new file mode 100644
index 0000000..6f9f1e7
--- /dev/null
+++ b/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch
@@ -0,0 +1,74 @@
+From b9fbbca97e4ebf2358da3c0d971e1cc214cab632 Mon Sep 17 00:00:00 2001
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Fri, 9 Jan 2026 13:52:00 +0100
+Subject: [PATCH 7/7] tcp: Fix rounding issue in check for approximating window
+ to zero
+
+In general, we approximate the advertised window to zero if we would
+otherwise advertise less than a MSS worth, and the reasoning behind
+that is explained in cf1925fb7b77 ("tcp: Don't limit window to
+less-than-MSS values, use zero instead").
+
+Then, in commit b40f5cd8c8e1 ("tcp: Use less-than-MSS window on no
+queued data, or no data sent recently"), I introduced some conditions
+under which we won't do that, including a check on whether any data
+was sent recently.
+
+As an arbitrary but probably reasonable threshold, we consider data to
+have recently been sent if that occurred less than ten times the
+round-trip time (RTT) ago.
+
+The time elapsed since the last data transmission is reported by the
+kernel in milliseconds, in the tcpi_last_data_sent field of struct
+tcp_info, and the RTT is reported in microseconds instead, in
+tcpi_rtt.
+
+To avoid the risk of overflow in a simple way, for the purpose of this
+comparison, I converted tcpi_rtt to milliseconds first, but this means
+that the check will always be false (and we'll never approximate the
+window to zero) if the RTT is below one millisecond.
+
+This, in turn, reintroduces nasty delay issues in transfers over
+non-local connections that nevertheless have almost-local (low) latency.
+
+Given that we want to use ten times the RTT as an arbitrary "long
+enough" upper bound, round the RTT up while converting it to
+milliseconds.
+
+As an alternative, we could perform the comparison in microseconds,
+but we would need a slightly more complicated implementation to
+exclude overflows, and it's definitely not worth it given the nature
+of this threshold.
+
+Fixes: b40f5cd8c8e1 ("tcp: Use less-than-MSS window on no queued data, or no data sent recently")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+(cherry picked from commit 2be0e790804f99580b1c8a1781c49913440607f2)
+---
+ tcp.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/tcp.c b/tcp.c
+index 23fcbc3..8f4f087 100644
+--- a/tcp.c
++++ b/tcp.c
+@@ -1180,6 +1180,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
+ 	if ((conn->flags & LOCAL) || tcp_rtt_dst_low(conn)) {
+ 		new_wnd_to_tap = tinfo->tcpi_snd_wnd;
+ 	} else {
++		unsigned rtt_ms_ceiling = DIV_ROUND_UP(tinfo->tcpi_rtt, 1000);
+ 		uint32_t sendq;
+ 		int limit;
+ 
+@@ -1223,7 +1224,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
+ 		 *   with pending data in the outbound queue
+ 		 */
+ 		if (limit < MSS_GET(conn) && sendq &&
+-		    tinfo->tcpi_last_data_sent < tinfo->tcpi_rtt / 1000 * 10)
++		    tinfo->tcpi_last_data_sent < rtt_ms_ceiling * 10)
+ 			limit = 0;
+ 
+ 		new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit);
+-- 
+2.47.1
+
diff --git a/0008-udp_flow-remove-unneeded-epoll_ref-indirection.patch b/0008-udp_flow-remove-unneeded-epoll_ref-indirection.patch
new file mode 100644
index 0000000..48e643a
--- /dev/null
+++ b/0008-udp_flow-remove-unneeded-epoll_ref-indirection.patch
@@ -0,0 +1,48 @@
+From cbc536354eb5f2d6f19e3842c2ff5a3c2725b8ed Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Fri, 9 Jan 2026 17:54:35 +0100
+Subject: [PATCH 08/18] udp_flow: remove unneeded epoll_ref indirection
+
+The fref union was used to convert flow_sidx_t to uint32_t for
+assignment to ref.data.  This is unnecessary since epoll_ref already
+contains a flowside member of type flow_sidx_t, so we can assign
+directly.
+
+This aligns with how icmp.c and other callers assign flow_sidx_t to
+epoll_ref.
+
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit ab27852d0eebcd96d33c3699b44596a827b83bc6)
+---
+ udp_flow.c | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+diff --git a/udp_flow.c b/udp_flow.c
+index 8907f2f..0ba7880 100644
+--- a/udp_flow.c
++++ b/udp_flow.c
+@@ -74,10 +74,6 @@ static int udp_flow_sock(const struct ctx *c,
+ {
+ 	const struct flowside *side = &uflow->f.side[sidei];
+ 	uint8_t pif = uflow->f.pif[sidei];
+-	union {
+-		flow_sidx_t sidx;
+-		uint32_t data;
+-	} fref = { .sidx = FLOW_SIDX(uflow, sidei) };
+ 	union epoll_ref ref;
+ 	int rc;
+ 	int s;
+@@ -89,7 +85,7 @@ static int udp_flow_sock(const struct ctx *c,
+ 	}
+ 
+ 	ref.type = EPOLL_TYPE_UDP;
+-	ref.data = fref.data;
++	ref.flowside = FLOW_SIDX(uflow, sidei);
+ 	ref.fd = s;
+ 
+ 	flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
+-- 
+2.47.1
+
diff --git a/0009-udp_flow-Assign-socket-to-flow-inside-udp_flow_sock.patch b/0009-udp_flow-Assign-socket-to-flow-inside-udp_flow_sock.patch
new file mode 100644
index 0000000..1d38206
--- /dev/null
+++ b/0009-udp_flow-Assign-socket-to-flow-inside-udp_flow_sock.patch
@@ -0,0 +1,47 @@
+From c552b6462b67dd45b8162fe8f4d177bdc724c703 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Fri, 9 Jan 2026 17:54:36 +0100
+Subject: [PATCH 09/18] udp_flow: Assign socket to flow inside udp_flow_sock()
+
+Move the assignment of uflow->s[sidei] from the caller (udp_flow_new())
+into udp_flow_sock() itself, placing it after the successful connect().
+
+This is a pure refactoring with no functional change.  The socket fd is
+now assigned within udp_flow_sock() where the socket is created, rather
+than requiring the caller to capture the return value.  On error paths,
+uflow->s[sidei] remains at its initialized value of -1 rather than being
+set to the negative error code, which is semantically cleaner (though
+functionally equivalent given the >= 0 check in udp_flow_close()).
+
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit e0fdfccc1c1a56c58a96d7fd6cc5d532cd780b6f)
+---
+ udp_flow.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/udp_flow.c b/udp_flow.c
+index 0ba7880..c4cf35c 100644
+--- a/udp_flow.c
++++ b/udp_flow.c
+@@ -105,6 +105,7 @@ static int udp_flow_sock(const struct ctx *c,
+ 		flow_dbg_perror(uflow, "Couldn't connect flow socket");
+ 		return rc;
+ 	}
++	uflow->s[sidei] = s;
+ 
+ 	/* It's possible, if unlikely, that we could receive some packets in
+ 	 * between the bind() and connect() which may or may not be for this
+@@ -159,7 +160,7 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
+ 
+ 	flow_foreach_sidei(sidei) {
+ 		if (pif_is_socket(uflow->f.pif[sidei]))
+-			if ((uflow->s[sidei] = udp_flow_sock(c, uflow, sidei)) < 0)
++			if (udp_flow_sock(c, uflow, sidei) < 0)
+ 				goto cancel;
+ 	}
+ 
+-- 
+2.47.1
+
diff --git a/0010-tcp_splice-Refactor-tcp_splice_conn_epoll_events-to-.patch b/0010-tcp_splice-Refactor-tcp_splice_conn_epoll_events-to-.patch
new file mode 100644
index 0000000..afb0af4
--- /dev/null
+++ b/0010-tcp_splice-Refactor-tcp_splice_conn_epoll_events-to-.patch
@@ -0,0 +1,94 @@
+From 75b53a195da1fd7eb5a804df1b7b9217a92b1291 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Fri, 9 Jan 2026 17:54:37 +0100
+Subject: [PATCH 10/18] tcp_splice: Refactor tcp_splice_conn_epoll_events() to
+ per-side computation
+
+The function tcp_splice_conn_epoll_events() currently takes an array of
+struct epoll_event and fills in the .events field for both sides using
+flow_foreach_sidei() loops.
+
+This works, but the function is doing two conceptually separate things
+at once: computing events for side 0 and computing events for side 1.
+The OUT_WAIT handling is particularly subtle, as it has cross-side
+effects: when OUT_WAIT(sidei) is set, we add EPOLLOUT to ev[sidei] but
+also remove EPOLLIN from ev[!sidei].
+
+Refactor to make the function compute events for a single side at a
+time, taking sidei as a parameter and returning uint32_t. This makes
+the logic more focused and easier to follow. The cross-side effects of
+OUT_WAIT are preserved by checking both OUT_WAIT(sidei) and
+OUT_WAIT(!sidei) within each call.
+
+The caller tcp_splice_epoll_ctl() now invokes the function twice, once
+for each side, making the two-sided nature of the operation explicit.
+
+No functional change.
+
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit 23da651ab08e564b84c532f6f93b0817d2ae850f)
+---
+ tcp_splice.c | 33 ++++++++++++++-------------------
+ 1 file changed, 14 insertions(+), 19 deletions(-)
+
+diff --git a/tcp_splice.c b/tcp_splice.c
+index 4405224..bf4ff46 100644
+--- a/tcp_splice.c
++++ b/tcp_splice.c
+@@ -114,29 +114,23 @@ static struct tcp_splice_conn *conn_at_sidx(flow_sidx_t sidx)
+  * @events:	Connection event flags
+  * @ev:		Events to fill in, 0 is accepted socket, 1 is connecting socket
+  */
+-static void tcp_splice_conn_epoll_events(uint16_t events,
+-					 struct epoll_event ev[])
++static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
+ {
+-	unsigned sidei;
+-
+-	flow_foreach_sidei(sidei)
+-		ev[sidei].events = 0;
++	uint32_t e = 0;
+ 
+ 	if (events & SPLICE_ESTABLISHED) {
+-		flow_foreach_sidei(sidei) {
+-			if (!(events & FIN_SENT(!sidei)))
+-				ev[sidei].events = EPOLLIN | EPOLLRDHUP;
+-		}
+-	} else if (events & SPLICE_CONNECT) {
+-		ev[1].events = EPOLLOUT;
++		if (!(events & FIN_SENT(!sidei)))
++			e = EPOLLIN | EPOLLRDHUP;
++	} else if (sidei == 1 && events & SPLICE_CONNECT) {
++		e = EPOLLOUT;
+ 	}
+ 
+-	flow_foreach_sidei(sidei) {
+-		if (events & OUT_WAIT(sidei)) {
+-			ev[sidei].events |= EPOLLOUT;
+-			ev[!sidei].events &= ~EPOLLIN;
+-		}
+-	}
++	if (events & OUT_WAIT(sidei))
++		e |= EPOLLOUT;
++	if (events & OUT_WAIT(!sidei))
++		e &= ~EPOLLIN;
++
++	return e;
+ }
+ 
+ /**
+@@ -161,7 +155,8 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
+ 	struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
+ 					 { .data.u64 = ref[1].u64 } };
+ 
+-	tcp_splice_conn_epoll_events(conn->events, ev);
++	ev[0].events = tcp_splice_conn_epoll_events(conn->events, 0);
++	ev[1].events = tcp_splice_conn_epoll_events(conn->events, 1);
+ 
+ 
+ 	if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
+-- 
+2.47.1
+
diff --git a/0011-flow-Introduce-flow_epoll_set-to-centralize-epoll-op.patch b/0011-flow-Introduce-flow_epoll_set-to-centralize-epoll-op.patch
new file mode 100644
index 0000000..a61430d
--- /dev/null
+++ b/0011-flow-Introduce-flow_epoll_set-to-centralize-epoll-op.patch
@@ -0,0 +1,489 @@
+From dd444785b14fefb10b692e7293396aae99a7eb18 Mon Sep 17 00:00:00 2001
+From: Laurent Vivier <lvivier@redhat.com>
+Date: Fri, 9 Jan 2026 17:54:38 +0100
+Subject: [PATCH 11/18] flow: Introduce flow_epoll_set() to centralize epoll
+ operations
+
+Currently, each flow type (TCP, TCP_SPLICE, PING, UDP) has its own
+code to add or modify file descriptors in epoll. This leads to
+duplicated boilerplate code across icmp.c, tcp.c, tcp_splice.c, and
+udp_flow.c, each setting up epoll_ref unions and calling epoll_ctl()
+with flow-type-specific details.
+
+Introduce flow_epoll_set() in flow.c to handle epoll operations for
+all flow types in a unified way.
+
+This will be needed to migrate a queue pair from one epollfd to another.
+
+Signed-off-by: Laurent Vivier <lvivier@redhat.com>
+Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit c0be730f2aa2243a132b3ee40c2bf05ebc84fedf)
+---
+ flow.c       | 37 ++++++++++++++++++++++++
+ flow.h       |  2 ++
+ icmp.c       | 10 ++-----
+ tcp.c        | 48 ++++++++++++++++++------------
+ tcp_splice.c | 82 ++++++++++++++++++++++++----------------------------
+ udp_flow.c   | 11 ++-----
+ 6 files changed, 111 insertions(+), 79 deletions(-)
+
+diff --git a/flow.c b/flow.c
+index 4f53486..cefe6c8 100644
+--- a/flow.c
++++ b/flow.c
+@@ -20,6 +20,7 @@
+ #include "flow.h"
+ #include "flow_table.h"
+ #include "repair.h"
++#include "epoll_ctl.h"
+ 
+ const char *flow_state_str[] = {
+ 	[FLOW_STATE_FREE]	= "FREE",
+@@ -53,6 +54,16 @@ const uint8_t flow_proto[] = {
+ static_assert(ARRAY_SIZE(flow_proto) == FLOW_NUM_TYPES,
+ 	      "flow_proto[] doesn't match enum flow_type");
+ 
++static const enum epoll_type flow_epoll[] = {
++	[FLOW_TCP]		= EPOLL_TYPE_TCP,
++	[FLOW_TCP_SPLICE]	= EPOLL_TYPE_TCP_SPLICE,
++	[FLOW_PING4]		= EPOLL_TYPE_PING,
++	[FLOW_PING6]		= EPOLL_TYPE_PING,
++	[FLOW_UDP]		= EPOLL_TYPE_UDP,
++};
++static_assert(ARRAY_SIZE(flow_epoll) == FLOW_NUM_TYPES,
++	      "flow_epoll[] doesn't match enum flow_type");
++
+ #define foreach_established_tcp_flow(flow)				\
+ 	flow_foreach_of_type((flow), FLOW_TCP)				\
+ 		if (!tcp_flow_is_established(&(flow)->tcp))		\
+@@ -390,6 +401,32 @@ void flow_epollid_clear(struct flow_common *f)
+ 	f->epollid = EPOLLFD_ID_INVALID;
+ }
+ 
++/**
++ * flow_epoll_set() - Add or modify epoll registration for a flow socket
++ * @f:		Flow to register socket for
++ * @command:	epoll_ctl() command: EPOLL_CTL_ADD or EPOLL_CTL_MOD
++ * @events:	epoll events to watch for
++ * @fd:		File descriptor to register
++ * @sidei:	Side index of the flow
++ *
++ * Return: 0 on success, -1 on error (from epoll_ctl())
++ */
++int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
++		   int fd, unsigned int sidei)
++{
++	struct epoll_event ev;
++	union epoll_ref ref;
++
++	ref.fd = fd;
++	ref.type = flow_epoll[f->type];
++	ref.flowside = flow_sidx(f, sidei);
++
++	ev.events = events;
++	ev.data.u64 = ref.u64;
++
++	return epoll_ctl(flow_epollfd(f), command, fd, &ev);
++}
++
+ /**
+  * flow_epollid_register() - Initialize the epoll id -> fd mapping
+  * @epollid:	epoll id to associate to
+diff --git a/flow.h b/flow.h
+index b43b0b1..1b78d59 100644
+--- a/flow.h
++++ b/flow.h
+@@ -265,6 +265,8 @@ bool flow_in_epoll(const struct flow_common *f);
+ int flow_epollfd(const struct flow_common *f);
+ void flow_epollid_set(struct flow_common *f, int epollid);
+ void flow_epollid_clear(struct flow_common *f);
++int flow_epoll_set(const struct flow_common *f, int command, uint32_t events,
++		   int fd, unsigned int sidei);
+ void flow_epollid_register(int epollid, int epollfd);
+ void flow_defer_handler(const struct ctx *c, const struct timespec *now);
+ int flow_migrate_source_early(struct ctx *c, const struct migrate_stage *stage,
+diff --git a/icmp.c b/icmp.c
+index 9564c49..eb7f11b 100644
+--- a/icmp.c
++++ b/icmp.c
+@@ -177,7 +177,6 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
+ 	union flow *flow = flow_alloc();
+ 	struct icmp_ping_flow *pingf;
+ 	const struct flowside *tgt;
+-	union epoll_ref ref;
+ 
+ 	if (!flow)
+ 		return NULL;
+@@ -211,13 +210,10 @@ static struct icmp_ping_flow *icmp_ping_new(const struct ctx *c,
+ 		goto cancel;
+ 
+ 	flow_epollid_set(&pingf->f, EPOLLFD_ID_DEFAULT);
+-
+-	ref.type = EPOLL_TYPE_PING;
+-	ref.flowside = FLOW_SIDX(flow, TGTSIDE);
+-	ref.fd = pingf->sock;
+-
+-	if (epoll_add(flow_epollfd(&pingf->f), EPOLLIN, ref) < 0) {
++	if (flow_epoll_set(&pingf->f, EPOLL_CTL_ADD, EPOLLIN, pingf->sock,
++			   TGTSIDE) < 0) {
+ 		close(pingf->sock);
++		flow_epollid_clear(&pingf->f);
+ 		goto cancel;
+ 	}
+ 
+diff --git a/tcp.c b/tcp.c
+index 8f4f087..146d460 100644
+--- a/tcp.c
++++ b/tcp.c
+@@ -523,34 +523,44 @@ static uint32_t tcp_conn_epoll_events(uint8_t events, uint8_t conn_flags)
+ 
+ /**
+  * tcp_epoll_ctl() - Add/modify/delete epoll state from connection events
+- * @c:		Execution context
+  * @conn:	Connection pointer
+  *
+  * Return: 0 on success, negative error code on failure (not on deletion)
+  */
+-static int tcp_epoll_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
++static int tcp_epoll_ctl(struct tcp_tap_conn *conn)
+ {
+-	int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+-	union epoll_ref ref = { .type = EPOLL_TYPE_TCP, .fd = conn->sock,
+-		                .flowside = FLOW_SIDX(conn, !TAPSIDE(conn)), };
+-	struct epoll_event ev = { .data.u64 = ref.u64 };
+-	int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
+-					      : c->epollfd;
++	uint32_t events;
++	int m;
+ 
+ 	if (conn->events == CLOSED) {
+-		if (flow_in_epoll(&conn->f))
++		if (flow_in_epoll(&conn->f)) {
++			int epollfd = flow_epollfd(&conn->f);
++
+ 			epoll_del(epollfd, conn->sock);
+-		if (conn->timer != -1)
+-			epoll_del(epollfd, conn->timer);
++			if (conn->timer != -1)
++				epoll_del(epollfd, conn->timer);
++		}
++
+ 		return 0;
+ 	}
+ 
+-	ev.events = tcp_conn_epoll_events(conn->events, conn->flags);
++	events = tcp_conn_epoll_events(conn->events, conn->flags);
+ 
+-	if (epoll_ctl(epollfd, m, conn->sock, &ev))
+-		return -errno;
++	if (flow_in_epoll(&conn->f)) {
++		m = EPOLL_CTL_MOD;
++	} else {
++		flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
++		m = EPOLL_CTL_ADD;
++	}
+ 
+-	flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
++	if (flow_epoll_set(&conn->f, m, events, conn->sock,
++			   !TAPSIDE(conn)) < 0) {
++		int ret = -errno;
++
++		if (m == EPOLL_CTL_ADD)
++			flow_epollid_clear(&conn->f);
++		return ret;
++	}
+ 
+ 	if (conn->timer != -1) {
+ 		union epoll_ref ref_t = { .type = EPOLL_TYPE_TCP_TIMER,
+@@ -681,7 +691,7 @@ void conn_flag_do(const struct ctx *c, struct tcp_tap_conn *conn,
+ 	}
+ 
+ 	if (flag == STALLED || flag == ~STALLED)
+-		tcp_epoll_ctl(c, conn);
++		tcp_epoll_ctl(conn);
+ 
+ 	if (flag == ACK_FROM_TAP_DUE || flag == ACK_TO_TAP_DUE		  ||
+ 	    (flag == ~ACK_FROM_TAP_DUE && (conn->flags & ACK_TO_TAP_DUE)) ||
+@@ -738,7 +748,7 @@ void conn_event_do(const struct ctx *c, struct tcp_tap_conn *conn,
+ 	} else {
+ 		if (event == CLOSED)
+ 			flow_hash_remove(c, TAP_SIDX(conn));
+-		tcp_epoll_ctl(c, conn);
++		tcp_epoll_ctl(conn);
+ 	}
+ 
+ 	if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED))
+@@ -1753,7 +1763,7 @@ static void tcp_conn_from_tap(const struct ctx *c, sa_family_t af,
+ 		conn_event(c, conn, TAP_SYN_ACK_SENT);
+ 	}
+ 
+-	tcp_epoll_ctl(c, conn);
++	tcp_epoll_ctl(conn);
+ 
+ 	if (c->mode == MODE_VU) { /* To rebind to same oport after migration */
+ 		socklen_t sl = sizeof(sa);
+@@ -4021,7 +4031,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd
+ 	tcp_send_flag(c, conn, ACK);
+ 	tcp_data_from_sock(c, conn);
+ 
+-	if ((rc = tcp_epoll_ctl(c, conn))) {
++	if ((rc = tcp_epoll_ctl(conn))) {
+ 		flow_dbg(conn,
+ 			 "Failed to subscribe to epoll for migrated socket: %s",
+ 			 strerror_(-rc));
+diff --git a/tcp_splice.c b/tcp_splice.c
+index bf4ff46..a7c04ca 100644
+--- a/tcp_splice.c
++++ b/tcp_splice.c
+@@ -135,37 +135,31 @@ static uint32_t tcp_splice_conn_epoll_events(uint16_t events, unsigned sidei)
+ 
+ /**
+  * tcp_splice_epoll_ctl() - Add/modify/delete epoll state from connection events
+- * @c:		Execution context
+  * @conn:	Connection pointer
+  *
+  * Return: 0 on success, negative error code on failure (not on deletion)
+  */
+-static int tcp_splice_epoll_ctl(const struct ctx *c,
+-				struct tcp_splice_conn *conn)
++static int tcp_splice_epoll_ctl(struct tcp_splice_conn *conn)
+ {
+-	int epollfd = flow_in_epoll(&conn->f) ? flow_epollfd(&conn->f)
+-					      : c->epollfd;
+-	int m = flow_in_epoll(&conn->f) ? EPOLL_CTL_MOD : EPOLL_CTL_ADD;
+-	const union epoll_ref ref[SIDES] = {
+-		{ .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[0],
+-		  .flowside = FLOW_SIDX(conn, 0) },
+-		{ .type = EPOLL_TYPE_TCP_SPLICE, .fd = conn->s[1],
+-		  .flowside = FLOW_SIDX(conn, 1) }
+-	};
+-	struct epoll_event ev[SIDES] = { { .data.u64 = ref[0].u64 },
+-					 { .data.u64 = ref[1].u64 } };
+-
+-	ev[0].events = tcp_splice_conn_epoll_events(conn->events, 0);
+-	ev[1].events = tcp_splice_conn_epoll_events(conn->events, 1);
+-
+-
+-	if (epoll_ctl(epollfd, m, conn->s[0], &ev[0]) ||
+-	    epoll_ctl(epollfd, m, conn->s[1], &ev[1])) {
++	uint32_t events[2];
++	int m;
++
++	if (flow_in_epoll(&conn->f)) {
++		m = EPOLL_CTL_MOD;
++	} else {
++		flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
++		m = EPOLL_CTL_ADD;
++	}
++
++	events[0] = tcp_splice_conn_epoll_events(conn->events, 0);
++	events[1] = tcp_splice_conn_epoll_events(conn->events, 1);
++
++	if (flow_epoll_set(&conn->f, m, events[0], conn->s[0], 0) ||
++	    flow_epoll_set(&conn->f, m, events[1], conn->s[1], 1)) {
+ 		int ret = -errno;
+ 		flow_perror(conn, "ERROR on epoll_ctl()");
+ 		return ret;
+ 	}
+-	flow_epollid_set(&conn->f, EPOLLFD_ID_DEFAULT);
+ 
+ 	return 0;
+ }
+@@ -205,7 +199,7 @@ static void conn_flag_do(struct tcp_splice_conn *conn,
+ 	}
+ }
+ 
+-#define conn_flag(c, conn, flag)					\
++#define conn_flag(conn, flag)					\
+ 	do {								\
+ 		flow_trace(conn, "flag at %s:%i", __func__, __LINE__);	\
+ 		conn_flag_do(conn, flag);				\
+@@ -213,12 +207,10 @@ static void conn_flag_do(struct tcp_splice_conn *conn,
+ 
+ /**
+  * conn_event_do() - Set and log connection events, update epoll state
+- * @c:		Execution context
+  * @conn:	Connection pointer
+  * @event:	Connection event
+  */
+-static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
+-			  unsigned long event)
++static void conn_event_do(struct tcp_splice_conn *conn, unsigned long event)
+ {
+ 	if (event & (event - 1)) {
+ 		int flag_index = fls(~event);
+@@ -240,14 +232,14 @@ static void conn_event_do(const struct ctx *c, struct tcp_splice_conn *conn,
+ 			flow_dbg(conn, "%s", tcp_splice_event_str[flag_index]);
+ 	}
+ 
+-	if (tcp_splice_epoll_ctl(c, conn))
+-		conn_flag(c, conn, CLOSING);
++	if (tcp_splice_epoll_ctl(conn))
++		conn_flag(conn, CLOSING);
+ }
+ 
+-#define conn_event(c, conn, event)					\
++#define conn_event(conn, event)					\
+ 	do {								\
+ 		flow_trace(conn, "event at %s:%i",__func__, __LINE__);	\
+-		conn_event_do(c, conn, event);				\
++		conn_event_do(conn, event);				\
+ 	} while (0)
+ 
+ 
+@@ -315,7 +307,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
+ 			if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
+ 				flow_perror(conn, "cannot create %d->%d pipe",
+ 					    sidei, !sidei);
+-				conn_flag(c, conn, CLOSING);
++				conn_flag(conn, CLOSING);
+ 				return -EIO;
+ 			}
+ 
+@@ -329,7 +321,7 @@ static int tcp_splice_connect_finish(const struct ctx *c,
+ 	}
+ 
+ 	if (!(conn->events & SPLICE_ESTABLISHED))
+-		conn_event(c, conn, SPLICE_ESTABLISHED);
++		conn_event(conn, SPLICE_ESTABLISHED);
+ 
+ 	return 0;
+ }
+@@ -376,7 +368,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
+ 
+ 	pif_sockaddr(c, &sa, tgtpif, &tgt->eaddr, tgt->eport);
+ 
+-	conn_event(c, conn, SPLICE_CONNECT);
++	conn_event(conn, SPLICE_CONNECT);
+ 
+ 	if (connect(conn->s[1], &sa.sa, socklen_inany(&sa))) {
+ 		if (errno != EINPROGRESS) {
+@@ -385,7 +377,7 @@ static int tcp_splice_connect(const struct ctx *c, struct tcp_splice_conn *conn)
+ 			return -errno;
+ 		}
+ 	} else {
+-		conn_event(c, conn, SPLICE_ESTABLISHED);
++		conn_event(conn, SPLICE_ESTABLISHED);
+ 		return tcp_splice_connect_finish(c, conn);
+ 	}
+ 
+@@ -445,7 +437,7 @@ void tcp_splice_conn_from_sock(const struct ctx *c, union flow *flow, int s0)
+ 		flow_trace(conn, "failed to set TCP_QUICKACK on %i", s0);
+ 
+ 	if (tcp_splice_connect(c, conn))
+-		conn_flag(c, conn, CLOSING);
++		conn_flag(conn, CLOSING);
+ 
+ 	FLOW_ACTIVATE(conn);
+ }
+@@ -494,14 +486,14 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
+ 
+ 	if (events & EPOLLOUT) {
+ 		fromsidei = !evsidei;
+-		conn_event(c, conn, ~OUT_WAIT(evsidei));
++		conn_event(conn, ~OUT_WAIT(evsidei));
+ 	} else {
+ 		fromsidei = evsidei;
+ 	}
+ 
+ 	if (events & EPOLLRDHUP)
+ 		/* For side 0 this is fake, but implied */
+-		conn_event(c, conn, FIN_RCVD(evsidei));
++		conn_event(conn, FIN_RCVD(evsidei));
+ 
+ swap:
+ 	eof = 0;
+@@ -536,7 +528,7 @@ retry:
+ 				more = SPLICE_F_MORE;
+ 
+ 			if (conn->flags & lowat_set_flag)
+-				conn_flag(c, conn, lowat_act_flag);
++				conn_flag(conn, lowat_act_flag);
+ 		}
+ 
+ 		do
+@@ -568,8 +560,8 @@ retry:
+ 						   "Setting SO_RCVLOWAT %i: %s",
+ 						   lowat, strerror_(errno));
+ 				} else {
+-					conn_flag(c, conn, lowat_set_flag);
+-					conn_flag(c, conn, lowat_act_flag);
++					conn_flag(conn, lowat_set_flag);
++					conn_flag(conn, lowat_act_flag);
+ 				}
+ 			}
+ 
+@@ -583,7 +575,7 @@ retry:
+ 			if (conn->read[fromsidei] == conn->written[fromsidei])
+ 				break;
+ 
+-			conn_event(c, conn, OUT_WAIT(!fromsidei));
++			conn_event(conn, OUT_WAIT(!fromsidei));
+ 			break;
+ 		}
+ 
+@@ -605,7 +597,7 @@ retry:
+ 			if ((conn->events & FIN_RCVD(sidei)) &&
+ 			    !(conn->events & FIN_SENT(!sidei))) {
+ 				shutdown(conn->s[!sidei], SHUT_WR);
+-				conn_event(c, conn, FIN_SENT(!sidei));
++				conn_event(conn, FIN_SENT(!sidei));
+ 			}
+ 		}
+ 	}
+@@ -626,7 +618,7 @@ retry:
+ 	return;
+ 
+ close:
+-	conn_flag(c, conn, CLOSING);
++	conn_flag(conn, CLOSING);
+ }
+ 
+ /**
+@@ -762,10 +754,10 @@ void tcp_splice_timer(struct tcp_splice_conn *conn)
+ 				flow_trace(conn, "can't set SO_RCVLOWAT on %d",
+ 					   conn->s[sidei]);
+ 			}
+-			conn_flag(c, conn, ~RCVLOWAT_SET(sidei));
++			conn_flag(conn, ~RCVLOWAT_SET(sidei));
+ 		}
+ 	}
+ 
+ 	flow_foreach_sidei(sidei)
+-		conn_flag(c, conn, ~RCVLOWAT_ACT(sidei));
++		conn_flag(conn, ~RCVLOWAT_ACT(sidei));
+ }
+diff --git a/udp_flow.c b/udp_flow.c
+index c4cf35c..80b1543 100644
+--- a/udp_flow.c
++++ b/udp_flow.c
+@@ -74,7 +74,6 @@ static int udp_flow_sock(const struct ctx *c,
+ {
+ 	const struct flowside *side = &uflow->f.side[sidei];
+ 	uint8_t pif = uflow->f.pif[sidei];
+-	union epoll_ref ref;
+ 	int rc;
+ 	int s;
+ 
+@@ -84,14 +83,10 @@ static int udp_flow_sock(const struct ctx *c,
+ 		return s;
+ 	}
+ 
+-	ref.type = EPOLL_TYPE_UDP;
+-	ref.flowside = FLOW_SIDX(uflow, sidei);
+-	ref.fd = s;
+-
+ 	flow_epollid_set(&uflow->f, EPOLLFD_ID_DEFAULT);
+-
+-	rc = epoll_add(flow_epollfd(&uflow->f), EPOLLIN, ref);
+-	if (rc < 0) {
++	if (flow_epoll_set(&uflow->f, EPOLL_CTL_ADD, EPOLLIN, s, sidei) < 0) {
++		rc = -errno;
++		flow_epollid_clear(&uflow->f);
+ 		close(s);
+ 		return rc;
+ 	}
+-- 
+2.47.1
+
diff --git a/0012-tcp-Properly-propagate-tap-side-RST-to-socket-side.patch b/0012-tcp-Properly-propagate-tap-side-RST-to-socket-side.patch
new file mode 100644
index 0000000..b6a8c84
--- /dev/null
+++ b/0012-tcp-Properly-propagate-tap-side-RST-to-socket-side.patch
@@ -0,0 +1,99 @@
+From a742a423b3c8fd345c4af50dd5f06b95af6c75bf Mon Sep 17 00:00:00 2001
+From: David Gibson <david@gibson.dropbear.id.au>
+Date: Tue, 27 Jan 2026 19:39:52 +1100
+Subject: [PATCH 12/18] tcp: Properly propagate tap-side RST to socket side
+
+When the guest sends a TCP RST, or on certain error conditions, we want to
+signal the abnormal termination of a TCP connection to the peer with an
+RST as well.  We attempt to do that by close()ing the socket.
+
+That doesn't work: a close() will usually send a FIN, rather than an RST.
+The standard method of forcing an RST on a socket is to set the SO_LINGER
+socket option with a 0 timeout, then close().
+
+Update the tcp_rst() path to do this, so it forces a socket side RST.
+Update the handling of a guest side RST to use the same path (minus
+sending a tap side RST) so that we properly propagate guest RSTs to the
+peer.
+
+Link: https://bugs.passt.top/show_bug.cgi?id=191
+Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit cce94e92fb3d2a90730c125f2bad32c9ed51da3f)
+---
+ tcp.c | 37 +++++++++++++++++++++++++++++++++----
+ 1 file changed, 33 insertions(+), 4 deletions(-)
+
+diff --git a/tcp.c b/tcp.c
+index 146d460..602e810 100644
+--- a/tcp.c
++++ b/tcp.c
+@@ -1417,7 +1417,34 @@ static int tcp_send_flag(const struct ctx *c, struct tcp_tap_conn *conn,
+ }
+ 
+ /**
+- * tcp_rst_do() - Reset a tap connection: send RST segment to tap, close socket
++ * tcp_sock_rst() - Close TCP connection forcing RST on socket side
++ * @c:		Execution context
++ * @conn:	Connection pointer
++ */
++static void tcp_sock_rst(const struct ctx *c, struct tcp_tap_conn *conn)
++{
++	const struct linger linger0 = {
++		.l_onoff = 1,
++		.l_linger = 0,
++	};
++
++	/* Force RST on socket to inform the peer
++	 *
++	 * We do this by setting SO_LINGER with 0 timeout, which means that
++	 * close() will send an RST (unless the connection is already closed in
++	 * both directions).
++	 */
++	if (setsockopt(conn->sock, SOL_SOCKET,
++		       SO_LINGER, &linger0, sizeof(linger0)) < 0) {
++		flow_dbg_perror(conn,
++				"SO_LINGER failed, may not send RST to peer");
++	}
++
++	conn_event(c, conn, CLOSED);
++}
++
++/**
++ * tcp_rst_do() - Reset a tap connection: send RST segment on both sides, close
+  * @c:		Execution context
+  * @conn:	Connection pointer
+  */
+@@ -1426,8 +1453,10 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
+ 	if (conn->events == CLOSED)
+ 		return;
+ 
++	/* Send RST on tap */
+ 	tcp_send_flag(c, conn, RST);
+-	conn_event(c, conn, CLOSED);
++
++	tcp_sock_rst(c, conn);
+ }
+ 
+ /**
+@@ -1898,7 +1927,7 @@ static int tcp_data_from_tap(const struct ctx *c, struct tcp_tap_conn *conn,
+ 			return -1;
+ 
+ 		if (th->rst) {
+-			conn_event(c, conn, CLOSED);
++			tcp_sock_rst(c, conn);
+ 			return 1;
+ 		}
+ 
+@@ -2262,7 +2291,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
+ 	flow_trace(conn, "packet length %zu from tap", l4len);
+ 
+ 	if (th->rst) {
+-		conn_event(c, conn, CLOSED);
++		tcp_sock_rst(c, conn);
+ 		return 1;
+ 	}
+ 
+-- 
+2.47.1
+
diff --git a/0013-udp-Split-activity-timeouts-for-UDP-flows.patch b/0013-udp-Split-activity-timeouts-for-UDP-flows.patch
new file mode 100644
index 0000000..771c076
--- /dev/null
+++ b/0013-udp-Split-activity-timeouts-for-UDP-flows.patch
@@ -0,0 +1,239 @@
+From 59c4113359af6d610e3f23f030a09ffa9011c9f8 Mon Sep 17 00:00:00 2001
+From: Yumei Huang <yuhuang@redhat.com>
+Date: Sat, 14 Feb 2026 15:31:36 +0800
+Subject: [PATCH 13/18] udp: Split activity timeouts for UDP flows
+
+Frequent DNS queries over UDP from a container or guest can result
+in many sockets shown in ss(8), typically one per flow. This is
+expected and harmless, but it can make the output of ss(8) look
+noisy and potentially concern users.
+
+This patch splits UDP flow timeouts into two, mirroring the Linux
+kernel, and sources the values from kernel parameters. The shorter
+timeout is applied to unidirectional flows and minimal bidirectional
+exchanges (single datagram and reply), while the longer timeout is
+used for bidirectional flows with multiple datagrams on either side.
+
+Link: https://bugs.passt.top/show_bug.cgi?id=197
+Suggested-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Yumei Huang <yuhuang@redhat.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+(cherry picked from commit bebafa72a982784164a7d556bd860ec0ed1e02c7)
+---
+ contrib/apparmor/abstractions/passt |  4 ++++
+ udp.c                               | 34 +++++++++++++++++++++++++++--
+ udp.h                               |  4 ++++
+ udp_flow.c                          | 30 ++++++++++++++++++++-----
+ udp_flow.h                          |  4 ++++
+ 5 files changed, 69 insertions(+), 7 deletions(-)
+
+diff --git a/contrib/apparmor/abstractions/passt b/contrib/apparmor/abstractions/passt
+index 43fd63f..e8ed513 100644
+--- a/contrib/apparmor/abstractions/passt
++++ b/contrib/apparmor/abstractions/passt
+@@ -36,6 +36,10 @@
+ 
+   @{PROC}/sys/net/ipv4/ip_local_port_range r,	# fwd_probe_ephemeral()
+ 
++  # udp_get_timeout_params(), udp.c
++  @{PROC}/sys/net/netfilter/nf_conntrack_udp_timeout r,
++  @{PROC}/sys/net/netfilter/nf_conntrack_udp_timeout_stream r,
++
+   network netlink raw,				# nl_sock_init_do(), netlink.c
+ 
+   network inet stream,				# tcp.c
+diff --git a/udp.c b/udp.c
+index 08bec50..11c3ecc 100644
+--- a/udp.c
++++ b/udp.c
+@@ -26,7 +26,10 @@
+  *
+  * We track pseudo-connections of this type as flow table entries of type
+  * FLOW_UDP.  We store the time of the last traffic on the flow in uflow->ts,
+- * and let the flow expire if there is no traffic for UDP_CONN_TIMEOUT seconds.
++ * and let the flow expire if there is no traffic for UDP_TIMEOUT seconds for
++ * unidirectional flows and flows with only one datagram and one reply, or
++ * UDP_TIMEOUT_STREAM seconds for bidirectional flows with more than one
++ * datagram on either side.
+  *
+  * NOTE: This won't handle multicast protocols, or some protocols with different
+  * port usage.  We'll need specific logic if we want to handle those.
+@@ -118,6 +121,13 @@
+ 
+ #define UDP_MAX_FRAMES		32  /* max # of frames to receive at once */
+ 
++#define UDP_TIMEOUT	"/proc/sys/net/netfilter/nf_conntrack_udp_timeout"
++#define UDP_TIMEOUT_STREAM	\
++	"/proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream"
++
++#define UDP_TIMEOUT_DEFAULT		30	/* s */
++#define UDP_TIMEOUT_STREAM_DEFAULT	120	/* s */
++
+ /* Maximum UDP data to be returned in ICMP messages */
+ #define ICMP4_MAX_DLEN 8
+ #define ICMP6_MAX_DLEN (IPV6_MIN_MTU			\
+@@ -966,7 +976,7 @@ void udp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ 		int s = ref.fd;
+ 
+ 		flow_trace(uflow, "Received data on reply socket");
+-		uflow->ts = now->tv_sec;
++		udp_flow_activity(uflow, !tosidx.sidei, now);
+ 
+ 		if (pif_is_socket(topif)) {
+ 			udp_sock_to_sock(c, ref.fd, n, tosidx);
+@@ -1301,6 +1311,24 @@ void udp_port_rebind_all(struct ctx *c)
+ 		udp_port_rebind(c, false);
+ }
+ 
++/**
++ * udp_get_timeout_params() - Get host kernel UDP timeout parameters
++ * @c:		Execution context
++ */
++static void udp_get_timeout_params(struct ctx *c)
++{
++	intmax_t v;
++
++	v = read_file_integer(UDP_TIMEOUT, UDP_TIMEOUT_DEFAULT);
++	c->udp.timeout = v;
++
++	v = read_file_integer(UDP_TIMEOUT_STREAM, UDP_TIMEOUT_STREAM_DEFAULT);
++	c->udp.stream_timeout = v;
++
++	debug("Using UDP timeout parameters, timeout: %d, stream_timeout: %d",
++	      c->udp.timeout, c->udp.stream_timeout);
++}
++
+ /**
+  * udp_init() - Initialise per-socket data, and sockets in namespace
+  * @c:		Execution context
+@@ -1311,6 +1339,8 @@ int udp_init(struct ctx *c)
+ {
+ 	ASSERT(!c->no_udp);
+ 
++	udp_get_timeout_params(c);
++
+ 	udp_iov_init(c);
+ 
+ 	if (c->mode == MODE_PASTA) {
+diff --git a/udp.h b/udp.h
+index 03e8dc5..618f258 100644
+--- a/udp.h
++++ b/udp.h
+@@ -42,11 +42,15 @@ union udp_listen_epoll_ref {
+  * @fwd_in:		Port forwarding configuration for inbound packets
+  * @fwd_out:		Port forwarding configuration for outbound packets
+  * @timer_run:		Timestamp of most recent timer run
++ * @timeout:		Timeout for unidirectional/single-exchange flows (in s)
++ * @stream_timeout:	Timeout for stream-like flows (in s)
+  */
+ struct udp_ctx {
+ 	struct fwd_ports fwd_in;
+ 	struct fwd_ports fwd_out;
+ 	struct timespec timer_run;
++	int timeout;
++	int stream_timeout;
+ };
+ 
+ #endif /* UDP_H */
+diff --git a/udp_flow.c b/udp_flow.c
+index 80b1543..4a8d4b6 100644
+--- a/udp_flow.c
++++ b/udp_flow.c
+@@ -17,8 +17,6 @@
+ #include "udp_internal.h"
+ #include "epoll_ctl.h"
+ 
+-#define UDP_CONN_TIMEOUT	180 /* s, timeout for ephemeral or local bind */
+-
+ /**
+  * udp_at_sidx() - Get UDP specific flow at given sidx
+  * @sidx:    Flow and side to retrieve
+@@ -152,6 +150,8 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
+ 	uflow->ts = now->tv_sec;
+ 	uflow->s[INISIDE] = uflow->s[TGTSIDE] = -1;
+ 	uflow->ttl[INISIDE] = uflow->ttl[TGTSIDE] = 0;
++	uflow->activity[INISIDE] = 1;
++	uflow->activity[TGTSIDE] = 0;
+ 
+ 	flow_foreach_sidei(sidei) {
+ 		if (pif_is_socket(uflow->f.pif[sidei]))
+@@ -227,7 +227,7 @@ flow_sidx_t udp_flow_from_sock(const struct ctx *c, uint8_t pif,
+ 
+ 	sidx = flow_lookup_sa(c, IPPROTO_UDP, pif, s_in, dst, port);
+ 	if ((uflow = udp_at_sidx(sidx))) {
+-		uflow->ts = now->tv_sec;
++		udp_flow_activity(uflow, sidx.sidei, now);
+ 		return flow_sidx_opposite(sidx);
+ 	}
+ 
+@@ -284,7 +284,7 @@ flow_sidx_t udp_flow_from_tap(const struct ctx *c,
+ 	sidx = flow_lookup_af(c, IPPROTO_UDP, pif, af, saddr, daddr,
+ 			      srcport, dstport);
+ 	if ((uflow = udp_at_sidx(sidx))) {
+-		uflow->ts = now->tv_sec;
++		udp_flow_activity(uflow, sidx.sidei, now);
+ 		return flow_sidx_opposite(sidx);
+ 	}
+ 
+@@ -361,9 +361,29 @@ bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+ bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
+ 		    const struct timespec *now)
+ {
+-	if (now->tv_sec - uflow->ts <= UDP_CONN_TIMEOUT)
++	int timeout = c->udp.timeout;
++
++	if (uflow->activity[TGTSIDE] &&
++	    (uflow->activity[INISIDE] > 1 || uflow->activity[TGTSIDE] > 1))
++		timeout = c->udp.stream_timeout;
++
++	if (now->tv_sec - uflow->ts <= timeout)
+ 		return false;
+ 
+ 	udp_flow_close(c, uflow);
+ 	return true;
+ }
++
++/**
++ * udp_flow_activity() - Track activity of a UDP flow
++ * @uflow:	UDP flow
++ * @sidei:	Side index of the flow (INISIDE or TGTSIDE)
++ * @now:	Current timestamp
++ */
++void udp_flow_activity(struct udp_flow *uflow, unsigned int sidei,
++		       const struct timespec *now)
++{
++	uflow->ts = now->tv_sec;
++	if (uflow->activity[sidei] < UINT8_MAX)
++		uflow->activity[sidei]++;
++}
+diff --git a/udp_flow.h b/udp_flow.h
+index 4c528e9..183a429 100644
+--- a/udp_flow.h
++++ b/udp_flow.h
+@@ -16,6 +16,7 @@
+  * @flush1:	@s[1] may have datagrams queued for other flows
+  * @ts:		Activity timestamp
+  * @s:		Socket fd (or -1) for each side of the flow
++ * @activity:	Packets seen from each side of the flow, up to UINT8_MAX
+  */
+ struct udp_flow {
+ 	/* Must be first element */
+@@ -29,6 +30,7 @@ struct udp_flow {
+ 
+ 	time_t ts;
+ 	int s[SIDES];
++	uint8_t activity[SIDES];
+ };
+ 
+ struct udp_flow *udp_at_sidx(flow_sidx_t sidx);
+@@ -46,5 +48,7 @@ bool udp_flow_defer(const struct ctx *c, struct udp_flow *uflow,
+ 		    const struct timespec *now);
+ bool udp_flow_timer(const struct ctx *c, struct udp_flow *uflow,
+ 		    const struct timespec *now);
++void udp_flow_activity(struct udp_flow *uflow, unsigned int sidei,
++		       const struct timespec *now);
+ 
+ #endif /* UDP_FLOW_H */
+-- 
+2.47.1
+
diff --git a/0005-tcp-Remove-non-working-activity-timeout-mechanism.patch b/0014-tcp-Remove-non-working-activity-timeout-mechanism.patch
similarity index 83%
rename from 0005-tcp-Remove-non-working-activity-timeout-mechanism.patch
rename to 0014-tcp-Remove-non-working-activity-timeout-mechanism.patch
index 154136f..e3ad84c 100644
--- a/0005-tcp-Remove-non-working-activity-timeout-mechanism.patch
+++ b/0014-tcp-Remove-non-working-activity-timeout-mechanism.patch
@@ -1,7 +1,7 @@
-From 34a346e3eb83b33bd6d62a57e0e990c5c698fe85 Mon Sep 17 00:00:00 2001
+From 0e93de89b6723dbacc391f4c975a3bdbc3529ef4 Mon Sep 17 00:00:00 2001
 From: David Gibson <david@gibson.dropbear.id.au>
 Date: Wed, 4 Feb 2026 21:41:34 +1000
-Subject: [PATCH 5/8] tcp: Remove non-working activity timeout mechanism
+Subject: [PATCH 14/18] tcp: Remove non-working activity timeout mechanism
 
 This mechanism was intended to remove connections which have had no
 activity for two hours, even if they haven't closed or been reset
@@ -27,20 +27,20 @@ Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
  1 file changed, 3 insertions(+), 21 deletions(-)
 
 diff --git a/tcp.c b/tcp.c
-index 17e99af..7a7ca24 100644
+index 602e810..de2ad38 100644
 --- a/tcp.c
 +++ b/tcp.c
-@@ -197,9 +197,6 @@
+@@ -199,9 +199,6 @@
   *   TAP_FIN_ACKED), but no socket activity is detected from the socket within
   *   this time, reset the connection
   *
 - * - ACT_TIMEOUT, in the presence of any event: if no activity is detected on
 - *   either side, the connection is reset
 - *
-  * - ACK_INTERVAL elapsed after data segment received from tap without having
+  * - RTT / 2 elapsed after data segment received from tap without having
   *   sent an ACK segment, or zero-sized window advertised to tap/guest (flag
-  *   ACK_TO_TAP_DUE): forcibly check if an ACK segment can be sent
-@@ -578,7 +575,9 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
+  *   ACK_TO_TAP_DUE): forcibly check if an ACK segment can be sent.
+@@ -632,7 +629,9 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
  	} else if (CONN_HAS(conn, SOCK_FIN_SENT | TAP_FIN_ACKED)) {
  		it.it_value.tv_sec = FIN_TIMEOUT;
  	} else {
@@ -50,10 +50,10 @@ index 17e99af..7a7ca24 100644
 +		it.it_value.tv_nsec = 0;
  	}
  
- 	flow_dbg(conn, "timer expires in %llu.%03llus",
-@@ -2294,23 +2293,6 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
- 				tcp_timer_ctl(c, conn);
- 			}
+ 	if (conn->flags & ACK_TO_TAP_DUE) {
+@@ -2628,23 +2627,6 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
+ 			tcp_data_from_sock(c, conn);
+ 			tcp_timer_ctl(c, conn);
  		}
 -	} else {
 -		struct itimerspec new = { { 0 }, { ACT_TIMEOUT, 0 } };
diff --git a/0006-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch b/0015-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
similarity index 66%
rename from 0006-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
rename to 0015-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
index 32d302b..4f3b9ee 100644
--- a/0006-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
+++ b/0015-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
@@ -1,7 +1,7 @@
-From 479c464c767e655fad65b9bedc496570bb40c997 Mon Sep 17 00:00:00 2001
+From 443c72321d624d331a5bf18777d2a7b0c58bec8f Mon Sep 17 00:00:00 2001
 From: David Gibson <david@gibson.dropbear.id.au>
 Date: Wed, 4 Feb 2026 21:41:35 +1000
-Subject: [PATCH 6/8] tcp: Re-introduce inactivity timeouts based on a clock
+Subject: [PATCH 15/18] tcp: Re-introduce inactivity timeouts based on a clock
  algorithm
 
 We previously had a mechanism to remove TCP connections which were
@@ -29,18 +29,18 @@ Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
 Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
 (cherry picked from commit 1820103fbbf13df98257a3f5c3ba625de624b0b3)
 ---
- tcp.c      | 49 ++++++++++++++++++++++++++++++++++++++++++++++++-
- tcp.h      |  2 ++
+ tcp.c      | 52 ++++++++++++++++++++++++++++++++++++++++++++++++----
+ tcp.h      |  4 +++-
  tcp_conn.h |  3 +++
- 3 files changed, 53 insertions(+), 1 deletion(-)
+ 3 files changed, 54 insertions(+), 5 deletions(-)
 
 diff --git a/tcp.c b/tcp.c
-index 7a7ca24..394fc35 100644
+index de2ad38..dd58550 100644
 --- a/tcp.c
 +++ b/tcp.c
-@@ -201,6 +201,13 @@
-  *   sent an ACK segment, or zero-sized window advertised to tap/guest (flag
-  *   ACK_TO_TAP_DUE): forcibly check if an ACK segment can be sent
+@@ -207,6 +207,13 @@
+  *   TCP_INFO, with a representable range from RTT_STORE_MIN (100 us) to
+  *   RTT_STORE_MAX (3276.8 ms). The timeout value is clamped accordingly.
   *
 + * We also use a global interval timer for an activity timeout which doesn't
 + * require precision:
@@ -52,9 +52,9 @@ index 7a7ca24..394fc35 100644
   *
   * Summary of data flows (with ESTABLISHED event)
   * ----------------------------------------------
-@@ -330,7 +337,8 @@ enum {
- #define SYN_TIMEOUT			10		/* s */
- #define ACK_TIMEOUT			2
+@@ -345,7 +352,8 @@ enum {
+ #define RTO_INIT			1		/* s, RFC 6298 */
+ #define RTO_INIT_AFTER_SYN_RETRIES	3		/* s, RFC 6298 */
  #define FIN_TIMEOUT			60
 -#define ACT_TIMEOUT			7200
 +
@@ -62,7 +62,7 @@ index 7a7ca24..394fc35 100644
  
  #define LOW_RTT_TABLE_SIZE		8
  #define LOW_RTT_THRESHOLD		10 /* us */
-@@ -2000,6 +2008,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
+@@ -2294,6 +2302,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
  		return 1;
  	}
  
@@ -71,27 +71,19 @@ index 7a7ca24..394fc35 100644
  	if (th->ack && !(conn->events & ESTABLISHED))
  		tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
  
-@@ -2318,6 +2328,8 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
+@@ -2652,6 +2662,8 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
  		return;
  	}
  
 +	conn->inactive = false;
 +
- 	if ((conn->events & TAP_FIN_SENT) && (events & EPOLLHUP)) {
+ 	if ((conn->events & TAP_FIN_ACKED) && (events & EPOLLHUP)) {
  		conn_event(c, conn, CLOSED);
  		return;
-@@ -2696,6 +2708,7 @@ static void tcp_port_rebind(struct ctx *c, bool outbound)
+@@ -3030,6 +3042,38 @@ static void tcp_port_rebind(struct ctx *c, bool outbound)
  	}
  }
  
-+
- /**
-  * tcp_port_rebind_outbound() - Rebind ports in namespace
-  * @arg:	Execution context
-@@ -2714,6 +2727,38 @@ static int tcp_port_rebind_outbound(void *arg)
- 	return 0;
- }
- 
 +/**
 + * tcp_inactivity() - Scan for and close long-inactive connections
 + * @:	Execution context
@@ -125,9 +117,17 @@ index 7a7ca24..394fc35 100644
 +}
 +
  /**
-  * tcp_timer() - Periodic tasks: port detection, closed connections, pool refill
+  * tcp_port_rebind_outbound() - Rebind ports in namespace
+  * @arg:	Execution context
+@@ -3068,13 +3112,13 @@ void tcp_port_rebind_all(struct ctx *c)
   * @c:		Execution context
-@@ -2738,6 +2783,8 @@ void tcp_timer(struct ctx *c, const struct timespec *now)
+  * @now:	Current timestamp
+  */
+-void tcp_timer(const struct ctx *c, const struct timespec *now)
++void tcp_timer(struct ctx *c, const struct timespec *now)
+ {
+-	(void)now;
+-
  	tcp_sock_refill_init(c);
  	if (c->mode == MODE_PASTA)
  		tcp_splice_refill(c);
@@ -137,29 +137,39 @@ index 7a7ca24..394fc35 100644
  
  /**
 diff --git a/tcp.h b/tcp.h
-index 234a803..b75e9a7 100644
+index 3f21e75..37cfc5b 100644
 --- a/tcp.h
 +++ b/tcp.h
-@@ -59,12 +59,14 @@ union tcp_listen_epoll_ref {
-  * @fwd_out:		Port forwarding configuration for outbound packets
-  * @timer_run:		Timestamp of most recent timer run
-  * @pipe_size:		Size of pipes for spliced connections
+@@ -23,7 +23,7 @@ int tcp_sock_init(const struct ctx *c, uint8_t pif,
+ 		  in_port_t port);
+ int tcp_init(struct ctx *c);
+ void tcp_port_rebind_all(struct ctx *c);
+-void tcp_timer(const struct ctx *c, const struct timespec *now);
++void tcp_timer(struct ctx *c, const struct timespec *now);
+ void tcp_defer_handler(struct ctx *c);
+ 
+ void tcp_update_l2_buf(const unsigned char *eth_d);
+@@ -64,6 +64,7 @@ union tcp_listen_epoll_ref {
+  * @rto_max:		Maximum retry timeout (in s)
+  * @syn_retries:	SYN retries using exponential backoff timeout
+  * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
 + * @inactivity_run:	Time we last scanned for inactive connections
   */
  struct tcp_ctx {
  	struct fwd_ports fwd_in;
- 	struct fwd_ports fwd_out;
- 	struct timespec timer_run;
- 	size_t pipe_size;
+@@ -73,6 +74,7 @@ struct tcp_ctx {
+ 	int rto_max;
+ 	uint8_t syn_retries;
+ 	uint8_t syn_linear_timeouts;
 +	time_t inactivity_run;
  };
  
  #endif /* TCP_H */
 diff --git a/tcp_conn.h b/tcp_conn.h
-index 35d813d..93f9440 100644
+index 9c6ff9e..2e70d39 100644
 --- a/tcp_conn.h
 +++ b/tcp_conn.h
-@@ -17,6 +17,7 @@
+@@ -16,6 +16,7 @@
   * @ws_from_tap:	Window scaling factor advertised from tap/guest
   * @ws_to_tap:		Window scaling factor advertised to tap/guest
   * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
@@ -167,9 +177,9 @@ index 35d813d..93f9440 100644
   * @sock:		Socket descriptor number
   * @events:		Connection events, implying connection states
   * @listening_sock:	Listening socket this socket was accept()ed from, or -1
-@@ -52,6 +53,8 @@ struct tcp_tap_conn {
- #define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
- #define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
+@@ -58,6 +59,8 @@ struct tcp_tap_conn {
+ 	(conn->rtt_exp = MIN(RTT_EXP_MAX, ilog2(MAX(1, rtt / RTT_STORE_MIN))))
+ #define RTT_GET(conn)			(RTT_STORE_MIN << conn->rtt_exp)
  
 +	bool		inactive	:1;
 +
diff --git a/0007-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch b/0016-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
similarity index 75%
rename from 0007-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
rename to 0016-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
index 526e283..a91d604 100644
--- a/0007-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
+++ b/0016-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
@@ -1,7 +1,7 @@
-From d2f7c36df97e287e2c665d4caedf1137755bfd54 Mon Sep 17 00:00:00 2001
+From 4c1c322bd204309ee61b9f53ce6e179d52e34bdc Mon Sep 17 00:00:00 2001
 From: David Gibson <david@gibson.dropbear.id.au>
 Date: Wed, 4 Feb 2026 21:41:36 +1000
-Subject: [PATCH 7/8] tcp: Extend tcp_send_flag() to send TCP keepalive
+Subject: [PATCH 16/18] tcp: Extend tcp_send_flag() to send TCP keepalive
  segments
 
 TCP keepalives aren't technically a flag, but they are a zero-data segment
@@ -20,25 +20,25 @@ Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
  3 files changed, 9 insertions(+)
 
 diff --git a/tcp_buf.c b/tcp_buf.c
-index 0530563..a324eee 100644
+index 5d419d3..75a020f 100644
 --- a/tcp_buf.c
 +++ b/tcp_buf.c
-@@ -212,6 +212,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
- 	tcp_payload_used++;
+@@ -227,6 +227,10 @@ int tcp_buf_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
+ 	tcp_frame_conns[tcp_payload_used++] = conn;
  	l4len = optlen + sizeof(struct tcphdr);
  	iov[TCP_IOV_PAYLOAD].iov_len = l4len;
 +
 +	if (flags & KEEPALIVE)
 +		seq--;
 +
- 	tcp_l2_buf_fill_headers(conn, iov, NULL, seq, false);
+ 	tcp_l2_buf_fill_headers(c, conn, iov, NULL, seq, false);
  
- 	if (flags & DUP_ACK) {
+ 	tcp_l2_buf_pad(iov);
 diff --git a/tcp_internal.h b/tcp_internal.h
-index 36c6533..371a5f5 100644
+index 5f8fb35..36f443b 100644
 --- a/tcp_internal.h
 +++ b/tcp_internal.h
-@@ -30,6 +30,8 @@
+@@ -38,6 +38,8 @@
  
  /* Flags for internal usage */
  #define DUP_ACK		(1 << 5)
@@ -48,7 +48,7 @@ index 36c6533..371a5f5 100644
  #define OPT_NOP		1
  #define OPT_MSS		2
 diff --git a/tcp_vu.c b/tcp_vu.c
-index 57587cc..cb3f80f 100644
+index db9db78..dd50241 100644
 --- a/tcp_vu.c
 +++ b/tcp_vu.c
 @@ -135,6 +135,9 @@ int tcp_vu_send_flag(const struct ctx *c, struct tcp_tap_conn *conn, int flags)
@@ -58,7 +58,7 @@ index 57587cc..cb3f80f 100644
 +	if (flags & KEEPALIVE)
 +		seq--;
 +
- 	tcp_fill_headers(conn, NULL, ip4h, ip6h, th, &payload,
+ 	tcp_fill_headers(c, conn, NULL, eh, ip4h, ip6h, th, &payload,
  			 NULL, seq, !*c->pcap);
  
 -- 
diff --git a/0008-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch b/0017-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
similarity index 82%
rename from 0008-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
rename to 0017-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
index 6575585..4cc103e 100644
--- a/0008-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
+++ b/0017-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
@@ -1,7 +1,7 @@
-From e349a1b2416f220a8cf518c88bec8b6a7dea201d Mon Sep 17 00:00:00 2001
+From 066999b7cce460950ac01db4583b82d0ceb0dc8a Mon Sep 17 00:00:00 2001
 From: David Gibson <david@gibson.dropbear.id.au>
 Date: Wed, 4 Feb 2026 21:41:37 +1000
-Subject: [PATCH 8/8] tcp: Send TCP keepalive segments after a period of
+Subject: [PATCH 17/18] tcp: Send TCP keepalive segments after a period of
  tap-side inactivity
 
 There are several circumstances in which a live, but idle TCP connection
@@ -39,10 +39,10 @@ Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
  3 files changed, 43 insertions(+)
 
 diff --git a/tcp.c b/tcp.c
-index 394fc35..8ea7794 100644
+index dd58550..1691987 100644
 --- a/tcp.c
 +++ b/tcp.c
-@@ -209,6 +209,12 @@
+@@ -215,6 +215,12 @@
   *   keepalives) will be removed between INACTIVITY_INTERVAL s and
   *   2*INACTIVITY_INTERVAL s after the last activity.
   *
@@ -55,7 +55,7 @@ index 394fc35..8ea7794 100644
   * Summary of data flows (with ESTABLISHED event)
   * ----------------------------------------------
   *
-@@ -339,6 +345,7 @@ enum {
+@@ -354,6 +360,7 @@ enum {
  #define FIN_TIMEOUT			60
  
  #define INACTIVITY_INTERVAL		7200		/* s */
@@ -63,7 +63,7 @@ index 394fc35..8ea7794 100644
  
  #define LOW_RTT_TABLE_SIZE		8
  #define LOW_RTT_THRESHOLD		10 /* us */
-@@ -2009,6 +2016,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
+@@ -2303,6 +2310,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
  	}
  
  	conn->inactive = false;
@@ -71,8 +71,8 @@ index 394fc35..8ea7794 100644
  
  	if (th->ack && !(conn->events & ESTABLISHED))
  		tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
-@@ -2727,6 +2735,36 @@ static int tcp_port_rebind_outbound(void *arg)
- 	return 0;
+@@ -3042,6 +3050,36 @@ static void tcp_port_rebind(struct ctx *c, bool outbound)
+ 	}
  }
  
 +/**
@@ -108,7 +108,7 @@ index 394fc35..8ea7794 100644
  /**
   * tcp_inactivity() - Scan for and close long-inactive connections
   * @:	Execution context
-@@ -2784,6 +2822,7 @@ void tcp_timer(struct ctx *c, const struct timespec *now)
+@@ -3118,6 +3156,7 @@ void tcp_timer(struct ctx *c, const struct timespec *now)
  	if (c->mode == MODE_PASTA)
  		tcp_splice_refill(c);
  
@@ -117,30 +117,30 @@ index 394fc35..8ea7794 100644
  }
  
 diff --git a/tcp.h b/tcp.h
-index b75e9a7..7433f15 100644
+index 37cfc5b..505f21a 100644
 --- a/tcp.h
 +++ b/tcp.h
-@@ -59,6 +59,7 @@ union tcp_listen_epoll_ref {
-  * @fwd_out:		Port forwarding configuration for outbound packets
-  * @timer_run:		Timestamp of most recent timer run
-  * @pipe_size:		Size of pipes for spliced connections
+@@ -64,6 +64,7 @@ union tcp_listen_epoll_ref {
+  * @rto_max:		Maximum retry timeout (in s)
+  * @syn_retries:	SYN retries using exponential backoff timeout
+  * @syn_linear_timeouts: SYN retries before using exponential backoff timeout
 + * @keepalive_run:	Time we last issued tap-side keepalives
   * @inactivity_run:	Time we last scanned for inactive connections
   */
  struct tcp_ctx {
-@@ -66,6 +67,7 @@ struct tcp_ctx {
- 	struct fwd_ports fwd_out;
- 	struct timespec timer_run;
- 	size_t pipe_size;
+@@ -74,6 +75,7 @@ struct tcp_ctx {
+ 	int rto_max;
+ 	uint8_t syn_retries;
+ 	uint8_t syn_linear_timeouts;
 +	time_t keepalive_run;
  	time_t inactivity_run;
  };
  
 diff --git a/tcp_conn.h b/tcp_conn.h
-index 93f9440..23ef5bd 100644
+index 2e70d39..2ff76ed 100644
 --- a/tcp_conn.h
 +++ b/tcp_conn.h
-@@ -17,6 +17,7 @@
+@@ -16,6 +16,7 @@
   * @ws_from_tap:	Window scaling factor advertised from tap/guest
   * @ws_to_tap:		Window scaling factor advertised to tap/guest
   * @tap_mss:		MSS advertised by tap/guest, rounded to 2 ^ TCP_MSS_BITS
@@ -148,9 +148,9 @@ index 93f9440..23ef5bd 100644
   * @inactive:		No activity within the current INACTIVITY_INTERVAL
   * @sock:		Socket descriptor number
   * @events:		Connection events, implying connection states
-@@ -53,6 +54,7 @@ struct tcp_tap_conn {
- #define MSS_SET(conn, mss)	(conn->tap_mss = (mss >> (16 - TCP_MSS_BITS)))
- #define MSS_GET(conn)		(conn->tap_mss << (16 - TCP_MSS_BITS))
+@@ -59,6 +60,7 @@ struct tcp_tap_conn {
+ 	(conn->rtt_exp = MIN(RTT_EXP_MAX, ilog2(MAX(1, rtt / RTT_STORE_MIN))))
+ #define RTT_GET(conn)			(RTT_STORE_MIN << conn->rtt_exp)
  
 +	bool		tap_inactive	:1;
  	bool		inactive	:1;
diff --git a/0018-tcp-Replace-send-buffer-boost-with-EPOLLOUT-monitori.patch b/0018-tcp-Replace-send-buffer-boost-with-EPOLLOUT-monitori.patch
new file mode 100644
index 0000000..21da2a5
--- /dev/null
+++ b/0018-tcp-Replace-send-buffer-boost-with-EPOLLOUT-monitori.patch
@@ -0,0 +1,132 @@
+From 8fc9d5982f6e160fd7d1748d5b285219b2b8ea56 Mon Sep 17 00:00:00 2001
+From: Yumei Huang <yuhuang@redhat.com>
+Date: Fri, 20 Mar 2026 18:32:14 +0800
+Subject: [PATCH 18/18] tcp: Replace send buffer boost with EPOLLOUT monitoring
+
+Currently we use the SNDBUF boost mechanism to force TCP auto-tuning.
+However, it doesn't always work, and sometimes causes a lot of
+retransmissions. As a result, the throughput suffers.
+
+This patch replaces it with monitoring EPOLLOUT when sendmsg() failure
+(with EAGAIN and EWOULDBLOCK) and partial sends occur.
+
+Tested with iperf3 inside pasta: throughput is now comparable to running
+iperf3 directly on the host without pasta. However, retransmissions can
+still be elevated when RTT >= 50ms. For example, when RTT is between
+200ms and 500ms, retransmission count varies from 30 to 120 in roughly
+80% of test runs.
+
+Link: https://bugs.passt.top/show_bug.cgi?id=138
+Link: https://github.com/containers/podman/issues/28219
+Suggested-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Yumei Huang <yuhuang@redhat.com>
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+---
+ tcp.c | 57 +++++++++++++++++----------------------------------------
+ 1 file changed, 17 insertions(+), 40 deletions(-)
+
+diff --git a/tcp.c b/tcp.c
+index 1691987..920af70 100644
+--- a/tcp.c
++++ b/tcp.c
+@@ -365,13 +365,6 @@ enum {
+ #define LOW_RTT_TABLE_SIZE		8
+ #define LOW_RTT_THRESHOLD		10 /* us */
+ 
+-/* Parameters to temporarily exceed sending buffer to force TCP auto-tuning */
+-#define SNDBUF_BOOST_BYTES_RTT_LO	2500 /* B * s: no boost until here */
+-/* ...examples:  5 MB sent * 500 ns RTT, 250 kB * 10 ms,  8 kB * 300 ms */
+-#define SNDBUF_BOOST_FACTOR		150 /* % */
+-#define SNDBUF_BOOST_BYTES_RTT_HI	6000 /* apply full boost factor */
+-/*		12 MB sent * 500 ns RTT, 600 kB * 10 ms, 20 kB * 300 ms */
+-
+ /* Ratio of buffer to bandwidth * delay product implying interactive traffic */
+ #define SNDBUF_TO_BW_DELAY_INTERACTIVE	/* > */ 20 /* (i.e. < 5% of buffer) */
+ 
+@@ -1067,35 +1060,6 @@ void tcp_fill_headers(const struct ctx *c, struct tcp_tap_conn *conn,
+ 	tap_hdr_update(taph, MAX(l3len + sizeof(struct ethhdr), ETH_ZLEN));
+ }
+ 
+-/**
+- * tcp_sndbuf_boost() - Calculate limit of sending buffer to force auto-tuning
+- * @conn:	Connection pointer
+- * @tinfo:	tcp_info from kernel, must be pre-fetched
+- *
+- * Return: increased sending buffer to use as a limit for advertised window
+- */
+-static unsigned long tcp_sndbuf_boost(const struct tcp_tap_conn *conn,
+-				      const struct tcp_info_linux *tinfo)
+-{
+-	unsigned long bytes_rtt_product;
+-
+-	if (!bytes_acked_cap)
+-		return SNDBUF_GET(conn);
+-
+-	/* This is *not* a bandwidth-delay product, but it's somewhat related:
+-	 * as we send more data (usually at the beginning of a connection), we
+-	 * try to make the sending buffer progressively grow, with the RTT as a
+-	 * factor (longer delay, bigger buffer needed).
+-	 */
+-	bytes_rtt_product = (long long)tinfo->tcpi_bytes_acked *
+-			    tinfo->tcpi_rtt / 1000 / 1000;
+-
+-	return clamped_scale(SNDBUF_GET(conn), bytes_rtt_product,
+-			     SNDBUF_BOOST_BYTES_RTT_LO,
+-			     SNDBUF_BOOST_BYTES_RTT_HI,
+-			     SNDBUF_BOOST_FACTOR);
+-}
+-
+ /**
+  * tcp_update_seqack_wnd() - Update ACK sequence and window to guest/tap
+  * @c:		Execution context
+@@ -1216,8 +1180,6 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn,
+ 
+ 		if ((int)sendq > SNDBUF_GET(conn)) /* Due to memory pressure? */
+ 			limit = 0;
+-		else if ((int)tinfo->tcpi_snd_wnd > SNDBUF_GET(conn))
+-			limit = tcp_sndbuf_boost(conn, tinfo) - (int)sendq;
+ 		else
+ 			limit = SNDBUF_GET(conn) - (int)sendq;
+ 
+@@ -2088,14 +2050,28 @@ eintr:
+ 
+ 		if (errno == EAGAIN || errno == EWOULDBLOCK) {
+ 			tcp_send_flag(c, conn, ACK | DUP_ACK);
++			uint32_t events = tcp_conn_epoll_events(conn->events,
++								conn->flags);
++			events |= EPOLLOUT;
++			if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events,
++			    conn->sock, !TAPSIDE(conn)) < 0)
++				debug("Failed to add EPOLLOUT");
+ 			return p->count - idx;
+-
+ 		}
+ 		return -1;
+ 	}
+ 
+-	if (n < (int)(seq_from_tap - conn->seq_from_tap))
++	if (n < (int)(seq_from_tap - conn->seq_from_tap)) {
+ 		partial_send = 1;
++		uint32_t events = tcp_conn_epoll_events(conn->events,
++							conn->flags);
++		events |= EPOLLOUT;
++		if (flow_epoll_set(&conn->f, EPOLL_CTL_MOD, events, conn->sock,
++		    !TAPSIDE(conn)) < 0)
++			debug("Failed to add EPOLLOUT");
++	 } else {
++		tcp_epoll_ctl(conn);
++	 }
+ 
+ 	conn->seq_from_tap += n;
+ 
+@@ -2688,6 +2664,7 @@ void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
+ 			tcp_data_from_sock(c, conn);
+ 
+ 		if (events & EPOLLOUT) {
++			tcp_epoll_ctl(conn);
+ 			if (tcp_update_seqack_wnd(c, conn, false, NULL))
+ 				tcp_send_flag(c, conn, ACK);
+ 		}
+-- 
+2.47.1
+
diff --git a/passt.spec b/passt.spec
index 30a2518..79a25f0 100644
--- a/passt.spec
+++ b/passt.spec
@@ -7,26 +7,35 @@
 # Copyright (c) 2022 Red Hat GmbH
 # Author: Stefano Brivio <sbrivio@redhat.com>
 
-%global git_hash 8ec134109eb136432a29bdf5a14f8b1fd4e46208
+%global git_hash d04c48032bcf724550d0b8f652fd00efcd2dfad0
 %global selinuxtype targeted
+%global selinux_policy_version 41.41
 
 Name:		passt
-Version:	0^20250512.g8ec1341
-Release:	5%{?dist}
+Version:	0^20251210.gd04c480
+Release:	4%{?dist}
 Summary:	User-mode networking daemons for virtual machines and namespaces
 License:	GPL-2.0-or-later AND BSD-3-Clause
 Group:		System Environment/Daemons
 URL:		https://passt.top/
 Source:		https://passt.top/passt/snapshot/passt-%{git_hash}.tar.xz
 
-Patch1:		0001-treewide-By-default-don-t-quit-source-after-migratio.patch
-Patch2:		0002-tcp-Cast-operands-of-sequence-comparison-macros-to-u.patch
-Patch3:		0003-tcp-Don-t-consider-FIN-flags-with-mismatching-sequen.patch
-Patch4:		0004-tcp-Properly-remove-sockets-from-epoll-loop-when-con.patch
-Patch5:		0005-tcp-Remove-non-working-activity-timeout-mechanism.patch
-Patch6:		0006-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
-Patch7:		0007-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
-Patch8:		0008-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
+Patch3:		0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch
+Patch4:		0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch
+Patch5:		0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch
+Patch6:		0006-selinux-Enable-open-permissions-on-netns-directory-o.patch
+Patch7:		0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch
+Patch8:		0008-udp_flow-remove-unneeded-epoll_ref-indirection.patch
+Patch9:		0009-udp_flow-Assign-socket-to-flow-inside-udp_flow_sock.patch
+Patch10:	0010-tcp_splice-Refactor-tcp_splice_conn_epoll_events-to-.patch
+Patch11:	0011-flow-Introduce-flow_epoll_set-to-centralize-epoll-op.patch
+Patch12:	0012-tcp-Properly-propagate-tap-side-RST-to-socket-side.patch
+Patch13:	0013-udp-Split-activity-timeouts-for-UDP-flows.patch
+Patch14:	0014-tcp-Remove-non-working-activity-timeout-mechanism.patch
+Patch15:	0015-tcp-Re-introduce-inactivity-timeouts-based-on-a-cloc.patch
+Patch16:	0016-tcp-Extend-tcp_send_flag-to-send-TCP-keepalive-segme.patch
+Patch17:	0017-tcp-Send-TCP-keepalive-segments-after-a-period-of-ta.patch
+Patch18:	0018-tcp-Replace-send-buffer-boost-with-EPOLLOUT-monitori.patch
 
 BuildRequires:	gcc, make, git, checkpolicy, selinux-policy-devel
 Requires:	(%{name}-selinux = %{version}-%{release} if selinux-policy-%{selinuxtype})
@@ -42,15 +51,21 @@ for network namespaces: traffic is forwarded using a tap interface inside the
 namespace, without the need to create further interfaces on the host, hence not
 requiring any capabilities or privileges.
 
-%package    selinux
-BuildArch:  noarch
-Summary:    SELinux support for passt and pasta
-Requires:   %{name} = %{version}-%{release}
-Requires:   selinux-policy
-Requires(post): %{name}
-Requires(post): policycoreutils
-Requires(preun): %{name}
-Requires(preun): policycoreutils
+%package		    selinux
+BuildArch:		    noarch
+Summary:		    SELinux support for passt and pasta
+%if 0%{?fedora} > 43
+BuildRequires:      selinux-policy-devel
+%selinux_requires_min
+%else
+BuildRequires:      pkgconfig(systemd)
+Requires(post):     libselinux-utils
+Requires(post):     policycoreutils
+%endif
+Requires:		    container-selinux
+Requires:		    selinux-policy-%{selinuxtype}
+Requires(post):		container-selinux
+Requires(post):		selinux-policy-%{selinuxtype}
 
 %description selinux
 This package adds SELinux enforcement to passt(1), pasta(1), passt-repair(1).
@@ -98,15 +113,11 @@ popd
 %selinux_relabel_pre -s %{selinuxtype}
 
 %post selinux
-%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp
-%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp
-%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
+%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
 
 %postun selinux
 if [ $1 -eq 0 ]; then
-	%selinux_modules_uninstall -s %{selinuxtype} passt
-	%selinux_modules_uninstall -s %{selinuxtype} pasta
-	%selinux_modules_uninstall -s %{selinuxtype} passt-repair
+	%selinux_modules_uninstall -s %{selinuxtype} passt pasta passt-repair
 fi
 
 %posttrans selinux
@@ -139,11 +150,23 @@ fi
 %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
 
 %changelog
-* Wed Apr 22 2026 Stefano Brivio <sbrivio@redhat.com> - 0^20250512.g8ec1341-5
-- Resolves: RHEL-169974 RHEL-169634
+* Tue Apr 21 2026 Stefano Brivio <sbrivio@redhat.com> - 0^20251210.gd04c480-4
+- Resolves: RHEL-169635 RHEL-169642 RHEL-169646
 
-* Thu Oct 23 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250512.g8ec1341-4
-- Resolves: RHEL-123415 RHEL-123424
+* Wed Feb 11 2026 Stefano Brivio <sbrivio@redhat.com> - 0^20251210.gd04c480-3
+- Resolves: RHEL-136495 RHEL-136314
+
+* Wed Dec 24 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20251210.gd04c480-2
+- Resolves: RHEL-136314 RHEL-137440 RHEL-136495
+
+* Wed Dec 10 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20251210.gd04c480-1
+- Resolves: RHEL-134949 RHEL-134953
+
+* Tue Dec  9 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20251209.gc3f1ba7-1
+- Resolves: RHEL-134120
+
+* Thu Oct 23 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250512.g8ec1341-3
+- Resolves: RHEL-123425 RHEL-123683
 
 * Tue Jul 29 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250512.g8ec1341-2
 - Resolves: RHEL-106425
diff --git a/sources b/sources
index 8dba02a..ba91c70 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (passt-8ec134109eb136432a29bdf5a14f8b1fd4e46208.tar.xz) = 3a63f3f62aae06ae0da2293808223f539bca1a030131c50499d5de2daa96faf887fd486b6aa71d627b5ede6de6f5310876150983a3e77fbaf9926e69af56bdab
+SHA512 (passt-d04c48032bcf724550d0b8f652fd00efcd2dfad0.tar.xz) = ad9606133292dcc5955aa58d6481aaf98f327ded5c70dcefb35158ef8fd1a35ea0c6bfb5ef9e3ba39f9a161366dc3733aca475d34249aff8977174fdae7d39cc