From 1259b2e54bfae1ad39783182f7e39613ab386331 Mon Sep 17 00:00:00 2001 From: AlmaLinux RelEng Bot Date: Wed, 1 Apr 2026 06:16:17 -0400 Subject: [PATCH] import CS passt-0^20251210.gd04c480-3.el9 --- .gitignore | 2 +- .passt.metadata | 2 +- ...op-user_namespace-create-allow-rules.patch | 21 +- ...emd_logind_exec_t-instead-of-systemd.patch | 41 +++ ...ult-don-t-quit-source-after-migratio.patch | 264 ------------------ ...n-MSS-window-on-no-queued-data-or-no.patch | 110 ++++++++ ...le-matching-IP-version-if-not-suppor.patch | 90 ++++++ ...ead-and-watch-permissions-on-netns-d.patch | 58 ++++ ...pen-permissions-on-netns-directory-o.patch | 68 +++++ ...-issue-in-check-for-approximating-wi.patch | 74 +++++ SPECS/passt.spec | 61 ++-- 11 files changed, 496 insertions(+), 295 deletions(-) create mode 100644 SOURCES/0002-selinux-Use-systemd_logind_exec_t-instead-of-systemd.patch delete mode 100644 SOURCES/0002-treewide-By-default-don-t-quit-source-after-migratio.patch create mode 100644 SOURCES/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch create mode 100644 SOURCES/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch create mode 100644 SOURCES/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch create mode 100644 SOURCES/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch create mode 100644 SOURCES/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch diff --git a/.gitignore b/.gitignore index 7d7ac68..0a79d09 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/passt-8ec134109eb136432a29bdf5a14f8b1fd4e46208.tar.xz +SOURCES/passt-d04c48032bcf724550d0b8f652fd00efcd2dfad0.tar.xz diff --git a/.passt.metadata b/.passt.metadata index 674e16c..c00c0d5 100644 --- a/.passt.metadata +++ b/.passt.metadata @@ -1 +1 @@ -7b91876dcd65569ddf775b2da567345500ec8862 SOURCES/passt-8ec134109eb136432a29bdf5a14f8b1fd4e46208.tar.xz +ec2fcde158b88b1ed9786565025380d03aa32d56 SOURCES/passt-d04c48032bcf724550d0b8f652fd00efcd2dfad0.tar.xz diff --git a/SOURCES/0001-selinux-Drop-user_namespace-create-allow-rules.patch b/SOURCES/0001-selinux-Drop-user_namespace-create-allow-rules.patch index 4149192..5b5fea6 100644 --- a/SOURCES/0001-selinux-Drop-user_namespace-create-allow-rules.patch +++ b/SOURCES/0001-selinux-Drop-user_namespace-create-allow-rules.patch @@ -1,6 +1,6 @@ -From 6977619743bbc602a865f79562b59a80921d6063 Mon Sep 17 00:00:00 2001 +From 7087adfbab35354f9def7edee87385b82416c722 Mon Sep 17 00:00:00 2001 From: Stefano Brivio -Date: Mon, 21 Aug 2023 17:52:28 +0200 +Date: Mon, 8 Dec 2025 22:32:50 -0500 Subject: [PATCH] selinux: Drop user_namespace create allow rules Those are incompatible with current el9 kernels. I introduced them @@ -24,10 +24,10 @@ Signed-off-by: Stefano Brivio 2 files changed, 2 deletions(-) diff --git a/contrib/selinux/passt.te b/contrib/selinux/passt.te -index c6cea34..131fadc 100644 +index 6995df8..76d23e8 100644 --- a/contrib/selinux/passt.te +++ b/contrib/selinux/passt.te -@@ -92,7 +92,6 @@ allow syslogd_t self:cap_userns sys_ptrace; +@@ -105,7 +105,6 @@ allow syslogd_t self:cap_userns sys_ptrace; allow passt_t self:process setcap; allow passt_t self:capability { sys_tty_config setpcap net_bind_service setuid setgid}; allow passt_t self:cap_userns { setpcap sys_admin sys_ptrace }; @@ -36,16 +36,17 @@ index c6cea34..131fadc 100644 auth_read_passwd(passt_t) diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te -index 69be081..892edae 100644 +index 95fe42a..7e1e821 100644 --- a/contrib/selinux/pasta.te +++ b/contrib/selinux/pasta.te -@@ -110,7 +110,6 @@ init_daemon_domain(pasta_t, pasta_exec_t) - - allow pasta_t self:capability { setpcap net_bind_service sys_tty_config dac_read_search net_admin sys_resource setuid setgid }; - allow pasta_t self:cap_userns { setpcap sys_admin sys_ptrace net_admin net_bind_service }; +@@ -126,7 +126,6 @@ allow pasta_t self:cap_userns { setpcap sys_admin sys_ptrace net_admin net_bind_ + # pasta only calls setuid and setgid with the current UID and GID, so this + # denial is harmless. See https://bugzilla.redhat.com/show_bug.cgi?id=2330512#c10 + dontaudit pasta_t self:cap_userns { setgid setuid }; -allow pasta_t self:user_namespace create; auth_read_passwd(pasta_t) -- -2.39.2 +2.47.1 + diff --git a/SOURCES/0002-selinux-Use-systemd_logind_exec_t-instead-of-systemd.patch b/SOURCES/0002-selinux-Use-systemd_logind_exec_t-instead-of-systemd.patch new file mode 100644 index 0000000..7f3736a --- /dev/null +++ b/SOURCES/0002-selinux-Use-systemd_logind_exec_t-instead-of-systemd.patch @@ -0,0 +1,41 @@ +From 2244df26b2cb63acb51a20485e1ca7ad0649b152 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Mon, 22 Dec 2025 21:48:32 -0500 +Subject: [PATCH] selinux: Use systemd_logind_exec_t instead of + systemd_user_runtimedir_exec_t + +On CentOS Stream 9, selinux-policy doesn't contain commit +700b3622d575 ("Confine /usr/lib/systemd/systemd-user-runtime-dir"), +so the file context of /usr/lib/systemd/systemd-user-runtime-dir is +still systemd_logind_exec_t there. + +Signed-off-by: Stefano Brivio +--- + contrib/selinux/pasta.te | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te +index 7e1e821..d29d6c4 100644 +--- a/contrib/selinux/pasta.te ++++ b/contrib/selinux/pasta.te +@@ -98,7 +98,7 @@ require { + type container_runtime_t; + type container_var_run_t; + type container_t; +- type systemd_user_runtimedir_t; ++ type systemd_logind_exec_t; + } + + type pasta_t; +@@ -250,7 +250,7 @@ type_transition container_runtime_t user_tmp_t : dir ifconfig_var_run_t "rootles + type_transition container_runtime_t container_var_run_t : dir ifconfig_var_run_t "rootless-netns"; + allow pasta_t ifconfig_var_run_t:dir { add_name open rmdir write }; + allow pasta_t ifconfig_var_run_t:file { create open write }; +-allow systemd_user_runtimedir_t ifconfig_var_run_t:dir rmdir; ++allow systemd_logind_exec_t ifconfig_var_run_t:dir rmdir; + + # Allow pasta to bind to any port + bool pasta_bind_all_ports true; +-- +2.47.1 + diff --git a/SOURCES/0002-treewide-By-default-don-t-quit-source-after-migratio.patch b/SOURCES/0002-treewide-By-default-don-t-quit-source-after-migratio.patch deleted file mode 100644 index 95e79e7..0000000 --- a/SOURCES/0002-treewide-By-default-don-t-quit-source-after-migratio.patch +++ /dev/null @@ -1,264 +0,0 @@ -From b0b5ce0a76cf7fec0b00405732fd94e0b34e8d84 Mon Sep 17 00:00:00 2001 -From: Stefano Brivio -Date: Thu, 17 Jul 2025 10:38:17 +0200 -Subject: [PATCH] treewide: By default, don't quit source after migration, keep - sockets open - -We are hitting an issue in the KubeVirt integration where some data is -still sent to the source instance even after migration is complete. As -we exit, the kernel closes our sockets and resets connections. The -resulting RST segments are sent to peers, effectively terminating -connections that were meanwhile migrated. - -At the moment, this is not done intentionally, but in the future -KubeVirt might enable OVN-Kubernetes features where source and -destination nodes are explicitly getting mirrored traffic for a while, -in order to decrease migration downtime. - -By default, don't quit after migration is completed on the source: the -previous behaviour can be enabled with the new, but deprecated, ---migrate-exit option. After migration (as source), the -1 / --one-off -option has no effect. - -Also, by default, keep migrated TCP sockets open (in repair mode) as -long as we're running, and ignore events on any epoll descriptor -representing data channels. The previous behaviour can be enabled with -the new, equally deprecated, --migrate-no-linger option. - -By keeping sockets open, and not exiting, we prevent the kernel -running on the source node to send out RST segments if further data -reaches us. - -Reported-by: Nir Dothan -Signed-off-by: Stefano Brivio -(cherry picked from commit a8782865c342eb2682cca292d5bf92b567344351) ---- - conf.c | 22 ++++++++++++++++++++++ - flow.c | 2 +- - passt.1 | 29 +++++++++++++++++++++++++++++ - passt.h | 4 ++++ - tcp.c | 9 +++++++-- - tcp_conn.h | 3 ++- - test/lib/setup | 4 ++-- - vhost_user.c | 9 +++++++-- - 8 files changed, 74 insertions(+), 8 deletions(-) - -diff --git a/conf.c b/conf.c -index a6d7e22..1295d89 100644 ---- a/conf.c -+++ b/conf.c -@@ -864,6 +864,14 @@ static void usage(const char *name, FILE *f, int status) - FPRINTF(f, - " --repair-path PATH path for passt-repair(1)\n" - " default: append '.repair' to UNIX domain path\n"); -+ FPRINTF(f, -+ " --migrate-exit DEPRECATED:\n" -+ " source quits after migration\n" -+ " default: source keeps running after migration\n"); -+ FPRINTF(f, -+ " --migrate-no-linger DEPRECATED:\n" -+ " close sockets on migration\n" -+ " default: keep sockets open, ignore events\n"); - } - - FPRINTF(f, -@@ -1468,6 +1476,8 @@ void conf(struct ctx *c, int argc, char **argv) - {"socket-path", required_argument, NULL, 's' }, - {"fqdn", required_argument, NULL, 27 }, - {"repair-path", required_argument, NULL, 28 }, -+ {"migrate-exit", no_argument, NULL, 29 }, -+ {"migrate-no-linger", no_argument, NULL, 30 }, - { 0 }, - }; - const char *optstring = "+dqfel:hs:F:I:p:P:m:a:n:M:g:i:o:D:S:H:461t:u:T:U:"; -@@ -1683,6 +1693,18 @@ void conf(struct ctx *c, int argc, char **argv) - optarg)) - die("Invalid passt-repair path: %s", optarg); - -+ break; -+ case 29: -+ if (c->mode != MODE_VU) -+ die("--migrate-exit is for vhost-user mode only"); -+ c->migrate_exit = true; -+ -+ break; -+ case 30: -+ if (c->mode != MODE_VU) -+ die("--migrate-no-linger is for vhost-user mode only"); -+ c->migrate_no_linger = true; -+ - break; - case 'd': - c->debug = 1; -diff --git a/flow.c b/flow.c -index 6a5c8aa..a4b65ea 100644 ---- a/flow.c -+++ b/flow.c -@@ -1089,7 +1089,7 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage, - * as EIO). - */ - foreach_established_tcp_flow(flow) { -- rc = tcp_flow_migrate_source_ext(fd, &flow->tcp); -+ rc = tcp_flow_migrate_source_ext(c, fd, &flow->tcp); - if (rc) { - flow_err(flow, "Can't send extended data: %s", - strerror_(-rc)); -diff --git a/passt.1 b/passt.1 -index 60066c2..cef98b2 100644 ---- a/passt.1 -+++ b/passt.1 -@@ -439,6 +439,30 @@ Default, for \-\-vhost-user mode only, is to append \fI.repair\fR to the path - chosen for the hypervisor UNIX domain socket. No socket is created if not in - \-\-vhost-user mode. - -+.TP -+.BR \-\-migrate-exit (DEPRECATED) -+Exit after a completed migration as source. By default, \fBpasst\fR keeps -+running and the migrated guest can continue using its connection, or a new guest -+can connect. -+ -+Note that this configuration option is \fBdeprecated\fR and will be removed in a -+future version. It is not expected to be of any use, and it simply reflects a -+legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR -+below. -+ -+.TP -+.BR \-\-migrate-no-linger (DEPRECATED) -+Close TCP sockets on the source instance once migration completes. -+ -+By default, sockets are kept open, and events on data sockets are ignored, so -+that any further message reaching sockets after the source migrated is silently -+ignored, to avoid connection resets in case data is received after migration. -+ -+Note that this configuration option is \fBdeprecated\fR and will be removed in a -+future version. It is not expected to be of any use, and it simply reflects a -+legacy behaviour. If you have any use for this, refer to \fBREPORTING BUGS\fR -+below. -+ - .TP - .BR \-F ", " \-\-fd " " \fIFD - Pass a pre-opened, connected socket to \fBpasst\fR. Usually the socket is opened -@@ -454,6 +478,11 @@ is closed. - Quit after handling a single client connection, that is, once the client closes - the socket, or once we get a socket error. - -+\fBNote\fR: this option has no effect after \fBpasst\fR completes a migration as -+source, because, in that case, exiting would close sockets for active -+connections, which would in turn cause connection resets if any further data is -+received. See also the description of \fI\-\-migrate-no-linger\fR. -+ - .TP - .BR \-t ", " \-\-tcp-ports " " \fIspec - Configure TCP port forwarding to guest. \fIspec\fR can be one of: -diff --git a/passt.h b/passt.h -index 8693794..4cfd6eb 100644 ---- a/passt.h -+++ b/passt.h -@@ -241,6 +241,8 @@ struct ip6_ctx { - * @device_state_fd: Device state migration channel - * @device_state_result: Device state migration result - * @migrate_target: Are we the target, on the next migration request? -+ * @migrate_no_linger: Close sockets as we migrate them -+ * @migrate_exit: Exit (on source) once migration is complete - */ - struct ctx { - enum passt_modes mode; -@@ -318,6 +320,8 @@ struct ctx { - int device_state_fd; - int device_state_result; - bool migrate_target; -+ bool migrate_no_linger; -+ bool migrate_exit; - }; - - void proto_update_l2_buf(const unsigned char *eth_d, -diff --git a/tcp.c b/tcp.c -index 0ac298a..1b22f70 100644 ---- a/tcp.c -+++ b/tcp.c -@@ -3284,12 +3284,14 @@ int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn) - - /** - * tcp_flow_migrate_source_ext() - Dump queues, close sockets, send final data -+ * @c: Execution context - * @fd: Descriptor for state migration - * @conn: Pointer to the TCP connection structure - * - * Return: 0 on success, negative (not -EIO) on failure, -EIO on sending failure - */ --int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn) -+int tcp_flow_migrate_source_ext(const struct ctx *c, -+ int fd, const struct tcp_tap_conn *conn) - { - uint32_t peek_offset = conn->seq_to_tap - conn->seq_ack_from_tap; - struct tcp_tap_transfer_ext *t = &migrate_ext[FLOW_IDX(conn)]; -@@ -3334,7 +3336,10 @@ int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn) - if ((rc = tcp_flow_dump_seq(conn, &t->seq_rcv))) - goto fail; - -- close(s); -+ if (c->migrate_no_linger) -+ close(s); -+ else -+ epoll_del(c, s); - - /* Adjustments unrelated to FIN segments: sequence numbers we dumped are - * based on the end of the queues. -diff --git a/tcp_conn.h b/tcp_conn.h -index 35d813d..38b5c54 100644 ---- a/tcp_conn.h -+++ b/tcp_conn.h -@@ -236,7 +236,8 @@ int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn); - int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn); - - int tcp_flow_migrate_source(int fd, struct tcp_tap_conn *conn); --int tcp_flow_migrate_source_ext(int fd, const struct tcp_tap_conn *conn); -+int tcp_flow_migrate_source_ext(const struct ctx *c, int fd, -+ const struct tcp_tap_conn *conn); - - int tcp_flow_migrate_target(struct ctx *c, int fd); - int tcp_flow_migrate_target_ext(struct ctx *c, struct tcp_tap_conn *conn, int fd); -diff --git a/test/lib/setup b/test/lib/setup -index 575bc21..5994598 100755 ---- a/test/lib/setup -+++ b/test/lib/setup -@@ -350,7 +350,7 @@ setup_migrate() { - - sleep 1 - -- __opts="--vhost-user" -+ __opts="--vhost-user --migrate-exit --migrate-no-linger" - [ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_1.pcap" - [ ${DEBUG} -eq 1 ] && __opts="${__opts} -d" - [ ${TRACE} -eq 1 ] && __opts="${__opts} --trace" -@@ -360,7 +360,7 @@ setup_migrate() { - - context_run_bg passt_repair_1 "./passt-repair ${STATESETUP}/passt_1.socket.repair" - -- __opts="--vhost-user" -+ __opts="--vhost-user --migrate-exit --migrate-no-linger" - [ ${PCAP} -eq 1 ] && __opts="${__opts} -p ${LOGDIR}/passt_2.pcap" - [ ${DEBUG} -eq 1 ] && __opts="${__opts} -d" - [ ${TRACE} -eq 1 ] && __opts="${__opts} --trace" -diff --git a/vhost_user.c b/vhost_user.c -index 105f77a..c4d3a52 100644 ---- a/vhost_user.c -+++ b/vhost_user.c -@@ -1208,7 +1208,12 @@ void vu_control_handler(struct vu_dev *vdev, int fd, uint32_t events) - if (msg.hdr.request == VHOST_USER_CHECK_DEVICE_STATE && - vdev->context->device_state_result == 0 && - !vdev->context->migrate_target) { -- info("Migration complete, exiting"); -- _exit(EXIT_SUCCESS); -+ if (vdev->context->migrate_exit) { -+ info("Migration complete, exiting"); -+ _exit(EXIT_SUCCESS); -+ } -+ -+ info("Migration complete"); -+ vdev->context->one_off = false; - } - } --- -2.47.1 - diff --git a/SOURCES/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch b/SOURCES/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch new file mode 100644 index 0000000..50f80a3 --- /dev/null +++ b/SOURCES/0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch @@ -0,0 +1,110 @@ +From b40f5cd8c8e16c6eceb1f26eb895527fda84068b Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Sat, 13 Dec 2025 14:19:13 +0100 +Subject: [PATCH] tcp: Use less-than-MSS window on no queued data, or no data + sent recently + +We limit the advertised window to guests and containers to the +available length of the sending buffer, and if it's less than the MSS, +since commit cf1925fb7b77 ("tcp: Don't limit window to less-than-MSS +values, use zero instead"), we approximate that limit to zero. + +This way, we'll trigger a window update as soon as we realise that we +can advertise a larger value, just like we do in all other cases where +we advertise a zero-sized window. + +By doing that, we don't wait for the peer to send us data before we +update the window. This matters because the guest or container might +be trying to aggregate more data and won't send us anything at all if +the advertised window is too small. + +However, this might be problematic in two situations: + +1. one, reported by Tyler, where the remote (receiving) peer + advertises a window that's smaller than what we usually get and + very close to the MSS, causing the kernel to give us a starting + size of the buffer that's less than the MSS we advertise to the + guest or container. + + If this happens, we'll never advertise a non-zero window after + the handshake, and the container or guest will never send us any + data at all. + + With a simple 'curl https://cloudflare.com/', we get, with default + TCP memory parameters, a 65535-byte window from the peer, and 46080 + bytes of initial sending buffer from the kernel. But we advertised + a 65480-byte MSS, and we'll never actually receive the client + request. + + This seems to be specific to Cloudflare for some reason, probably + deriving from a particular tuning of TCP parameters on their + servers. + +2. another one, hypothesised by David, where the peer might only be + willing to process (and acknowledge) data in batches. + + We might have queued outbound data which is, at the same time, not + enough to fill one of these batches and be acknowledged and removed + from the sending queue, but enough to make our available buffer + smaller than the MSS, and the connection will hang. + +Take care of both cases by: + +a. not approximating the sending buffer to zero if we have no outboud + queued data at all, because in that case we don't expect the + available buffer to increase if we don't send any data, so there's + no point in waiting for it to grow larger than the MSS. + + This fixes problem 1. above. + +b. also using the full sending buffer size if we haven't send data to + the socket for a while (reported by tcpi_last_data_sent). This part + was already suggested by David in: + + https://archives.passt.top/passt-dev/aTZzgtcKWLb28zrf@zatzit/ + + and I'm now picking ten times the RTT as a somewhat arbitrary + threshold. + + This is meant to take care of potential problem 2. above, but it + also happens to fix 1. + +Reported-by: Tyler Cloud +Link: https://bugs.passt.top/show_bug.cgi?id=183 +Suggested-by: David Gibson +Signed-off-by: Stefano Brivio +Reviewed-by: David Gibson +--- + tcp.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/tcp.c b/tcp.c +index 81bc114..b179e39 100644 +--- a/tcp.c ++++ b/tcp.c +@@ -1211,8 +1211,21 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, + * the MSS to zero, as we already have mechanisms in place to + * force updates after the window becomes zero. This matches the + * suggestion from RFC 813, Section 4. ++ * ++ * But don't do this if, either: ++ * ++ * - there's nothing in the outbound queue: the size of the ++ * sending buffer is limiting us, and it won't increase if we ++ * don't send data, so there's no point in waiting, or ++ * ++ * - we haven't sent data in a while (somewhat arbitrarily, ten ++ * times the RTT), as that might indicate that the receiver ++ * will only process data in batches that are large enough, ++ * but we won't send enough to fill one because we're stuck ++ * with pending data in the outbound queue + */ +- if (limit < MSS_GET(conn)) ++ if (limit < MSS_GET(conn) && sendq && ++ tinfo->tcpi_last_data_sent < tinfo->tcpi_rtt / 1000 * 10) + limit = 0; + + new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit); +-- +2.47.1 + diff --git a/SOURCES/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch b/SOURCES/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch new file mode 100644 index 0000000..d85c03d --- /dev/null +++ b/SOURCES/0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch @@ -0,0 +1,90 @@ +From 75dcbc300bf09c3649823b12d30c4f24de7271d4 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Tue, 23 Dec 2025 13:39:17 +0100 +Subject: [PATCH] pasta: Warn, disable matching IP version if not supported, in + local mode + +...instead of exiting, but only if local mode is enabled, that is, if +we couldn't find a template interface or if the user didn't specify +one. + +With IPv4, we always try to set or copy an address, so check if that +fails. + +With IPv6, in local mode, we rely on the link-local address that's +automatically generated inside the target namespace, and only fail +later, as we try to set up routes. Check if that fails, instead. + +Otherwise, we'll fail to start if IPv6 support is not built in or +disabled by the kernel ("ipv6.disable=1" on the command line), +because, in that case, we'll try to enable local mode by default, and +then fail to set any address or route. + +It would probably be more elegant to check for IP version support in +conf_ip4_local() and conf_ip6_local(), and not even try to enable +connectivity for unsupported versions, but it looks less robust than +trying and failing, as there might be other ways to disable a given +IP version. + +Note that there's currently no way to disable IPv4 support on the +kernel command line, that is, there's no such thing as an +ipv4.disable boot parameter. But I guess that's due to be eventually +implemented, one day, so let's cover that case as well, also for +consistency. + +Reported-by: Iyan +Link: https://bugzilla.redhat.com/show_bug.cgi?id=2424192 +Fixes: 4ddd59bc6085 ("conf: Separate local mode for each IP version, don't enable disabled IP version") +Signed-off-by: Stefano Brivio +--- + pasta.c | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/pasta.c b/pasta.c +index c307b8a..0ddd6b0 100644 +--- a/pasta.c ++++ b/pasta.c +@@ -348,6 +348,12 @@ void pasta_ns_conf(struct ctx *c) + AF_INET); + } + ++ if (c->ifi4 == -1 && rc == -ENOTSUP) { ++ warn("IPv4 not supported, disabling"); ++ c->ifi4 = 0; ++ goto ipv4_done; ++ } ++ + if (rc < 0) { + die("Couldn't set IPv4 address(es) in namespace: %s", + strerror_(-rc)); +@@ -367,6 +373,7 @@ void pasta_ns_conf(struct ctx *c) + strerror_(-rc)); + } + } ++ipv4_done: + + if (c->ifi6) { + rc = nl_addr_get_ll(nl_sock_ns, c->pasta_ifi, +@@ -413,12 +420,19 @@ void pasta_ns_conf(struct ctx *c) + AF_INET6); + } + ++ if (c->ifi6 == -1 && rc == -ENOTSUP) { ++ warn("IPv6 not supported, disabling"); ++ c->ifi6 = 0; ++ goto ipv6_done; ++ } ++ + if (rc < 0) { + die("Couldn't set IPv6 route(s) in guest: %s", + strerror_(-rc)); + } + } + } ++ipv6_done: + + proto_update_l2_buf(c->guest_mac); + } +-- +2.47.1 + diff --git a/SOURCES/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch b/SOURCES/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch new file mode 100644 index 0000000..f3d94ad --- /dev/null +++ b/SOURCES/0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch @@ -0,0 +1,58 @@ +From d2c5133990a7758bfa567fc73216393498949e9b Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Tue, 23 Dec 2025 01:59:34 +0100 +Subject: [PATCH] selinux: Enable read and watch permissions on netns directory + as well + +With commit 7aeda16a7818 ("selinux: Transition to pasta_t in +containers"), we need to make sure that pasta can access the target +namespace directory passed by Podman, and, in a general case, we have +all the permissions we need. + +But if we now start a container without the Podman changes referenced +by commit fd1bcc30af07 ("selinux: add container_var_run_t type +transition"), or with them, but with the container being created +before those and without a reboot in between, we'll additionally need +'read' and 'watch' permissions on user_tmp_t directory as well, as +user_tmp_t is still the (inconsistent) context of the namespace entry. + +Otherwise, on a container start/restart, we'll get SELinux denials: + + type=AVC msg=audit(1766451401.296:184): avc: denied { read } for pid=2159 comm="pasta.avx2" name="netns" dev="tmpfs" ino=60 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:obje +ct_r:user_tmp_t:s0 tclass=dir permissive=1 + type=AVC msg=audit(1766451401.298:185): avc: denied { watch } for pid=2159 comm="pasta.avx2" path="/run/user/1001/netns" dev="tmpfs" ino=60 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:user_tmp_t:s0 tclass=dir permissive=1 + +This can be reproduced quite simply: + + $ podman create -q --name hello hello + 6c4eaf15a03edf799673a97d84d0331f3a3f34a11015b58c69318101a3232770 + + [upgrade passt's SELinux policy to a version including 7aeda16a7818] + + $ podman start hello + Error: unable to start container "6c4eaf15a03edf799673a97d84d0331f3a3f34a11015b58c69318101a3232770": pasta failed with exit code 1: + netns dir open: Permission denied, exiting + +Reported-by: Tuomo Soini +Fixes: 7aeda16a7818 ("selinux: Transition to pasta_t in containers") +Signed-off-by: Stefano Brivio +--- + contrib/selinux/pasta.te | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te +index 95fe42a..3eb58f6 100644 +--- a/contrib/selinux/pasta.te ++++ b/contrib/selinux/pasta.te +@@ -149,7 +149,7 @@ allow pasta_t root_t:dir mounton; + manage_files_pattern(pasta_t, pasta_pid_t, pasta_pid_t) + files_pid_filetrans(pasta_t, pasta_pid_t, file) + +-allow pasta_t user_tmp_t:dir { add_name remove_name search write }; ++allow pasta_t user_tmp_t:dir { add_name read remove_name search watch write }; + allow pasta_t user_tmp_t:fifo_file append; + allow pasta_t user_tmp_t:file { create open write }; + allow pasta_t user_tmp_t:sock_file { create unlink }; +-- +2.47.1 + diff --git a/SOURCES/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch b/SOURCES/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch new file mode 100644 index 0000000..3072efd --- /dev/null +++ b/SOURCES/0006-selinux-Enable-open-permissions-on-netns-directory-o.patch @@ -0,0 +1,68 @@ +From 6babaa8a88eb337e4b81aeff673fcebb28015f36 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Fri, 16 Jan 2026 16:48:46 +0100 +Subject: [PATCH 6/7] selinux: Enable open permissions on netns directory, + operations on container_var_run_t + +Tuomo reports two further SELinux denials after upgrading to a +passt-selinux version that includes the transition to pasta_t for +containers, one I could reproduce: + + denied { open } for pid=3343050 comm="pasta.avx2" path="/run/user/1000/netns" dev="tmpfs" ino=51 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:user_tmp_t:s0 tclass=dir permissive=1 + +which I didn't take care of in the previous commit, d2c5133990a7 +("selinux: Enable read and watch permissions on netns directory as +well"), as it didn't appear in my quick test. But I can make pasta use +"open" on the network namespace entry by simply using it to make +connections. + +So, for that, add "open" to the existing rule for user_tmp_t:dir. + +Then, another one I couldn't reproduce instead: + + denied { write } for pid=3589324 comm="pasta.avx2" name="rootless-netns" dev="tmpfs" ino=36 scontext=unconfined_u:unconfined_r:pasta_t:s0-s0:c0.c1023 tcontext=unconfined_u:object_r:container_var_run_t:s0 tclass=dir permissive=0 + +which, I think, comes from a specific combination of versions of +container-selinux, Podman, and passt-selinux packages, which +prevents the expected type transition on container_var_run_t unless +restorecon is invoked manually, or until a reboot. + +Allowing the same permissions on container_var_run_t as we do on +ifconfig_var_run_t is harmless, so do that to prevent this further +denial. + +Reported-by: Tuomo Soini +Fixes: d2c5133990a7 ("selinux: Enable read and watch permissions on netns directory as well") +Fixes: 7aeda16a7818 ("selinux: Transition to pasta_t in containers") +Signed-off-by: Stefano Brivio +(cherry picked from commit a6d92ca82c9ea0b395aa56c568ee6b6e6d4ac81e) +--- + contrib/selinux/pasta.te | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/contrib/selinux/pasta.te b/contrib/selinux/pasta.te +index 22daa77..abeafa4 100644 +--- a/contrib/selinux/pasta.te ++++ b/contrib/selinux/pasta.te +@@ -148,7 +148,7 @@ allow pasta_t root_t:dir mounton; + manage_files_pattern(pasta_t, pasta_pid_t, pasta_pid_t) + files_pid_filetrans(pasta_t, pasta_pid_t, file) + +-allow pasta_t user_tmp_t:dir { add_name read remove_name search watch write }; ++allow pasta_t user_tmp_t:dir { add_name open read remove_name search watch write }; + allow pasta_t user_tmp_t:fifo_file append; + allow pasta_t user_tmp_t:file { create open write }; + allow pasta_t user_tmp_t:sock_file { create unlink }; +@@ -248,7 +248,9 @@ type_transition container_runtime_t user_tmp_t : dir ifconfig_var_run_t "netns"; + type_transition container_runtime_t container_var_run_t : dir ifconfig_var_run_t "netns"; + type_transition container_runtime_t user_tmp_t : dir ifconfig_var_run_t "rootless-netns"; + type_transition container_runtime_t container_var_run_t : dir ifconfig_var_run_t "rootless-netns"; ++allow pasta_t container_var_run_t:dir { add_name open rmdir write }; + allow pasta_t ifconfig_var_run_t:dir { add_name open rmdir write }; ++allow pasta_t container_var_run_t:file { create open write }; + allow pasta_t ifconfig_var_run_t:file { create open write }; + allow systemd_logind_exec_t ifconfig_var_run_t:dir rmdir; + +-- +2.47.1 + diff --git a/SOURCES/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch b/SOURCES/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch new file mode 100644 index 0000000..b7cb942 --- /dev/null +++ b/SOURCES/0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch @@ -0,0 +1,74 @@ +From dbfbc33776290260b87bb29bb5572750f9709b35 Mon Sep 17 00:00:00 2001 +From: Stefano Brivio +Date: Fri, 9 Jan 2026 13:52:00 +0100 +Subject: [PATCH 7/7] tcp: Fix rounding issue in check for approximating window + to zero + +In general, we approximate the advertised window to zero if we would +otherwise advertise less than a MSS worth, and the reasoning behind +that is explained in cf1925fb7b77 ("tcp: Don't limit window to +less-than-MSS values, use zero instead"). + +Then, in commit b40f5cd8c8e1 ("tcp: Use less-than-MSS window on no +queued data, or no data sent recently"), I introduced some conditions +under which we won't do that, including a check on whether any data +was sent recently. + +As an arbitrary but probably reasonable threshold, we consider data to +have recently been sent if that occurred less than ten times the +round-trip time (RTT) ago. + +The time elapsed since the last data transmission is reported by the +kernel in milliseconds, in the tcpi_last_data_sent field of struct +tcp_info, and the RTT is reported in microseconds instead, in +tcpi_rtt. + +To avoid the risk of overflow in a simple way, for the purpose of this +comparison, I converted tcpi_rtt to milliseconds first, but this means +that the check will always be false (and we'll never approximate the +window to zero) if the RTT is below one millisecond. + +This, in turn, reintroduces nasty delay issues in transfers in +non-local connections which have however almost-local (low) latency. + +Given that we want to use ten times the RTT as an arbitrary "long +enough" upper bound, round the RTT up while converting it to +milliseconds. + +As an alternative, we could perform the comparison in microseconds, +but we would need a slightly more complicated implementation to +exclude overflows, and it's definitely not worth it given the nature +of this threshold. + +Fixes: b40f5cd8c8e1 ("tcp: Use less-than-MSS window on no queued data, or no data sent recently") +Signed-off-by: Stefano Brivio +Reviewed-by: David Gibson +(cherry picked from commit 2be0e790804f99580b1c8a1781c49913440607f2) +--- + tcp.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/tcp.c b/tcp.c +index 23fcbc3..8f4f087 100644 +--- a/tcp.c ++++ b/tcp.c +@@ -1180,6 +1180,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, + if ((conn->flags & LOCAL) || tcp_rtt_dst_low(conn)) { + new_wnd_to_tap = tinfo->tcpi_snd_wnd; + } else { ++ unsigned rtt_ms_ceiling = DIV_ROUND_UP(tinfo->tcpi_rtt, 1000); + uint32_t sendq; + int limit; + +@@ -1223,7 +1224,7 @@ int tcp_update_seqack_wnd(const struct ctx *c, struct tcp_tap_conn *conn, + * with pending data in the outbound queue + */ + if (limit < MSS_GET(conn) && sendq && +- tinfo->tcpi_last_data_sent < tinfo->tcpi_rtt / 1000 * 10) ++ tinfo->tcpi_last_data_sent < rtt_ms_ceiling * 10) + limit = 0; + + new_wnd_to_tap = MIN((int)tinfo->tcpi_snd_wnd, limit); +-- +2.47.1 + diff --git a/SPECS/passt.spec b/SPECS/passt.spec index d827c71..4c5fc5e 100644 --- a/SPECS/passt.spec +++ b/SPECS/passt.spec @@ -7,12 +7,13 @@ # Copyright (c) 2022 Red Hat GmbH # Author: Stefano Brivio -%global git_hash 8ec134109eb136432a29bdf5a14f8b1fd4e46208 +%global git_hash d04c48032bcf724550d0b8f652fd00efcd2dfad0 %global selinuxtype targeted +%global selinux_policy_version 41.41 Name: passt -Version: 0^20250512.g8ec1341 -Release: 2%{?dist} +Version: 0^20251210.gd04c480 +Release: 3%{?dist} Summary: User-mode networking daemons for virtual machines and namespaces License: GPL-2.0-or-later AND BSD-3-Clause Group: System Environment/Daemons @@ -20,7 +21,12 @@ URL: https://passt.top/ Source: https://passt.top/passt/snapshot/passt-%{git_hash}.tar.xz Patch1: 0001-selinux-Drop-user_namespace-create-allow-rules.patch -Patch2: 0002-treewide-By-default-don-t-quit-source-after-migratio.patch +Patch2: 0002-selinux-Use-systemd_logind_exec_t-instead-of-systemd.patch +Patch3: 0003-tcp-Use-less-than-MSS-window-on-no-queued-data-or-no.patch +Patch4: 0004-pasta-Warn-disable-matching-IP-version-if-not-suppor.patch +Patch5: 0005-selinux-Enable-read-and-watch-permissions-on-netns-d.patch +Patch6: 0006-selinux-Enable-open-permissions-on-netns-directory-o.patch +Patch7: 0007-tcp-Fix-rounding-issue-in-check-for-approximating-wi.patch BuildRequires: gcc, make, git, checkpolicy, selinux-policy-devel Requires: (%{name}-selinux = %{version}-%{release} if selinux-policy-%{selinuxtype}) @@ -36,15 +42,21 @@ for network namespaces: traffic is forwarded using a tap interface inside the namespace, without the need to create further interfaces on the host, hence not requiring any capabilities or privileges. -%package selinux -BuildArch: noarch -Summary: SELinux support for passt and pasta -Requires: %{name} = %{version}-%{release} -Requires: selinux-policy -Requires(post): %{name} -Requires(post): policycoreutils -Requires(preun): %{name} -Requires(preun): policycoreutils +%package selinux +BuildArch: noarch +Summary: SELinux support for passt and pasta +%if 0%{?fedora} > 43 +BuildRequires: selinux-policy-devel +%selinux_requires_min +%else +BuildRequires: pkgconfig(systemd) +Requires(post): libselinux-utils +Requires(post): policycoreutils +%endif +Requires: container-selinux +Requires: selinux-policy-%{selinuxtype} +Requires(post): container-selinux +Requires(post): selinux-policy-%{selinuxtype} %description selinux This package adds SELinux enforcement to passt(1), pasta(1), passt-repair(1). @@ -92,15 +104,11 @@ popd %selinux_relabel_pre -s %{selinuxtype} %post selinux -%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp -%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp -%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp +%selinux_modules_install -s %{selinuxtype} %{_datadir}/selinux/packages/%{selinuxtype}/passt.pp %{_datadir}/selinux/packages/%{selinuxtype}/pasta.pp %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp %postun selinux if [ $1 -eq 0 ]; then - %selinux_modules_uninstall -s %{selinuxtype} passt - %selinux_modules_uninstall -s %{selinuxtype} pasta - %selinux_modules_uninstall -s %{selinuxtype} passt-repair + %selinux_modules_uninstall -s %{selinuxtype} passt pasta passt-repair fi %posttrans selinux @@ -133,6 +141,21 @@ fi %{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp %changelog +* Wed Feb 11 2026 Stefano Brivio - 0^20251210.gd04c480-3 +- Resolves: RHEL-137588 RHEL-136313 + +* Wed Dec 24 2025 Stefano Brivio - 0^20251210.gd04c480-2 +- Resolves: RHEL-136313 RHEL-136461 RHEL-137439 RHEL-137588 + +* Wed Dec 10 2025 Stefano Brivio - 0^20251210.gd04c480-1 +- Resolves: RHEL-134942 RHEL-134943 + +* Tue Dec 9 2025 Stefano Brivio - 0^20251209.gc3f1ba7-1 +- Resolves: RHEL-134119 + +* Thu Oct 23 2025 Stefano Brivio - 0^20250512.g8ec1341-3 +- Resolves: RHEL-123376 RHEL-123438 + * Tue Jul 29 2025 Stefano Brivio - 0^20250512.g8ec1341-2 - Resolves: RHEL-106326