import UBI passt-0^20250217.ga1e48a0-5.el10_0
This commit is contained in:
parent
24db4ff194
commit
60b91b0d06
@ -1,26 +0,0 @@
|
||||
From 0968be4a8529adb0282be86236ba9d949b30d5b0 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Wed, 26 Feb 2025 10:37:37 -0500
|
||||
Subject: [PATCH] selinux: passt_repair_exec_t needs to be a exec_type
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
contrib/selinux/passt-repair.te | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/contrib/selinux/passt-repair.te b/contrib/selinux/passt-repair.te
|
||||
index e3ffbcd..79f8acf 100644
|
||||
--- a/contrib/selinux/passt-repair.te
|
||||
+++ b/contrib/selinux/passt-repair.te
|
||||
@@ -33,7 +33,7 @@ require {
|
||||
type passt_repair_t;
|
||||
domain_type(passt_repair_t);
|
||||
type passt_repair_exec_t;
|
||||
-files_type(passt_repair_exec_t);
|
||||
+corecmd_executable_file(passt_repair_exec_t);
|
||||
|
||||
role unconfined_r types passt_repair_t;
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
From 6c2c2f64f838097e1c4412fe3b4c65aec7d51345 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Thu, 27 Feb 2025 16:55:13 +1100
|
||||
Subject: [PATCH 2/9] migrate, flow: Trivially succeed if migrating with no
|
||||
flows
|
||||
|
||||
We could get a migration request when we have no active flows; or at least
|
||||
none that we need or are able to migrate. In this case after sending or
|
||||
receiving the number of flows we continue to step through various lists.
|
||||
|
||||
In the target case, this could include communication with passt-repair. If
|
||||
passt-repair wasn't started that could cause further errors, but of course
|
||||
they shouldn't matter if we have nothing to repair.
|
||||
|
||||
Make it more obvious that there's nothing to do and avoid such errors by
|
||||
short-circuiting flow_migrate_{source,target}() if there are no migratable
|
||||
flows.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
flow.c | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/flow.c b/flow.c
|
||||
index cc881e86..b187dff8 100644
|
||||
--- a/flow.c
|
||||
+++ b/flow.c
|
||||
@@ -1019,6 +1019,9 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
|
||||
|
||||
debug("Sending %u flows", ntohl(count));
|
||||
|
||||
+ if (!count)
|
||||
+ return 0;
|
||||
+
|
||||
/* Dump and send information that can be stored in the flow table.
|
||||
*
|
||||
* Limited rollback options here: if we fail to transfer any data (that
|
||||
@@ -1088,6 +1091,9 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
|
||||
count = ntohl(count);
|
||||
debug("Receiving %u flows", count);
|
||||
|
||||
+ if (!count)
|
||||
+ return 0;
|
||||
+
|
||||
if ((rc = flow_migrate_repair_all(c, true)))
|
||||
return -rc;
|
||||
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,55 @@
|
||||
From 64f1ff1f1e8f9fe765f191139e34a654ab074fb8 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Thu, 27 Feb 2025 16:55:14 +1100
|
||||
Subject: [PATCH 3/9] migrate, flow: Don't attempt to migrate TCP flows without
|
||||
passt-repair
|
||||
|
||||
Migrating TCP flows requires passt-repair in order to use TCP_REPAIR. If
|
||||
passt-repair is not started, our failure mode is pretty ugly though: we'll
|
||||
attempt the migration, hitting various problems when we can't enter repair
|
||||
mode. In some cases we may not roll back these changes properly, meaning
|
||||
we break network connections on the source.
|
||||
|
||||
Our general approach is not to completely block migration if there are
|
||||
problems, but simply to break any flows we can't migrate. So, if we have
|
||||
no connection from passt-repair carry on with the migration, but don't
|
||||
attempt to migrate any TCP connections.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
flow.c | 11 +++++++++--
|
||||
1 file changed, 9 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/flow.c b/flow.c
|
||||
index b187dff8..4d611f56 100644
|
||||
--- a/flow.c
|
||||
+++ b/flow.c
|
||||
@@ -943,6 +943,10 @@ static int flow_migrate_repair_all(struct ctx *c, bool enable)
|
||||
unsigned i;
|
||||
int rc;
|
||||
|
||||
+ /* If we don't have a repair helper, there's nothing we can do */
|
||||
+ if (c->fd_repair < 0)
|
||||
+ return 0;
|
||||
+
|
||||
foreach_established_tcp_flow(i, flow, FLOW_MAX) {
|
||||
if (enable)
|
||||
rc = tcp_flow_repair_on(c, &flow->tcp);
|
||||
@@ -1007,8 +1011,11 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
|
||||
(void)c;
|
||||
(void)stage;
|
||||
|
||||
- foreach_established_tcp_flow(i, flow, FLOW_MAX)
|
||||
- count++;
|
||||
+ /* If we don't have a repair helper, we can't migrate TCP flows */
|
||||
+ if (c->fd_repair >= 0) {
|
||||
+ foreach_established_tcp_flow(i, flow, FLOW_MAX)
|
||||
+ count++;
|
||||
+ }
|
||||
|
||||
count = htonl(count);
|
||||
if (write_all_buf(fd, &count, sizeof(count))) {
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,50 @@
|
||||
From 79c3a0a8c49c2358abb8d2cd9e5e88b884ae63d6 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Thu, 27 Feb 2025 16:55:15 +1100
|
||||
Subject: [PATCH 4/9] tcp: Correct error code handling from
|
||||
tcp_flow_repair_socket()
|
||||
|
||||
There are two small bugs in error returns from tcp_low_repair_socket(),
|
||||
which is supposed to return a negative errno code:
|
||||
|
||||
1) On bind() failures, wedirectly pass on the return code from bind(),
|
||||
which is just 0 or -1, instead of an error code.
|
||||
|
||||
2) In the caller, tcp_flow_migrate_target() we call strerror_() directly
|
||||
on the negative error code, but strerror() requires a positive error
|
||||
code.
|
||||
|
||||
Correct both of these.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
tcp.c | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 98e1c6a1..9622ad98 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -3285,7 +3285,8 @@ int tcp_flow_repair_socket(struct ctx *c, struct tcp_tap_conn *conn)
|
||||
|
||||
tcp_sock_set_nodelay(s);
|
||||
|
||||
- if ((rc = bind(s, &a.sa, sizeof(a)))) {
|
||||
+ if (bind(s, &a.sa, sizeof(a))) {
|
||||
+ rc = -errno;
|
||||
err_perror("Failed to bind socket for migrated flow");
|
||||
goto err;
|
||||
}
|
||||
@@ -3380,7 +3381,7 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
|
||||
conn->seq_init_from_tap = ntohl(t.seq_init_from_tap);
|
||||
|
||||
if ((rc = tcp_flow_repair_socket(c, conn))) {
|
||||
- flow_err(flow, "Can't set up socket: %s, drop", strerror_(rc));
|
||||
+ flow_err(flow, "Can't set up socket: %s, drop", strerror_(-rc));
|
||||
flow_alloc_cancel(flow);
|
||||
return 0;
|
||||
}
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,37 @@
|
||||
From 661e2065ca065babf56070b77c9fcafd8271b853 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Thu, 27 Feb 2025 16:55:16 +1100
|
||||
Subject: [PATCH 5/9] tcp: Unconditionally move to CLOSED state on tcp_rst()
|
||||
|
||||
tcp_rst() attempts to send an RST packet to the guest, and if that succeeds
|
||||
moves the flow to CLOSED state. However, even if the tcp_send_flag() fails
|
||||
the flow is still dead: we've usually closed the socket already, and
|
||||
something has already gone irretrievably wrong. So we should still mark
|
||||
the flow as CLOSED. That will cause it to be cleaned up, meaning any
|
||||
future packets from the guest for it won't match a flow, so should generate
|
||||
new RSTs (they don't at the moment, but that's a separate bug).
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
tcp.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 9622ad98..716079f8 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -1216,8 +1216,8 @@ void tcp_rst_do(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
if (conn->events == CLOSED)
|
||||
return;
|
||||
|
||||
- if (!tcp_send_flag(c, conn, RST))
|
||||
- conn_event(c, conn, CLOSED);
|
||||
+ tcp_send_flag(c, conn, RST);
|
||||
+ conn_event(c, conn, CLOSED);
|
||||
}
|
||||
|
||||
/**
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,93 @@
|
||||
From 5f7c930333e44ee61a51df4660c4a6610f45e98f Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Thu, 27 Feb 2025 16:55:17 +1100
|
||||
Subject: [PATCH 6/9] migrate, tcp: Don't flow_alloc_cancel() during incoming
|
||||
migration
|
||||
|
||||
In tcp_flow_migrate_target(), if we're unable to create and bind the new
|
||||
socket, we print an error, cancel the flow and carry on. This seems to
|
||||
make sense based on our policy of generally letting the migration complete
|
||||
even if some or all flows are lost in the process. But it doesn't quite
|
||||
work: the flow_alloc_cancel() means that the flows in the target's flow
|
||||
table are no longer one to one match to the flows which the source is
|
||||
sending data for. This means that data for later flows will be mismatched
|
||||
to a different flow. Most likely that will cause some nasty error later,
|
||||
but even worse it might appear to succeed but lead to data corruption due
|
||||
to incorrectly restoring one of the flows.
|
||||
|
||||
Instead, we should leave the flow in the table until we've read all the
|
||||
data for it, *then* discard it. Technically removing the
|
||||
flow_alloc_cancel() would be enough for this: if tcp_flow_repair_socket()
|
||||
fails it leaves conn->sock == -1, which will cause the restore functions
|
||||
in tcp_flow_migrate_target_ext() to fail, discarding the flow. To make
|
||||
what's going on clearer (and with less extraneous error messages), put
|
||||
several explicit tests for a missing socket later in the migration path to
|
||||
read the data associated with the flow but explicitly discard it.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
tcp.c | 19 ++++++++++++++++---
|
||||
1 file changed, 16 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 716079f8..4bd26aed 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -2711,6 +2711,9 @@ int tcp_flow_repair_on(struct ctx *c, const struct tcp_tap_conn *conn)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
+ if (conn->sock < 0)
|
||||
+ return 0;
|
||||
+
|
||||
if ((rc = repair_set(c, conn->sock, TCP_REPAIR_ON)))
|
||||
err("Failed to set TCP_REPAIR");
|
||||
|
||||
@@ -2728,6 +2731,9 @@ int tcp_flow_repair_off(struct ctx *c, const struct tcp_tap_conn *conn)
|
||||
{
|
||||
int rc = 0;
|
||||
|
||||
+ if (conn->sock < 0)
|
||||
+ return 0;
|
||||
+
|
||||
if ((rc = repair_set(c, conn->sock, TCP_REPAIR_OFF)))
|
||||
err("Failed to clear TCP_REPAIR");
|
||||
|
||||
@@ -3382,7 +3388,8 @@ int tcp_flow_migrate_target(struct ctx *c, int fd)
|
||||
|
||||
if ((rc = tcp_flow_repair_socket(c, conn))) {
|
||||
flow_err(flow, "Can't set up socket: %s, drop", strerror_(-rc));
|
||||
- flow_alloc_cancel(flow);
|
||||
+ /* Can't leave the flow in an incomplete state */
|
||||
+ FLOW_ACTIVATE(conn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3459,6 +3466,10 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
return rc;
|
||||
}
|
||||
|
||||
+ if (conn->sock < 0)
|
||||
+ /* We weren't able to create the socket, discard flow */
|
||||
+ goto fail;
|
||||
+
|
||||
if (tcp_flow_select_queue(s, TCP_SEND_QUEUE))
|
||||
goto fail;
|
||||
|
||||
@@ -3546,8 +3557,10 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
- tcp_flow_repair_off(c, conn);
|
||||
- repair_flush(c);
|
||||
+ if (conn->sock >= 0) {
|
||||
+ tcp_flow_repair_off(c, conn);
|
||||
+ repair_flush(c);
|
||||
+ }
|
||||
|
||||
conn->flags = 0; /* Not waiting for ACK, don't schedule timer */
|
||||
tcp_rst(c, conn);
|
||||
--
|
||||
2.48.1
|
||||
|
||||
89
0007-ip-Helpers-to-access-IPv6-flow-label.patch
Normal file
89
0007-ip-Helpers-to-access-IPv6-flow-label.patch
Normal file
@ -0,0 +1,89 @@
|
||||
From 86465f8455faa0568f5c886c6cff529940f80cb2 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 5 Mar 2025 15:32:28 +1100
|
||||
Subject: [PATCH 7/9] ip: Helpers to access IPv6 flow label
|
||||
|
||||
The flow label is a 20-bit field in the IPv6 header. The length and
|
||||
alignment make it awkward to pass around as is. Obviously, it can be
|
||||
packed into a 32-bit integer though, and we do this in two places. We
|
||||
have some further upcoming places where we want to manipulate the flow
|
||||
label, so make some helpers for marshalling and unmarshalling it to an
|
||||
integer.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
ip.h | 25 +++++++++++++++++++++++++
|
||||
tap.c | 4 +---
|
||||
tcp.c | 4 +---
|
||||
3 files changed, 27 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/ip.h b/ip.h
|
||||
index 1544dbf4..55c0c793 100644
|
||||
--- a/ip.h
|
||||
+++ b/ip.h
|
||||
@@ -90,6 +90,31 @@ struct ipv6_opt_hdr {
|
||||
*/
|
||||
} __attribute__((packed)); /* required for some archs */
|
||||
|
||||
+/**
|
||||
+ * ip6_set_flow_lbl() - Set flow label in an IPv6 header
|
||||
+ * @ip6h: Pointer to IPv6 header, updated
|
||||
+ * @flow: Set @ip6h flow label to the low 20 bits of this integer
|
||||
+ */
|
||||
+static inline void ip6_set_flow_lbl(struct ipv6hdr *ip6h, uint32_t flow)
|
||||
+{
|
||||
+ ip6h->flow_lbl[0] = (flow >> 16) & 0xf;
|
||||
+ ip6h->flow_lbl[1] = (flow >> 8) & 0xff;
|
||||
+ ip6h->flow_lbl[2] = (flow >> 0) & 0xff;
|
||||
+}
|
||||
+
|
||||
+/** ip6_get_flow_lbl() - Get flow label from an IPv6 header
|
||||
+ * @ip6h: Pointer to IPv6 header
|
||||
+ *
|
||||
+ * Return: flow label from @ip6h as an integer (<= 20 bits)
|
||||
+ */
|
||||
+/* cppcheck-suppress unusedFunction */
|
||||
+static inline uint32_t ip6_get_flow_lbl(const struct ipv6hdr *ip6h)
|
||||
+{
|
||||
+ return (ip6h->flow_lbl[0] & 0xf) << 16 |
|
||||
+ ip6h->flow_lbl[1] << 8 |
|
||||
+ ip6h->flow_lbl[2];
|
||||
+}
|
||||
+
|
||||
char *ipv6_l4hdr(const struct pool *p, int idx, size_t offset, uint8_t *proto,
|
||||
size_t *dlen);
|
||||
|
||||
diff --git a/tap.c b/tap.c
|
||||
index d0673e58..d5219935 100644
|
||||
--- a/tap.c
|
||||
+++ b/tap.c
|
||||
@@ -241,9 +241,7 @@ static void *tap_push_ip6h(struct ipv6hdr *ip6h,
|
||||
ip6h->hop_limit = 255;
|
||||
ip6h->saddr = *src;
|
||||
ip6h->daddr = *dst;
|
||||
- ip6h->flow_lbl[0] = (flow >> 16) & 0xf;
|
||||
- ip6h->flow_lbl[1] = (flow >> 8) & 0xff;
|
||||
- ip6h->flow_lbl[2] = (flow >> 0) & 0xff;
|
||||
+ ip6_set_flow_lbl(ip6h, flow);
|
||||
return ip6h + 1;
|
||||
}
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 4bd26aed..aa0eaf8e 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -965,9 +965,7 @@ void tcp_fill_headers(const struct tcp_tap_conn *conn,
|
||||
ip6h->version = 6;
|
||||
ip6h->nexthdr = IPPROTO_TCP;
|
||||
|
||||
- ip6h->flow_lbl[0] = (conn->sock >> 16) & 0xf;
|
||||
- ip6h->flow_lbl[1] = (conn->sock >> 8) & 0xff;
|
||||
- ip6h->flow_lbl[2] = (conn->sock >> 0) & 0xff;
|
||||
+ ip6_set_flow_lbl(ip6h, conn->sock);
|
||||
|
||||
if (!no_tcp_csum) {
|
||||
psum = proto_ipv6_header_psum(l4len, IPPROTO_TCP,
|
||||
--
|
||||
2.48.1
|
||||
|
||||
@ -0,0 +1,80 @@
|
||||
From 3479b0cf35f35c1b9b7202a3ffea4a4e7d3c4702 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 5 Mar 2025 15:32:29 +1100
|
||||
Subject: [PATCH 8/9] tap: Consider IPv6 flow label when building packet
|
||||
sequences
|
||||
|
||||
To allow more batching, we group together related packets into "seqs" in
|
||||
the tap layer, before passing them to the L4 protocol layers. Currently
|
||||
we consider the IP protocol, both IP addresses and also the L4 ports when
|
||||
grouping things into seqs. We ignore the IPv6 flow label.
|
||||
|
||||
We have some future cases where we want to consider the the flow label in
|
||||
the L4 code, which is awkward if we could be given a single batch with
|
||||
multiple labels. Add the flow label to tap6_l4_t and group by it as well
|
||||
as the other criteria. In future we could possibly use the flow label
|
||||
_instead_ of peeking into the L4 header for the ports, but we don't do so
|
||||
for now.
|
||||
|
||||
The guest should use the same flow label for all packets in a low, but if
|
||||
it doesn't this change won't break anything, it just means we'll batch
|
||||
things a bit sub-optimally.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
ip.h | 1 -
|
||||
tap.c | 4 ++++
|
||||
2 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/ip.h b/ip.h
|
||||
index 55c0c793..e124b596 100644
|
||||
--- a/ip.h
|
||||
+++ b/ip.h
|
||||
@@ -107,7 +107,6 @@ static inline void ip6_set_flow_lbl(struct ipv6hdr *ip6h, uint32_t flow)
|
||||
*
|
||||
* Return: flow label from @ip6h as an integer (<= 20 bits)
|
||||
*/
|
||||
-/* cppcheck-suppress unusedFunction */
|
||||
static inline uint32_t ip6_get_flow_lbl(const struct ipv6hdr *ip6h)
|
||||
{
|
||||
return (ip6h->flow_lbl[0] & 0xf) << 16 |
|
||||
diff --git a/tap.c b/tap.c
|
||||
index d5219935..c5b36f1c 100644
|
||||
--- a/tap.c
|
||||
+++ b/tap.c
|
||||
@@ -489,6 +489,7 @@ static struct tap4_l4_t {
|
||||
* struct l4_seq6_t - Message sequence for one protocol handler call, IPv6
|
||||
* @msgs: Count of messages in sequence
|
||||
* @protocol: Protocol number
|
||||
+ * @flow_lbl: IPv6 flow label
|
||||
* @source: Source port
|
||||
* @dest: Destination port
|
||||
* @saddr: Source address
|
||||
@@ -497,6 +498,7 @@ static struct tap4_l4_t {
|
||||
*/
|
||||
static struct tap6_l4_t {
|
||||
uint8_t protocol;
|
||||
+ uint32_t flow_lbl :20;
|
||||
|
||||
uint16_t source;
|
||||
uint16_t dest;
|
||||
@@ -870,6 +872,7 @@ resume:
|
||||
((seq)->protocol == (proto) && \
|
||||
(seq)->source == (uh)->source && \
|
||||
(seq)->dest == (uh)->dest && \
|
||||
+ (seq)->flow_lbl == ip6_get_flow_lbl(ip6h) && \
|
||||
IN6_ARE_ADDR_EQUAL(&(seq)->saddr, saddr) && \
|
||||
IN6_ARE_ADDR_EQUAL(&(seq)->daddr, daddr))
|
||||
|
||||
@@ -878,6 +881,7 @@ resume:
|
||||
(seq)->protocol = (proto); \
|
||||
(seq)->source = (uh)->source; \
|
||||
(seq)->dest = (uh)->dest; \
|
||||
+ (seq)->flow_lbl = ip6_get_flow_lbl(ip6h); \
|
||||
(seq)->saddr = *saddr; \
|
||||
(seq)->daddr = *daddr; \
|
||||
} while (0)
|
||||
--
|
||||
2.48.1
|
||||
|
||||
234
0009-tcp-Send-RST-in-response-to-guest-packets-that-match.patch
Normal file
234
0009-tcp-Send-RST-in-response-to-guest-packets-that-match.patch
Normal file
@ -0,0 +1,234 @@
|
||||
From 4725179c803b267a5c1905d503c62b9a0f061b7d Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 5 Mar 2025 15:32:30 +1100
|
||||
Subject: [PATCH 9/9] tcp: Send RST in response to guest packets that match no
|
||||
connection
|
||||
|
||||
Currently, if a non-SYN TCP packet arrives which doesn't match any existing
|
||||
connection, we simply ignore it. However RFC 9293, section 3.10.7.1 says
|
||||
we should respond with an RST to a non-SYN, non-RST packet that's for a
|
||||
CLOSED (i.e. non-existent) connection.
|
||||
|
||||
This can arise in practice with migration, in cases where some error means
|
||||
we have to discard a connection. We destroy the connection with tcp_rst()
|
||||
in that case, but because the guest is stopped, we may not be able to
|
||||
deliver the RST packet on the tap interface immediately. This change
|
||||
ensures an RST will be sent if the guest tries to use the connection again.
|
||||
|
||||
A similar situation can arise if a passt/pasta instance is killed or
|
||||
crashes, but is then replaced with another attached to the same guest.
|
||||
This can leave the guest with stale connections that the new passt instance
|
||||
isn't aware of. It's better to send an RST so the guest knows quickly
|
||||
these are broken, rather than letting them linger until they time out.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
tap.c | 17 +++++++-------
|
||||
tap.h | 6 +++++
|
||||
tcp.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
|
||||
tcp.h | 2 +-
|
||||
4 files changed, 88 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/tap.c b/tap.c
|
||||
index c5b36f1c..eb2aff8d 100644
|
||||
--- a/tap.c
|
||||
+++ b/tap.c
|
||||
@@ -122,7 +122,7 @@ const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
|
||||
*
|
||||
* Return: pointer at which to write the packet's payload
|
||||
*/
|
||||
-static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
|
||||
+void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
|
||||
{
|
||||
struct ethhdr *eh = (struct ethhdr *)buf;
|
||||
|
||||
@@ -143,8 +143,8 @@ static void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto)
|
||||
*
|
||||
* Return: pointer at which to write the packet's payload
|
||||
*/
|
||||
-static void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
|
||||
- struct in_addr dst, size_t l4len, uint8_t proto)
|
||||
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
|
||||
+ struct in_addr dst, size_t l4len, uint8_t proto)
|
||||
{
|
||||
uint16_t l3len = l4len + sizeof(*ip4h);
|
||||
|
||||
@@ -229,10 +229,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
|
||||
*
|
||||
* Return: pointer at which to write the packet's payload
|
||||
*/
|
||||
-static void *tap_push_ip6h(struct ipv6hdr *ip6h,
|
||||
- const struct in6_addr *src,
|
||||
- const struct in6_addr *dst,
|
||||
- size_t l4len, uint8_t proto, uint32_t flow)
|
||||
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
|
||||
+ const struct in6_addr *src, const struct in6_addr *dst,
|
||||
+ size_t l4len, uint8_t proto, uint32_t flow)
|
||||
{
|
||||
ip6h->payload_len = htons(l4len);
|
||||
ip6h->priority = 0;
|
||||
@@ -744,7 +743,7 @@ append:
|
||||
for (k = 0; k < p->count; )
|
||||
k += tcp_tap_handler(c, PIF_TAP, AF_INET,
|
||||
&seq->saddr, &seq->daddr,
|
||||
- p, k, now);
|
||||
+ 0, p, k, now);
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
continue;
|
||||
@@ -927,7 +926,7 @@ append:
|
||||
for (k = 0; k < p->count; )
|
||||
k += tcp_tap_handler(c, PIF_TAP, AF_INET6,
|
||||
&seq->saddr, &seq->daddr,
|
||||
- p, k, now);
|
||||
+ seq->flow_lbl, p, k, now);
|
||||
} else if (seq->protocol == IPPROTO_UDP) {
|
||||
if (c->no_udp)
|
||||
continue;
|
||||
diff --git a/tap.h b/tap.h
|
||||
index dfbd8b9e..6f48657b 100644
|
||||
--- a/tap.h
|
||||
+++ b/tap.h
|
||||
@@ -44,6 +44,9 @@ static inline void tap_hdr_update(struct tap_hdr *thdr, size_t l2len)
|
||||
thdr->vnet_len = htonl(l2len);
|
||||
}
|
||||
|
||||
+void *tap_push_l2h(const struct ctx *c, void *buf, uint16_t proto);
|
||||
+void *tap_push_ip4h(struct iphdr *ip4h, struct in_addr src,
|
||||
+ struct in_addr dst, size_t l4len, uint8_t proto);
|
||||
void tap_udp4_send(const struct ctx *c, struct in_addr src, in_port_t sport,
|
||||
struct in_addr dst, in_port_t dport,
|
||||
const void *in, size_t dlen);
|
||||
@@ -51,6 +54,9 @@ void tap_icmp4_send(const struct ctx *c, struct in_addr src, struct in_addr dst,
|
||||
const void *in, size_t l4len);
|
||||
const struct in6_addr *tap_ip6_daddr(const struct ctx *c,
|
||||
const struct in6_addr *src);
|
||||
+void *tap_push_ip6h(struct ipv6hdr *ip6h,
|
||||
+ const struct in6_addr *src, const struct in6_addr *dst,
|
||||
+ size_t l4len, uint8_t proto, uint32_t flow);
|
||||
void tap_udp6_send(const struct ctx *c,
|
||||
const struct in6_addr *src, in_port_t sport,
|
||||
const struct in6_addr *dst, in_port_t dport,
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index aa0eaf8e..d00e8fcb 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -1868,6 +1868,75 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
|
||||
tcp_data_from_sock(c, conn);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * tcp_rst_no_conn() - Send RST in response to a packet with no connection
|
||||
+ * @c: Execution context
|
||||
+ * @af: Address family, AF_INET or AF_INET6
|
||||
+ * @saddr: Source address of the packet we're responding to
|
||||
+ * @daddr: Destination address of the packet we're responding to
|
||||
+ * @flow_lbl: IPv6 flow label (ignored for IPv4)
|
||||
+ * @th: TCP header of the packet we're responding to
|
||||
+ * @l4len: Packet length, including TCP header
|
||||
+ */
|
||||
+static void tcp_rst_no_conn(const struct ctx *c, int af,
|
||||
+ const void *saddr, const void *daddr,
|
||||
+ uint32_t flow_lbl,
|
||||
+ const struct tcphdr *th, size_t l4len)
|
||||
+{
|
||||
+ struct iov_tail payload = IOV_TAIL(NULL, 0, 0);
|
||||
+ struct tcphdr *rsth;
|
||||
+ char buf[USHRT_MAX];
|
||||
+ uint32_t psum = 0;
|
||||
+ size_t rst_l2len;
|
||||
+
|
||||
+ /* Don't respond to RSTs without a connection */
|
||||
+ if (th->rst)
|
||||
+ return;
|
||||
+
|
||||
+ if (af == AF_INET) {
|
||||
+ struct iphdr *ip4h = tap_push_l2h(c, buf, ETH_P_IP);
|
||||
+ const struct in_addr *rst_src = daddr;
|
||||
+ const struct in_addr *rst_dst = saddr;
|
||||
+
|
||||
+ rsth = tap_push_ip4h(ip4h, *rst_src, *rst_dst,
|
||||
+ sizeof(*rsth), IPPROTO_TCP);
|
||||
+ psum = proto_ipv4_header_psum(sizeof(*rsth), IPPROTO_TCP,
|
||||
+ *rst_src, *rst_dst);
|
||||
+
|
||||
+ } else {
|
||||
+ struct ipv6hdr *ip6h = tap_push_l2h(c, buf, ETH_P_IPV6);
|
||||
+ const struct in6_addr *rst_src = daddr;
|
||||
+ const struct in6_addr *rst_dst = saddr;
|
||||
+
|
||||
+ rsth = tap_push_ip6h(ip6h, rst_src, rst_dst,
|
||||
+ sizeof(*rsth), IPPROTO_TCP, flow_lbl);
|
||||
+ psum = proto_ipv6_header_psum(sizeof(*rsth), IPPROTO_TCP,
|
||||
+ rst_src, rst_dst);
|
||||
+ }
|
||||
+
|
||||
+ memset(rsth, 0, sizeof(*rsth));
|
||||
+
|
||||
+ rsth->source = th->dest;
|
||||
+ rsth->dest = th->source;
|
||||
+ rsth->rst = 1;
|
||||
+ rsth->doff = sizeof(*rsth) / 4UL;
|
||||
+
|
||||
+ /* Sequence matching logic from RFC 9293 section 3.10.7.1 */
|
||||
+ if (th->ack) {
|
||||
+ rsth->seq = th->ack_seq;
|
||||
+ } else {
|
||||
+ size_t dlen = l4len - th->doff * 4UL;
|
||||
+ uint32_t ack = ntohl(th->seq) + dlen;
|
||||
+
|
||||
+ rsth->ack_seq = htonl(ack);
|
||||
+ rsth->ack = 1;
|
||||
+ }
|
||||
+
|
||||
+ tcp_update_csum(psum, rsth, &payload);
|
||||
+ rst_l2len = ((char *)rsth - buf) + sizeof(*rsth);
|
||||
+ tap_send_single(c, buf, rst_l2len);
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* tcp_tap_handler() - Handle packets from tap and state transitions
|
||||
* @c: Execution context
|
||||
@@ -1875,6 +1944,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
|
||||
* @af: Address family, AF_INET or AF_INET6
|
||||
* @saddr: Source address
|
||||
* @daddr: Destination address
|
||||
+ * @flow_lbl: IPv6 flow label (ignored for IPv4)
|
||||
* @p: Pool of TCP packets, with TCP headers
|
||||
* @idx: Index of first packet in pool to process
|
||||
* @now: Current timestamp
|
||||
@@ -1882,7 +1952,7 @@ static void tcp_conn_from_sock_finish(const struct ctx *c,
|
||||
* Return: count of consumed packets
|
||||
*/
|
||||
int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
- const void *saddr, const void *daddr,
|
||||
+ const void *saddr, const void *daddr, uint32_t flow_lbl,
|
||||
const struct pool *p, int idx, const struct timespec *now)
|
||||
{
|
||||
struct tcp_tap_conn *conn;
|
||||
@@ -1915,6 +1985,8 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
if (opts && th->syn && !th->ack)
|
||||
tcp_conn_from_tap(c, af, saddr, daddr, th,
|
||||
opts, optlen, now);
|
||||
+ else
|
||||
+ tcp_rst_no_conn(c, af, saddr, daddr, flow_lbl, th, len);
|
||||
return 1;
|
||||
}
|
||||
|
||||
diff --git a/tcp.h b/tcp.h
|
||||
index cf30744d..9142ecac 100644
|
||||
--- a/tcp.h
|
||||
+++ b/tcp.h
|
||||
@@ -16,7 +16,7 @@ void tcp_listen_handler(const struct ctx *c, union epoll_ref ref,
|
||||
void tcp_sock_handler(const struct ctx *c, union epoll_ref ref,
|
||||
uint32_t events);
|
||||
int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
- const void *saddr, const void *daddr,
|
||||
+ const void *saddr, const void *daddr, uint32_t flow_lbl,
|
||||
const struct pool *p, int idx, const struct timespec *now);
|
||||
int tcp_sock_init(const struct ctx *c, const union inany_addr *addr,
|
||||
const char *ifname, in_port_t port);
|
||||
--
|
||||
2.48.1
|
||||
|
||||
151
0010-selinux-Fixes-workarounds-for-passt-and-passt-repair.patch
Normal file
151
0010-selinux-Fixes-workarounds-for-passt-and-passt-repair.patch
Normal file
@ -0,0 +1,151 @@
|
||||
From 631cfe67c02df0eb097f2b1ad6f23397e3c2246c Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Fri, 28 Feb 2025 01:14:01 +0100
|
||||
Subject: [PATCH 1/4] selinux: Fixes/workarounds for passt and passt-repair,
|
||||
mostly for libvirt usage
|
||||
|
||||
Here are a bunch of workarounds and a couple of fixes for libvirt
|
||||
usage which are rather hard to split into single logical patches
|
||||
as there appear to be some obscure dependencies between some of them:
|
||||
|
||||
- passt-repair needs to have an exec_type typeattribute (otherwise
|
||||
the policy for lsmd(1) causes a violation on getattr on its
|
||||
executable) file, and that typeattribute just happened to be there
|
||||
for passt as a result of init_daemon_domain(), but passt-repair
|
||||
isn't a daemon, so we need an explicit corecmd_executable_file()
|
||||
|
||||
- passt-repair needs a workaround, which I'll revisit once
|
||||
https://github.com/fedora-selinux/selinux-policy/issues/2579 is
|
||||
solved, for usage with libvirt: allow it to use qemu_var_run_t
|
||||
and virt_var_run_t sockets
|
||||
|
||||
- add 'bpf' and 'dac_read_search' capabilities for passt-repair:
|
||||
they are needed (for whatever reason I didn't investigate) to
|
||||
actually receive socket files via SCM_RIGHTS
|
||||
|
||||
- passt needs further workarounds in the sense of
|
||||
https://github.com/fedora-selinux/selinux-policy/issues/2579:
|
||||
allow it to use map and use svirt_tmpfs_t (not just svirt_image_t):
|
||||
it depends on where the libvirt guest image is
|
||||
|
||||
- ...it also needs to map /dev/null if <access mode='shared'/> is
|
||||
enabled in libvirt's XML for the memoryBacking object, for
|
||||
vhost-user operation
|
||||
|
||||
- and 'ioctl' on the TCP socket appears to be actually needed, on top
|
||||
of 'getattr', to dump some socket parameters
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit 87471731e6bb0b5df3a50277527caf3381b45ee4)
|
||||
---
|
||||
contrib/selinux/passt-repair.te | 33 +++++++++++++++++++++++++++++++--
|
||||
contrib/selinux/passt.te | 9 +++++++--
|
||||
2 files changed, 38 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/contrib/selinux/passt-repair.te b/contrib/selinux/passt-repair.te
|
||||
index e3ffbcd..f171be6 100644
|
||||
--- a/contrib/selinux/passt-repair.te
|
||||
+++ b/contrib/selinux/passt-repair.te
|
||||
@@ -28,12 +28,22 @@ require {
|
||||
type console_device_t;
|
||||
type user_devpts_t;
|
||||
type user_tmp_t;
|
||||
+
|
||||
+ # Workaround: passt-repair needs to needs to access socket files
|
||||
+ # that passt, started by libvirt, might create under different
|
||||
+ # labels, depending on whether passt is started as root or not.
|
||||
+ #
|
||||
+ # However, libvirt doesn't maintain its own policy, which makes
|
||||
+ # updates particularly complicated. To avoid breakage in the short
|
||||
+ # term, deal with that in passt's own policy.
|
||||
+ type qemu_var_run_t;
|
||||
+ type virt_var_run_t;
|
||||
}
|
||||
|
||||
type passt_repair_t;
|
||||
domain_type(passt_repair_t);
|
||||
type passt_repair_exec_t;
|
||||
-files_type(passt_repair_exec_t);
|
||||
+corecmd_executable_file(passt_repair_exec_t);
|
||||
|
||||
role unconfined_r types passt_repair_t;
|
||||
|
||||
@@ -41,7 +51,8 @@ allow passt_repair_t passt_repair_exec_t:file { read execute execute_no_trans en
|
||||
type_transition unconfined_t passt_repair_exec_t:process passt_repair_t;
|
||||
allow unconfined_t passt_repair_t:process transition;
|
||||
|
||||
-allow passt_repair_t self:capability { dac_override net_admin net_raw };
|
||||
+allow passt_repair_t self:capability { dac_override dac_read_search net_admin net_raw };
|
||||
+allow passt_repair_t self:capability2 bpf;
|
||||
|
||||
allow passt_repair_t console_device_t:chr_file { append open getattr read write ioctl };
|
||||
allow passt_repair_t user_devpts_t:chr_file { append open getattr read write ioctl };
|
||||
@@ -50,9 +61,27 @@ allow passt_repair_t unconfined_t:unix_stream_socket { connectto read write };
|
||||
allow passt_repair_t passt_t:unix_stream_socket { connectto read write };
|
||||
allow passt_repair_t user_tmp_t:unix_stream_socket { connectto read write };
|
||||
|
||||
+allow passt_repair_t user_tmp_t:dir search;
|
||||
+
|
||||
allow passt_repair_t unconfined_t:sock_file { read write };
|
||||
allow passt_repair_t passt_t:sock_file { read write };
|
||||
allow passt_repair_t user_tmp_t:sock_file { read write };
|
||||
|
||||
allow passt_repair_t unconfined_t:tcp_socket { read setopt write };
|
||||
allow passt_repair_t passt_t:tcp_socket { read setopt write };
|
||||
+
|
||||
+# Workaround: passt-repair needs to needs to access socket files
|
||||
+# that passt, started by libvirt, might create under different
|
||||
+# labels, depending on whether passt is started as root or not.
|
||||
+#
|
||||
+# However, libvirt doesn't maintain its own policy, which makes
|
||||
+# updates particularly complicated. To avoid breakage in the short
|
||||
+# term, deal with that in passt's own policy.
|
||||
+allow passt_repair_t qemu_var_run_t:unix_stream_socket { connectto read write };
|
||||
+allow passt_repair_t virt_var_run_t:unix_stream_socket { connectto read write };
|
||||
+
|
||||
+allow passt_repair_t qemu_var_run_t:dir search;
|
||||
+allow passt_repair_t virt_var_run_t:dir search;
|
||||
+
|
||||
+allow passt_repair_t qemu_var_run_t:sock_file { read write };
|
||||
+allow passt_repair_t virt_var_run_t:sock_file { read write };
|
||||
diff --git a/contrib/selinux/passt.te b/contrib/selinux/passt.te
|
||||
index cd1e5ee..a7e7727 100644
|
||||
--- a/contrib/selinux/passt.te
|
||||
+++ b/contrib/selinux/passt.te
|
||||
@@ -29,6 +29,9 @@ require {
|
||||
# particularly complicated. To avoid breakage in the short term,
|
||||
# deal with it in passt's own policy.
|
||||
type svirt_image_t;
|
||||
+ type svirt_tmpfs_t;
|
||||
+ type svirt_t;
|
||||
+ type null_device_t;
|
||||
|
||||
class file { ioctl getattr setattr create read write unlink open relabelto execute execute_no_trans map };
|
||||
class dir { search write add_name remove_name mounton };
|
||||
@@ -45,7 +48,7 @@ require {
|
||||
type net_conf_t;
|
||||
type proc_net_t;
|
||||
type node_t;
|
||||
- class tcp_socket { create accept listen name_bind name_connect getattr };
|
||||
+ class tcp_socket { create accept listen name_bind name_connect getattr ioctl };
|
||||
class udp_socket { create accept listen };
|
||||
class icmp_socket { bind create name_bind node_bind setopt read write };
|
||||
class sock_file { create unlink write };
|
||||
@@ -128,7 +131,7 @@ corenet_udp_sendrecv_all_ports(passt_t)
|
||||
allow passt_t node_t:icmp_socket { name_bind node_bind };
|
||||
allow passt_t port_t:icmp_socket name_bind;
|
||||
|
||||
-allow passt_t self:tcp_socket { create getopt setopt connect bind listen accept shutdown read write getattr };
|
||||
+allow passt_t self:tcp_socket { create getopt setopt connect bind listen accept shutdown read write getattr ioctl };
|
||||
allow passt_t self:udp_socket { create getopt setopt connect bind read write };
|
||||
allow passt_t self:icmp_socket { bind create setopt read write };
|
||||
|
||||
@@ -142,3 +145,5 @@ allow passt_t unconfined_t:unix_stream_socket { read write };
|
||||
# particularly complicated. To avoid breakage in the short term,
|
||||
# deal with it in passt's own policy.
|
||||
allow passt_t svirt_image_t:file { read write map };
|
||||
+allow passt_t svirt_tmpfs_t:file { read write map };
|
||||
+allow passt_t null_device_t:chr_file map;
|
||||
--
|
||||
2.47.1
|
||||
|
||||
224
0011-passt-repair-Add-directory-watch.patch
Normal file
224
0011-passt-repair-Add-directory-watch.patch
Normal file
@ -0,0 +1,224 @@
|
||||
From dd1123d9405db0160d2f2024f5c6a2eec8c0d6b3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Fri, 7 Mar 2025 23:27:03 +0100
|
||||
Subject: [PATCH 2/4] passt-repair: Add directory watch
|
||||
|
||||
It might not be feasible for users to start passt-repair after passt
|
||||
is started, on a migration target, but before the migration process
|
||||
starts.
|
||||
|
||||
For instance, with libvirt, the guest domain (and, hence, passt) is
|
||||
started on the target as part of the migration process. At least for
|
||||
the moment being, there's no hook a libvirt user (including KubeVirt)
|
||||
can use to start passt-repair before the migration starts.
|
||||
|
||||
Add a directory watch using inotify: if PATH is a directory, instead
|
||||
of connecting to it, we'll watch for a .repair socket file to appear
|
||||
in it, and then attempt to connect to that socket.
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
(cherry picked from commit 04701702471ececee362669cc6b49ed9e20a1b6d)
|
||||
---
|
||||
contrib/selinux/passt-repair.te | 16 +++----
|
||||
passt-repair.1 | 6 ++-
|
||||
passt-repair.c | 82 ++++++++++++++++++++++++++++++---
|
||||
3 files changed, 88 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/contrib/selinux/passt-repair.te b/contrib/selinux/passt-repair.te
|
||||
index f171be6..7157dfb 100644
|
||||
--- a/contrib/selinux/passt-repair.te
|
||||
+++ b/contrib/selinux/passt-repair.te
|
||||
@@ -61,11 +61,11 @@ allow passt_repair_t unconfined_t:unix_stream_socket { connectto read write };
|
||||
allow passt_repair_t passt_t:unix_stream_socket { connectto read write };
|
||||
allow passt_repair_t user_tmp_t:unix_stream_socket { connectto read write };
|
||||
|
||||
-allow passt_repair_t user_tmp_t:dir search;
|
||||
+allow passt_repair_t user_tmp_t:dir { getattr read search watch };
|
||||
|
||||
-allow passt_repair_t unconfined_t:sock_file { read write };
|
||||
-allow passt_repair_t passt_t:sock_file { read write };
|
||||
-allow passt_repair_t user_tmp_t:sock_file { read write };
|
||||
+allow passt_repair_t unconfined_t:sock_file { getattr read write };
|
||||
+allow passt_repair_t passt_t:sock_file { getattr read write };
|
||||
+allow passt_repair_t user_tmp_t:sock_file { getattr read write };
|
||||
|
||||
allow passt_repair_t unconfined_t:tcp_socket { read setopt write };
|
||||
allow passt_repair_t passt_t:tcp_socket { read setopt write };
|
||||
@@ -80,8 +80,8 @@ allow passt_repair_t passt_t:tcp_socket { read setopt write };
|
||||
allow passt_repair_t qemu_var_run_t:unix_stream_socket { connectto read write };
|
||||
allow passt_repair_t virt_var_run_t:unix_stream_socket { connectto read write };
|
||||
|
||||
-allow passt_repair_t qemu_var_run_t:dir search;
|
||||
-allow passt_repair_t virt_var_run_t:dir search;
|
||||
+allow passt_repair_t qemu_var_run_t:dir { getattr read search watch };
|
||||
+allow passt_repair_t virt_var_run_t:dir { getattr read search watch };
|
||||
|
||||
-allow passt_repair_t qemu_var_run_t:sock_file { read write };
|
||||
-allow passt_repair_t virt_var_run_t:sock_file { read write };
|
||||
+allow passt_repair_t qemu_var_run_t:sock_file { getattr read write };
|
||||
+allow passt_repair_t virt_var_run_t:sock_file { getattr read write };
|
||||
diff --git a/passt-repair.1 b/passt-repair.1
|
||||
index 7c1b140..e65aadd 100644
|
||||
--- a/passt-repair.1
|
||||
+++ b/passt-repair.1
|
||||
@@ -16,13 +16,17 @@
|
||||
.B passt-repair
|
||||
is a privileged helper setting and clearing repair mode on TCP sockets on behalf
|
||||
of \fBpasst\fR(1), as instructed via single-byte commands over a UNIX domain
|
||||
-socket, specified by \fIPATH\fR.
|
||||
+socket.
|
||||
|
||||
It can be used to migrate TCP connections between guests without granting
|
||||
additional capabilities to \fBpasst\fR(1) itself: to migrate TCP connections,
|
||||
\fBpasst\fR(1) leverages repair mode, which needs the \fBCAP_NET_ADMIN\fR
|
||||
capability (see \fBcapabilities\fR(7)) to be set or cleared.
|
||||
|
||||
+If \fIPATH\fR represents a UNIX domain socket, \fBpasst-repair\fR(1) attempts to
|
||||
+connect to it. If it is a directory, \fBpasst-repair\fR(1) waits until a file
|
||||
+ending with \fI.repair\fR appears in it, and then attempts to connect to it.
|
||||
+
|
||||
.SH PROTOCOL
|
||||
|
||||
\fBpasst-repair\fR(1) connects to \fBpasst\fR(1) using the socket specified via
|
||||
diff --git a/passt-repair.c b/passt-repair.c
|
||||
index e0c366e..8bb3f00 100644
|
||||
--- a/passt-repair.c
|
||||
+++ b/passt-repair.c
|
||||
@@ -16,11 +16,14 @@
|
||||
* off. Reply by echoing the command. Exit on EOF.
|
||||
*/
|
||||
|
||||
+#include <sys/inotify.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
+#include <sys/stat.h>
|
||||
#include <sys/un.h>
|
||||
#include <errno.h>
|
||||
+#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
@@ -39,6 +42,8 @@
|
||||
#include "seccomp_repair.h"
|
||||
|
||||
#define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */
|
||||
+#define REPAIR_EXT ".repair"
|
||||
+#define REPAIR_EXT_LEN strlen(REPAIR_EXT)
|
||||
|
||||
/**
|
||||
* main() - Entry point and whole program with loop
|
||||
@@ -51,6 +56,9 @@
|
||||
* #syscalls:repair socket s390x:socketcall i686:socketcall
|
||||
* #syscalls:repair recvfrom recvmsg arm:recv ppc64le:recv
|
||||
* #syscalls:repair sendto sendmsg arm:send ppc64le:send
|
||||
+ * #syscalls:repair stat|statx stat64|statx statx
|
||||
+ * #syscalls:repair fstat|fstat64 newfstatat|fstatat64
|
||||
+ * #syscalls:repair inotify_init1 inotify_add_watch
|
||||
*/
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
@@ -58,12 +66,14 @@ int main(int argc, char **argv)
|
||||
__attribute__ ((aligned(__alignof__(struct cmsghdr))));
|
||||
struct sockaddr_un a = { AF_UNIX, "" };
|
||||
int fds[SCM_MAX_FD], s, ret, i, n = 0;
|
||||
+ bool inotify_dir = false;
|
||||
struct sock_fprog prog;
|
||||
int8_t cmd = INT8_MAX;
|
||||
struct cmsghdr *cmsg;
|
||||
struct msghdr msg;
|
||||
struct iovec iov;
|
||||
size_t cmsg_len;
|
||||
+ struct stat sb;
|
||||
int op;
|
||||
|
||||
prctl(PR_SET_DUMPABLE, 0);
|
||||
@@ -90,19 +100,77 @@ int main(int argc, char **argv)
|
||||
_exit(2);
|
||||
}
|
||||
|
||||
- ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]);
|
||||
+ if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
|
||||
+ fprintf(stderr, "Failed to create AF_UNIX socket: %i\n", errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ if ((stat(argv[1], &sb))) {
|
||||
+ fprintf(stderr, "Can't stat() %s: %i\n", argv[1], errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ if ((sb.st_mode & S_IFMT) == S_IFDIR) {
|
||||
+ char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
|
||||
+ const struct inotify_event *ev;
|
||||
+ char path[PATH_MAX + 1];
|
||||
+ ssize_t n;
|
||||
+ int fd;
|
||||
+
|
||||
+ ev = (struct inotify_event *)buf;
|
||||
+
|
||||
+ if ((fd = inotify_init1(IN_CLOEXEC)) < 0) {
|
||||
+ fprintf(stderr, "inotify_init1: %i\n", errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ if (inotify_add_watch(fd, argv[1], IN_CREATE) < 0) {
|
||||
+ fprintf(stderr, "inotify_add_watch: %i\n", errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ n = read(fd, buf, sizeof(buf));
|
||||
+ if (n < 0) {
|
||||
+ fprintf(stderr, "inotify read: %i", errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ if (n < (ssize_t)sizeof(*ev)) {
|
||||
+ fprintf(stderr, "Short inotify read: %zi", n);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+ } while (ev->len < REPAIR_EXT_LEN ||
|
||||
+ memcmp(ev->name + strlen(ev->name) - REPAIR_EXT_LEN,
|
||||
+ REPAIR_EXT, REPAIR_EXT_LEN));
|
||||
+
|
||||
+ snprintf(path, sizeof(path), "%s/%s", argv[1], ev->name);
|
||||
+ if ((stat(path, &sb))) {
|
||||
+ fprintf(stderr, "Can't stat() %s: %i\n", path, errno);
|
||||
+ _exit(1);
|
||||
+ }
|
||||
+
|
||||
+ ret = snprintf(a.sun_path, sizeof(a.sun_path), path);
|
||||
+ inotify_dir = true;
|
||||
+ } else {
|
||||
+ ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]);
|
||||
+ }
|
||||
+
|
||||
if (ret <= 0 || ret >= (int)sizeof(a.sun_path)) {
|
||||
- fprintf(stderr, "Invalid socket path: %s\n", argv[1]);
|
||||
+ fprintf(stderr, "Invalid socket path");
|
||||
_exit(2);
|
||||
}
|
||||
|
||||
- if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
|
||||
- fprintf(stderr, "Failed to create AF_UNIX socket: %i\n", errno);
|
||||
- _exit(1);
|
||||
+ if ((sb.st_mode & S_IFMT) != S_IFSOCK) {
|
||||
+ fprintf(stderr, "%s is not a socket\n", a.sun_path);
|
||||
+ _exit(2);
|
||||
}
|
||||
|
||||
- if (connect(s, (struct sockaddr *)&a, sizeof(a))) {
|
||||
- fprintf(stderr, "Failed to connect to %s: %s\n", argv[1],
|
||||
+ while (connect(s, (struct sockaddr *)&a, sizeof(a))) {
|
||||
+ if (inotify_dir && errno == ECONNREFUSED)
|
||||
+ continue;
|
||||
+
|
||||
+ fprintf(stderr, "Failed to connect to %s: %s\n", a.sun_path,
|
||||
strerror(errno));
|
||||
_exit(1);
|
||||
}
|
||||
--
|
||||
2.47.1
|
||||
|
||||
135
0012-flow-repair-Wait-for-a-short-while-for-passt-repair-.patch
Normal file
135
0012-flow-repair-Wait-for-a-short-while-for-passt-repair-.patch
Normal file
@ -0,0 +1,135 @@
|
||||
From 918c8f4ea5fac44e47f175b306dc803ec93efbf9 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Thu, 6 Mar 2025 20:00:51 +0100
|
||||
Subject: [PATCH 3/4] flow, repair: Wait for a short while for passt-repair to
|
||||
connect
|
||||
|
||||
...and time out after that. This will be needed because of an upcoming
|
||||
change to passt-repair enabling it to start before passt is started,
|
||||
on both source and target, by means of an inotify watch.
|
||||
|
||||
Once the inotify watch triggers, passt-repair will connect right away,
|
||||
but we have no guarantees that the connection completes before we
|
||||
start the migration process, so wait for it (for a reasonable amount
|
||||
of time).
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
(cherry picked from commit c8b520c0625b440d0dcd588af085d35cf46aae2c)
|
||||
---
|
||||
flow.c | 21 +++++++++++++++++++++
|
||||
repair.c | 32 ++++++++++++++++++++++++++++++++
|
||||
repair.h | 1 +
|
||||
3 files changed, 54 insertions(+)
|
||||
|
||||
diff --git a/flow.c b/flow.c
|
||||
index 4d611f5..029aff2 100644
|
||||
--- a/flow.c
|
||||
+++ b/flow.c
|
||||
@@ -930,6 +930,22 @@ static int flow_migrate_source_rollback(struct ctx *c, unsigned max_flow,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * flow_migrate_need_repair() - Do we need to set repair mode for any flow?
|
||||
+ *
|
||||
+ * Return: true if repair mode is needed, false otherwise
|
||||
+ */
|
||||
+static bool flow_migrate_need_repair(void)
|
||||
+{
|
||||
+ union flow *flow;
|
||||
+ unsigned i;
|
||||
+
|
||||
+ foreach_established_tcp_flow(i, flow, FLOW_MAX)
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* flow_migrate_repair_all() - Turn repair mode on or off for all flows
|
||||
* @c: Execution context
|
||||
@@ -985,6 +1001,9 @@ int flow_migrate_source_pre(struct ctx *c, const struct migrate_stage *stage,
|
||||
(void)stage;
|
||||
(void)fd;
|
||||
|
||||
+ if (flow_migrate_need_repair())
|
||||
+ repair_wait(c);
|
||||
+
|
||||
if ((rc = flow_migrate_repair_all(c, true)))
|
||||
return -rc;
|
||||
|
||||
@@ -1101,6 +1120,8 @@ int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
|
||||
if (!count)
|
||||
return 0;
|
||||
|
||||
+ repair_wait(c);
|
||||
+
|
||||
if ((rc = flow_migrate_repair_all(c, true)))
|
||||
return -rc;
|
||||
|
||||
diff --git a/repair.c b/repair.c
|
||||
index 3ee089f..149fe51 100644
|
||||
--- a/repair.c
|
||||
+++ b/repair.c
|
||||
@@ -27,6 +27,10 @@
|
||||
|
||||
#define SCM_MAX_FD 253 /* From Linux kernel (include/net/scm.h), not in UAPI */
|
||||
|
||||
+/* Wait for a while for TCP_REPAIR helper to connect if it's not there yet */
|
||||
+#define REPAIR_ACCEPT_TIMEOUT_MS 10
|
||||
+#define REPAIR_ACCEPT_TIMEOUT_US (REPAIR_ACCEPT_TIMEOUT_MS * 1000)
|
||||
+
|
||||
/* Pending file descriptors for next repair_flush() call, or command change */
|
||||
static int repair_fds[SCM_MAX_FD];
|
||||
|
||||
@@ -138,6 +142,34 @@ void repair_handler(struct ctx *c, uint32_t events)
|
||||
repair_close(c);
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * repair_wait() - Wait (with timeout) for TCP_REPAIR helper to connect
|
||||
+ * @c: Execution context
|
||||
+ */
|
||||
+void repair_wait(struct ctx *c)
|
||||
+{
|
||||
+ struct timeval tv = { .tv_sec = 0,
|
||||
+ .tv_usec = (long)(REPAIR_ACCEPT_TIMEOUT_US) };
|
||||
+ static_assert(REPAIR_ACCEPT_TIMEOUT_US < 1000 * 1000,
|
||||
+ ".tv_usec is greater than 1000 * 1000");
|
||||
+
|
||||
+ if (c->fd_repair >= 0 || c->fd_repair_listen == -1)
|
||||
+ return;
|
||||
+
|
||||
+ if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
|
||||
+ &tv, sizeof(tv))) {
|
||||
+ err_perror("Set timeout on TCP_REPAIR listening socket");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ repair_listen_handler(c, EPOLLIN);
|
||||
+
|
||||
+ tv.tv_usec = 0;
|
||||
+ if (setsockopt(c->fd_repair_listen, SOL_SOCKET, SO_RCVTIMEO,
|
||||
+ &tv, sizeof(tv)))
|
||||
+ err_perror("Clear timeout on TCP_REPAIR listening socket");
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* repair_flush() - Flush current set of sockets to helper, with current command
|
||||
* @c: Execution context
|
||||
diff --git a/repair.h b/repair.h
|
||||
index de279d6..1d37922 100644
|
||||
--- a/repair.h
|
||||
+++ b/repair.h
|
||||
@@ -10,6 +10,7 @@ void repair_sock_init(const struct ctx *c);
|
||||
void repair_listen_handler(struct ctx *c, uint32_t events);
|
||||
void repair_handler(struct ctx *c, uint32_t events);
|
||||
void repair_close(struct ctx *c);
|
||||
+void repair_wait(struct ctx *c);
|
||||
int repair_flush(struct ctx *c);
|
||||
int repair_set(struct ctx *c, int s, int cmd);
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
From 8f6ae98e30f555d28768653a5e0fc208a1748d50 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Tue, 18 Mar 2025 17:18:47 +0100
|
||||
Subject: [PATCH 4/4] passt-repair: Fix build with -Werror=format-security
|
||||
|
||||
Fixes: 04701702471e ("passt-repair: Add directory watch")
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit 51f3c071a76bd20677e72b49007b822dca71e755)
|
||||
---
|
||||
passt-repair.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/passt-repair.c b/passt-repair.c
|
||||
index 8bb3f00..120f7aa 100644
|
||||
--- a/passt-repair.c
|
||||
+++ b/passt-repair.c
|
||||
@@ -150,7 +150,7 @@ int main(int argc, char **argv)
|
||||
_exit(1);
|
||||
}
|
||||
|
||||
- ret = snprintf(a.sun_path, sizeof(a.sun_path), path);
|
||||
+ ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", path);
|
||||
inotify_dir = true;
|
||||
} else {
|
||||
ret = snprintf(a.sun_path, sizeof(a.sun_path), "%s", argv[1]);
|
||||
--
|
||||
2.47.1
|
||||
|
||||
@ -0,0 +1,66 @@
|
||||
From 7e98ded849216778118cbcc862f8b97f920e2c79 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 19 Mar 2025 16:14:21 +1100
|
||||
Subject: [PATCH 14/17] migrate, tcp: More careful marshalling of mss parameter
|
||||
during migration
|
||||
|
||||
During migration we extract the limit on segment size using TCP_MAXSEG,
|
||||
and set it on the other side with TCP_REPAIR_OPTIONS. However, unlike most
|
||||
32-bit values we transfer we transfer it in native endian, not network
|
||||
endian. This is not correct; add it to the list of endian fixups we make.
|
||||
|
||||
In addition, while MAXSEG will be 32-bits in practice, and is given as such
|
||||
to TCP_REPAIR_OPTIONS, the TCP_MAXSEG sockopt treats it as an 'int'. It's
|
||||
not strictly safe to pass a uint32_t to a getsockopt() expecting an int,
|
||||
although we'll get away with it on most (maybe all) platforms. Correct
|
||||
this as well.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
[sbrivio: Minor coding style fix]
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit 28772ee91a60b34786023496ea17c2c2f4e5f7f5)
|
||||
---
|
||||
tcp.c | 7 ++++++-
|
||||
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index d00e8fc..65747c7 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -2847,13 +2847,16 @@ static int tcp_flow_dump_tinfo(int s, struct tcp_tap_transfer_ext *t)
|
||||
static int tcp_flow_dump_mss(int s, struct tcp_tap_transfer_ext *t)
|
||||
{
|
||||
socklen_t sl = sizeof(t->mss);
|
||||
+ int val;
|
||||
|
||||
- if (getsockopt(s, SOL_TCP, TCP_MAXSEG, &t->mss, &sl)) {
|
||||
+ if (getsockopt(s, SOL_TCP, TCP_MAXSEG, &val, &sl)) {
|
||||
int rc = -errno;
|
||||
err_perror("Getting MSS, socket %i", s);
|
||||
return rc;
|
||||
}
|
||||
|
||||
+ t->mss = (uint32_t)val;
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3288,6 +3291,7 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
|
||||
t->sndq = htonl(t->sndq);
|
||||
t->notsent = htonl(t->notsent);
|
||||
t->rcvq = htonl(t->rcvq);
|
||||
+ t->mss = htonl(t->mss);
|
||||
|
||||
t->snd_wl1 = htonl(t->snd_wl1);
|
||||
t->snd_wnd = htonl(t->snd_wnd);
|
||||
@@ -3501,6 +3505,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
t.sndq = ntohl(t.sndq);
|
||||
t.notsent = ntohl(t.notsent);
|
||||
t.rcvq = ntohl(t.rcvq);
|
||||
+ t.mss = ntohl(t.mss);
|
||||
|
||||
t.snd_wl1 = ntohl(t.snd_wl1);
|
||||
t.snd_wnd = ntohl(t.snd_wnd);
|
||||
--
|
||||
2.47.1
|
||||
|
||||
315
0015-flow-Add-flow_perror-helper.patch
Normal file
315
0015-flow-Add-flow_perror-helper.patch
Normal file
@ -0,0 +1,315 @@
|
||||
From d91ae25d5c4bb5dd28d229537d9e360694c6a009 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Tue, 18 Feb 2025 19:59:24 +1100
|
||||
Subject: [PATCH 15/17] flow: Add flow_perror() helper
|
||||
|
||||
Our general logging helpers include a number of _perror() variants which,
|
||||
like perror(3) include the description of the current errno. We didn't
|
||||
have those for our flow specific logging helpers, though. Fill this gap
|
||||
with flow_perror() and flow_dbg_perror(), and use them where it's useful.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit adb46c11d0ea67824cf8c4ef2113ec0b2c563c0e)
|
||||
---
|
||||
flow.c | 12 +++++++-----
|
||||
flow.h | 18 ++++++++++++++----
|
||||
icmp.c | 5 ++---
|
||||
tcp.c | 33 +++++++++++++++------------------
|
||||
tcp_splice.c | 9 ++++-----
|
||||
udp_flow.c | 19 +++++++------------
|
||||
6 files changed, 49 insertions(+), 47 deletions(-)
|
||||
|
||||
diff --git a/flow.c b/flow.c
|
||||
index 029aff2..3455945 100644
|
||||
--- a/flow.c
|
||||
+++ b/flow.c
|
||||
@@ -289,11 +289,13 @@ int flowside_connect(const struct ctx *c, int s,
|
||||
|
||||
/** flow_log_ - Log flow-related message
|
||||
* @f: flow the message is related to
|
||||
+ * @newline: Append newline at the end of the message, if missing
|
||||
* @pri: Log priority
|
||||
* @fmt: Format string
|
||||
* @...: printf-arguments
|
||||
*/
|
||||
-void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
|
||||
+void flow_log_(const struct flow_common *f, bool newline, int pri,
|
||||
+ const char *fmt, ...)
|
||||
{
|
||||
const char *type_or_state;
|
||||
char msg[BUFSIZ];
|
||||
@@ -309,7 +311,7 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
|
||||
else
|
||||
type_or_state = FLOW_TYPE(f);
|
||||
|
||||
- logmsg(true, false, pri,
|
||||
+ logmsg(newline, false, pri,
|
||||
"Flow %u (%s): %s", flow_idx(f), type_or_state, msg);
|
||||
}
|
||||
|
||||
@@ -329,7 +331,7 @@ void flow_log_details_(const struct flow_common *f, int pri,
|
||||
const struct flowside *tgt = &f->side[TGTSIDE];
|
||||
|
||||
if (state >= FLOW_STATE_TGT)
|
||||
- flow_log_(f, pri,
|
||||
+ flow_log_(f, true, pri,
|
||||
"%s [%s]:%hu -> [%s]:%hu => %s [%s]:%hu -> [%s]:%hu",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||
@@ -342,7 +344,7 @@ void flow_log_details_(const struct flow_common *f, int pri,
|
||||
inany_ntop(&tgt->eaddr, estr1, sizeof(estr1)),
|
||||
tgt->eport);
|
||||
else if (state >= FLOW_STATE_INI)
|
||||
- flow_log_(f, pri, "%s [%s]:%hu -> [%s]:%hu => ?",
|
||||
+ flow_log_(f, true, pri, "%s [%s]:%hu -> [%s]:%hu => ?",
|
||||
pif_name(f->pif[INISIDE]),
|
||||
inany_ntop(&ini->eaddr, estr0, sizeof(estr0)),
|
||||
ini->eport,
|
||||
@@ -363,7 +365,7 @@ static void flow_set_state(struct flow_common *f, enum flow_state state)
|
||||
ASSERT(oldstate < FLOW_NUM_STATES);
|
||||
|
||||
f->state = state;
|
||||
- flow_log_(f, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
|
||||
+ flow_log_(f, true, LOG_DEBUG, "%s -> %s", flow_state_str[oldstate],
|
||||
FLOW_STATE(f));
|
||||
|
||||
flow_log_details_(f, LOG_DEBUG, MAX(state, oldstate));
|
||||
diff --git a/flow.h b/flow.h
|
||||
index 675726e..dcf7645 100644
|
||||
--- a/flow.h
|
||||
+++ b/flow.h
|
||||
@@ -258,11 +258,11 @@ int flow_migrate_source(struct ctx *c, const struct migrate_stage *stage,
|
||||
int flow_migrate_target(struct ctx *c, const struct migrate_stage *stage,
|
||||
int fd);
|
||||
|
||||
-void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
|
||||
- __attribute__((format(printf, 3, 4)));
|
||||
-
|
||||
-#define flow_log(f_, pri, ...) flow_log_(&(f_)->f, (pri), __VA_ARGS__)
|
||||
+void flow_log_(const struct flow_common *f, bool newline, int pri,
|
||||
+ const char *fmt, ...)
|
||||
+ __attribute__((format(printf, 4, 5)));
|
||||
|
||||
+#define flow_log(f_, pri, ...) flow_log_(&(f_)->f, true, (pri), __VA_ARGS__)
|
||||
#define flow_dbg(f, ...) flow_log((f), LOG_DEBUG, __VA_ARGS__)
|
||||
#define flow_err(f, ...) flow_log((f), LOG_ERR, __VA_ARGS__)
|
||||
|
||||
@@ -272,6 +272,16 @@ void flow_log_(const struct flow_common *f, int pri, const char *fmt, ...)
|
||||
flow_dbg((f), __VA_ARGS__); \
|
||||
} while (0)
|
||||
|
||||
+#define flow_log_perror_(f, pri, ...) \
|
||||
+ do { \
|
||||
+ int errno_ = errno; \
|
||||
+ flow_log_((f), false, (pri), __VA_ARGS__); \
|
||||
+ logmsg(true, true, (pri), ": %s", strerror_(errno_)); \
|
||||
+ } while (0)
|
||||
+
|
||||
+#define flow_dbg_perror(f_, ...) flow_log_perror_(&(f_)->f, LOG_DEBUG, __VA_ARGS__)
|
||||
+#define flow_perror(f_, ...) flow_log_perror_(&(f_)->f, LOG_ERR, __VA_ARGS__)
|
||||
+
|
||||
void flow_log_details_(const struct flow_common *f, int pri,
|
||||
enum flow_state state);
|
||||
#define flow_log_details(f_, pri) \
|
||||
diff --git a/icmp.c b/icmp.c
|
||||
index bcf498d..7e2b342 100644
|
||||
--- a/icmp.c
|
||||
+++ b/icmp.c
|
||||
@@ -85,7 +85,7 @@ void icmp_sock_handler(const struct ctx *c, union epoll_ref ref)
|
||||
|
||||
n = recvfrom(ref.fd, buf, sizeof(buf), 0, &sr.sa, &sl);
|
||||
if (n < 0) {
|
||||
- flow_err(pingf, "recvfrom() error: %s", strerror_(errno));
|
||||
+ flow_perror(pingf, "recvfrom() error");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -300,8 +300,7 @@ int icmp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
|
||||
pif_sockaddr(c, &sa, &sl, PIF_HOST, &tgt->eaddr, 0);
|
||||
if (sendto(pingf->sock, pkt, l4len, MSG_NOSIGNAL, &sa.sa, sl) < 0) {
|
||||
- flow_dbg(pingf, "failed to relay request to socket: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(pingf, "failed to relay request to socket");
|
||||
} else {
|
||||
flow_dbg(pingf,
|
||||
"echo request to socket, ID: %"PRIu16", seq: %"PRIu16,
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 65747c7..2c226ae 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -551,8 +551,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
|
||||
fd = timerfd_create(CLOCK_MONOTONIC, 0);
|
||||
if (fd == -1 || fd > FD_REF_MAX) {
|
||||
- flow_dbg(conn, "failed to get timer: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(conn, "failed to get timer");
|
||||
if (fd > -1)
|
||||
close(fd);
|
||||
conn->timer = -1;
|
||||
@@ -561,8 +560,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
conn->timer = fd;
|
||||
|
||||
if (epoll_ctl(c->epollfd, EPOLL_CTL_ADD, conn->timer, &ev)) {
|
||||
- flow_dbg(conn, "failed to add timer: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(conn, "failed to add timer");
|
||||
close(conn->timer);
|
||||
conn->timer = -1;
|
||||
return;
|
||||
@@ -587,7 +585,7 @@ static void tcp_timer_ctl(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
(unsigned long long)it.it_value.tv_nsec / 1000 / 1000);
|
||||
|
||||
if (timerfd_settime(conn->timer, 0, &it, NULL))
|
||||
- flow_err(conn, "failed to set timer: %s", strerror_(errno));
|
||||
+ flow_perror(conn, "failed to set timer");
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1384,10 +1382,10 @@ static void tcp_bind_outbound(const struct ctx *c,
|
||||
if (bind(s, &bind_sa.sa, sl)) {
|
||||
char sstr[INANY_ADDRSTRLEN];
|
||||
|
||||
- flow_dbg(conn,
|
||||
- "Can't bind TCP outbound socket to %s:%hu: %s",
|
||||
- inany_ntop(&tgt->oaddr, sstr, sizeof(sstr)),
|
||||
- tgt->oport, strerror_(errno));
|
||||
+ flow_dbg_perror(conn,
|
||||
+ "Can't bind TCP outbound socket to %s:%hu",
|
||||
+ inany_ntop(&tgt->oaddr, sstr, sizeof(sstr)),
|
||||
+ tgt->oport);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1396,9 +1394,9 @@ static void tcp_bind_outbound(const struct ctx *c,
|
||||
if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
|
||||
c->ip4.ifname_out,
|
||||
strlen(c->ip4.ifname_out))) {
|
||||
- flow_dbg(conn, "Can't bind IPv4 TCP socket to"
|
||||
- " interface %s: %s", c->ip4.ifname_out,
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(conn,
|
||||
+ "Can't bind IPv4 TCP socket to interface %s",
|
||||
+ c->ip4.ifname_out);
|
||||
}
|
||||
}
|
||||
} else if (bind_sa.sa_family == AF_INET6) {
|
||||
@@ -1406,9 +1404,9 @@ static void tcp_bind_outbound(const struct ctx *c,
|
||||
if (setsockopt(s, SOL_SOCKET, SO_BINDTODEVICE,
|
||||
c->ip6.ifname_out,
|
||||
strlen(c->ip6.ifname_out))) {
|
||||
- flow_dbg(conn, "Can't bind IPv6 TCP socket to"
|
||||
- " interface %s: %s", c->ip6.ifname_out,
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(conn,
|
||||
+ "Can't bind IPv6 TCP socket to interface %s",
|
||||
+ c->ip6.ifname_out);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2263,7 +2261,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
|
||||
* and we just set the timer to a new point in the future: discard it.
|
||||
*/
|
||||
if (timerfd_gettime(conn->timer, &check_armed))
|
||||
- flow_err(conn, "failed to read timer: %s", strerror_(errno));
|
||||
+ flow_perror(conn, "failed to read timer");
|
||||
|
||||
if (check_armed.it_value.tv_sec || check_armed.it_value.tv_nsec)
|
||||
return;
|
||||
@@ -2305,8 +2303,7 @@ void tcp_timer_handler(const struct ctx *c, union epoll_ref ref)
|
||||
* ~ACK_TO_TAP_DUE or ~ACK_FROM_TAP_DUE.
|
||||
*/
|
||||
if (timerfd_settime(conn->timer, 0, &new, &old))
|
||||
- flow_err(conn, "failed to set timer: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_perror(conn, "failed to set timer");
|
||||
|
||||
if (old.it_value.tv_sec == ACT_TIMEOUT) {
|
||||
flow_dbg(conn, "activity timeout");
|
||||
diff --git a/tcp_splice.c b/tcp_splice.c
|
||||
index 5d845c9..0d10e3d 100644
|
||||
--- a/tcp_splice.c
|
||||
+++ b/tcp_splice.c
|
||||
@@ -164,7 +164,7 @@ static int tcp_splice_epoll_ctl(const struct ctx *c,
|
||||
if (epoll_ctl(c->epollfd, m, conn->s[0], &ev[0]) ||
|
||||
epoll_ctl(c->epollfd, m, conn->s[1], &ev[1])) {
|
||||
int ret = -errno;
|
||||
- flow_err(conn, "ERROR on epoll_ctl(): %s", strerror_(errno));
|
||||
+ flow_perror(conn, "ERROR on epoll_ctl()");
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -317,8 +317,8 @@ static int tcp_splice_connect_finish(const struct ctx *c,
|
||||
|
||||
if (conn->pipe[sidei][0] < 0) {
|
||||
if (pipe2(conn->pipe[sidei], O_NONBLOCK | O_CLOEXEC)) {
|
||||
- flow_err(conn, "cannot create %d->%d pipe: %s",
|
||||
- sidei, !sidei, strerror_(errno));
|
||||
+ flow_perror(conn, "cannot create %d->%d pipe",
|
||||
+ sidei, !sidei);
|
||||
conn_flag(c, conn, CLOSING);
|
||||
return -EIO;
|
||||
}
|
||||
@@ -482,8 +482,7 @@ void tcp_splice_sock_handler(struct ctx *c, union epoll_ref ref,
|
||||
|
||||
rc = getsockopt(ref.fd, SOL_SOCKET, SO_ERROR, &err, &sl);
|
||||
if (rc)
|
||||
- flow_err(conn, "Error retrieving SO_ERROR: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_perror(conn, "Error retrieving SO_ERROR");
|
||||
else
|
||||
flow_trace(conn, "Error event on socket: %s",
|
||||
strerror_(err));
|
||||
diff --git a/udp_flow.c b/udp_flow.c
|
||||
index 83c2568..c6b8630 100644
|
||||
--- a/udp_flow.c
|
||||
+++ b/udp_flow.c
|
||||
@@ -93,9 +93,8 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
|
||||
*/
|
||||
uflow->s[INISIDE] = fcntl(s_ini, F_DUPFD_CLOEXEC, 0);
|
||||
if (uflow->s[INISIDE] < 0) {
|
||||
- flow_err(uflow,
|
||||
- "Couldn't duplicate listening socket: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_perror(uflow,
|
||||
+ "Couldn't duplicate listening socket");
|
||||
goto cancel;
|
||||
}
|
||||
}
|
||||
@@ -113,16 +112,13 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
|
||||
uflow->s[TGTSIDE] = flowside_sock_l4(c, EPOLL_TYPE_UDP_REPLY,
|
||||
tgtpif, tgt, fref.data);
|
||||
if (uflow->s[TGTSIDE] < 0) {
|
||||
- flow_dbg(uflow,
|
||||
- "Couldn't open socket for spliced flow: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(uflow,
|
||||
+ "Couldn't open socket for spliced flow");
|
||||
goto cancel;
|
||||
}
|
||||
|
||||
if (flowside_connect(c, uflow->s[TGTSIDE], tgtpif, tgt) < 0) {
|
||||
- flow_dbg(uflow,
|
||||
- "Couldn't connect flow socket: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_dbg_perror(uflow, "Couldn't connect flow socket");
|
||||
goto cancel;
|
||||
}
|
||||
|
||||
@@ -142,9 +138,8 @@ static flow_sidx_t udp_flow_new(const struct ctx *c, union flow *flow,
|
||||
flow_trace(uflow,
|
||||
"Discarded %d spurious reply datagrams", rc);
|
||||
} else if (errno != EAGAIN) {
|
||||
- flow_err(uflow,
|
||||
- "Unexpected error discarding datagrams: %s",
|
||||
- strerror_(errno));
|
||||
+ flow_perror(uflow,
|
||||
+ "Unexpected error discarding datagrams");
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
143
0016-migrate-tcp-Migrate-RFC-7323-timestamp.patch
Normal file
143
0016-migrate-tcp-Migrate-RFC-7323-timestamp.patch
Normal file
@ -0,0 +1,143 @@
|
||||
From ca35bbc0095e3583e253521c51a8f201afd0f7f2 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 19 Mar 2025 16:14:22 +1100
|
||||
Subject: [PATCH 16/17] migrate, tcp: Migrate RFC 7323 timestamp
|
||||
|
||||
Currently our migration of the state of TCP sockets omits the RFC 7323
|
||||
timestamp. In some circumstances that can result in data sent from the
|
||||
target machine not being received, because it is discarded on the peer due
|
||||
to PAWS checking.
|
||||
|
||||
Add code to dump and restore the timestamp across migration.
|
||||
|
||||
Link: https://bugs.passt.top/show_bug.cgi?id=115
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
[sbrivio: Minor style fixes]
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit cfb3740568ab291d7be00e457658c45ce9367ed5)
|
||||
---
|
||||
tcp.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
tcp_conn.h | 2 ++
|
||||
2 files changed, 61 insertions(+)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 2c226ae..ac2b7d3 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -2857,6 +2857,57 @@ static int tcp_flow_dump_mss(int s, struct tcp_tap_transfer_ext *t)
|
||||
return 0;
|
||||
}
|
||||
|
||||
+
|
||||
+/**
|
||||
+ * tcp_flow_dump_timestamp() - Dump RFC 7323 timestamp via TCP_TIMESTAMP
|
||||
+ * @conn: Pointer to the TCP connection structure
|
||||
+ * @t: Extended migration data (tcpi_options must be populated)
|
||||
+ *
|
||||
+ * Return: 0 on success, negative error code on failure
|
||||
+ */
|
||||
+static int tcp_flow_dump_timestamp(const struct tcp_tap_conn *conn,
|
||||
+ struct tcp_tap_transfer_ext *t)
|
||||
+{
|
||||
+ int val = 0;
|
||||
+
|
||||
+ if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
|
||||
+ socklen_t sl = sizeof(val);
|
||||
+
|
||||
+ if (getsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP, &val, &sl)) {
|
||||
+ int rc = -errno;
|
||||
+ flow_perror(conn, "Getting RFC 7323 timestamp");
|
||||
+ return rc;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ t->timestamp = (uint32_t)val;
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/**
|
||||
+ * tcp_flow_repair_timestamp() - Restore RFC 7323 timestamp via TCP_TIMESTAMP
|
||||
+ * @conn: Pointer to the TCP connection structure
|
||||
+ * @t: Extended migration data
|
||||
+ *
|
||||
+ * Return: 0 on success, negative error code on failure
|
||||
+ */
|
||||
+static int tcp_flow_repair_timestamp(const struct tcp_tap_conn *conn,
|
||||
+ const struct tcp_tap_transfer_ext *t)
|
||||
+{
|
||||
+ int val = (int)t->timestamp;
|
||||
+
|
||||
+ if (t->tcpi_options & TCPI_OPT_TIMESTAMPS) {
|
||||
+ if (setsockopt(conn->sock, SOL_TCP, TCP_TIMESTAMP,
|
||||
+ &val, sizeof(val))) {
|
||||
+ int rc = -errno;
|
||||
+ flow_perror(conn, "Setting RFC 7323 timestamp");
|
||||
+ return rc;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* tcp_flow_dump_wnd() - Dump current tcp_repair_window parameters
|
||||
* @c: Execution context
|
||||
@@ -3244,6 +3295,9 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
|
||||
if ((rc = tcp_flow_dump_mss(s, t)))
|
||||
goto fail;
|
||||
|
||||
+ if ((rc = tcp_flow_dump_timestamp(conn, t)))
|
||||
+ goto fail;
|
||||
+
|
||||
if ((rc = tcp_flow_dump_wnd(s, t)))
|
||||
goto fail;
|
||||
|
||||
@@ -3289,6 +3343,7 @@ int tcp_flow_migrate_source_ext(int fd, int fidx,
|
||||
t->notsent = htonl(t->notsent);
|
||||
t->rcvq = htonl(t->rcvq);
|
||||
t->mss = htonl(t->mss);
|
||||
+ t->timestamp = htonl(t->timestamp);
|
||||
|
||||
t->snd_wl1 = htonl(t->snd_wl1);
|
||||
t->snd_wnd = htonl(t->snd_wnd);
|
||||
@@ -3503,6 +3558,7 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
t.notsent = ntohl(t.notsent);
|
||||
t.rcvq = ntohl(t.rcvq);
|
||||
t.mss = ntohl(t.mss);
|
||||
+ t.timestamp = ntohl(t.timestamp);
|
||||
|
||||
t.snd_wl1 = ntohl(t.snd_wl1);
|
||||
t.snd_wnd = ntohl(t.snd_wnd);
|
||||
@@ -3542,6 +3598,9 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
/* We weren't able to create the socket, discard flow */
|
||||
goto fail;
|
||||
|
||||
+ if (tcp_flow_repair_timestamp(conn, &t))
|
||||
+ goto fail;
|
||||
+
|
||||
if (tcp_flow_select_queue(s, TCP_SEND_QUEUE))
|
||||
goto fail;
|
||||
|
||||
diff --git a/tcp_conn.h b/tcp_conn.h
|
||||
index 42dff48..c6501f8 100644
|
||||
--- a/tcp_conn.h
|
||||
+++ b/tcp_conn.h
|
||||
@@ -152,6 +152,7 @@ struct tcp_tap_transfer {
|
||||
* @notsent: Part of pending send queue that wasn't sent out yet
|
||||
* @rcvq: Length of pending receive queue
|
||||
* @mss: Socket-side MSS clamp
|
||||
+ * @timestamp: RFC 7323 timestamp
|
||||
* @snd_wl1: Next sequence used in window probe (next sequence - 1)
|
||||
* @snd_wnd: Socket-side sending window
|
||||
* @max_window: Window clamp
|
||||
@@ -171,6 +172,7 @@ struct tcp_tap_transfer_ext {
|
||||
uint32_t rcvq;
|
||||
|
||||
uint32_t mss;
|
||||
+ uint32_t timestamp;
|
||||
|
||||
/* We can't just use struct tcp_repair_window: we need network order */
|
||||
uint32_t snd_wl1;
|
||||
--
|
||||
2.47.1
|
||||
|
||||
59
0017-migrate-Bump-migration-version-number.patch
Normal file
59
0017-migrate-Bump-migration-version-number.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From 2aa0c8a3a230350714d6d217c802db99b9b25a8e Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 19 Mar 2025 16:14:23 +1100
|
||||
Subject: [PATCH 17/17] migrate: Bump migration version number
|
||||
|
||||
v1 of the migration stream format, had some flaws: it didn't properly
|
||||
handle endianness of the MSS field, and it didn't transfer the RFC7323
|
||||
timestamp. We've now fixed those bugs, but it requires incompatible
|
||||
changes to the stream format.
|
||||
|
||||
Because of the timestamps in particular, v1 is not really usable, so there
|
||||
is little point maintaining compatible support for it. However, v1 is in
|
||||
released packages, both upstream and downstream (RHEL at least). Just
|
||||
updating the stream format without bumping the version would lead to very
|
||||
cryptic errors if anyone did attempt to migrate between an old and new
|
||||
passt.
|
||||
|
||||
So, bump the migration version to v2, so we'll get a clear error message if
|
||||
anyone attempts this. We don't attempt to maintain backwards compatibility
|
||||
with v1, however: we'll simply fail if given a v1 stream.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit c250ffc5c11385d9618b3a8165e676d68d5cbfa2)
|
||||
---
|
||||
migrate.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/migrate.c b/migrate.c
|
||||
index 0fca77b..48d63a0 100644
|
||||
--- a/migrate.c
|
||||
+++ b/migrate.c
|
||||
@@ -96,8 +96,8 @@ static int seen_addrs_target_v1(struct ctx *c,
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* Stages for version 1 */
|
||||
-static const struct migrate_stage stages_v1[] = {
|
||||
+/* Stages for version 2 */
|
||||
+static const struct migrate_stage stages_v2[] = {
|
||||
{
|
||||
.name = "observed addresses",
|
||||
.source = seen_addrs_source_v1,
|
||||
@@ -118,7 +118,11 @@ static const struct migrate_stage stages_v1[] = {
|
||||
|
||||
/* Supported encoding versions, from latest (most preferred) to oldest */
|
||||
static const struct migrate_version versions[] = {
|
||||
- { 1, stages_v1, },
|
||||
+ { 2, stages_v2, },
|
||||
+ /* v1 was released, but not widely used. It had bad endianness for the
|
||||
+ * MSS and omitted timestamps, which meant it usually wouldn't work.
|
||||
+ * Therefore we don't attempt to support compatibility with it.
|
||||
+ */
|
||||
{ 0 },
|
||||
};
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
From 06eb28985636886bdf9759a26e733981ecb1fce3 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Wed, 19 Mar 2025 17:57:45 +0100
|
||||
Subject: [PATCH] tcp: Flush socket before checking for more data in active
|
||||
close state
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Otherwise, if all the pending data is acknowledged:
|
||||
|
||||
- tcp_update_seqack_from_tap() updates the current tap-side ACK
|
||||
sequence (conn->seq_ack_from_tap)
|
||||
|
||||
- next, we compare the sequence we sent (conn->seq_to_tap) to the
|
||||
ACK sequence (conn->seq_ack_from_tap) in tcp_data_from_sock() to
|
||||
understand if there's more data we can send.
|
||||
|
||||
If they match, we conclude that we haven't sent any of that data,
|
||||
and keep re-sending it.
|
||||
|
||||
We need, instead, to flush the socket (drop acknowledged data) before
|
||||
calling tcp_update_seqack_from_tap(), so that once we update
|
||||
conn->seq_ack_from_tap, we can be sure that all data until there is
|
||||
gone from the socket.
|
||||
|
||||
Link: https://bugs.passt.top/show_bug.cgi?id=114
|
||||
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
|
||||
Fixes: 30f1e082c3c0 ("tcp: Keep updating window and checking for socket data after FIN from guest")
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
(cherry picked from commit ebdd46367ce1acba235013d97e362b8677b538d5)
|
||||
---
|
||||
tcp.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index ac2b7d3..74141ce 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -2047,6 +2047,7 @@ int tcp_tap_handler(const struct ctx *c, uint8_t pif, sa_family_t af,
|
||||
|
||||
/* Established connections not accepting data from tap */
|
||||
if (conn->events & TAP_FIN_RCVD) {
|
||||
+ tcp_sock_consume(conn, ntohl(th->ack_seq));
|
||||
tcp_update_seqack_from_tap(c, conn, ntohl(th->ack_seq));
|
||||
tcp_tap_window_update(conn, ntohs(th->window));
|
||||
tcp_data_from_sock(c, conn);
|
||||
--
|
||||
2.47.1
|
||||
|
||||
125
0019-pasta-passt-repair-Support-multiple-events-per-read-.patch
Normal file
125
0019-pasta-passt-repair-Support-multiple-events-per-read-.patch
Normal file
@ -0,0 +1,125 @@
|
||||
From 42a854a52b6fa2bbd70cbc0c7657c8a49a9c3d2d Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Fri, 28 Mar 2025 11:39:58 +1100
|
||||
Subject: [PATCH] pasta, passt-repair: Support multiple events per read() in
|
||||
inotify handlers
|
||||
|
||||
The current code assumes that we'll get one event per read() on
|
||||
inotify descriptors, but that's not the case, not from documentation,
|
||||
and not from reports.
|
||||
|
||||
Add loops in the two inotify handlers we have, in pasta-specific code
|
||||
and passt-repair, to go through all the events we receive.
|
||||
|
||||
Link: https://bugs.passt.top/show_bug.cgi?id=119
|
||||
[dwg: Remove unnecessary buffer expansion, use strnlen instead of strlen
|
||||
to make Coverity happier]
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
[sbrivio: Add additional check on ev->name and ev->len in passt-repair]
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
---
|
||||
passt-repair.c | 32 +++++++++++++++++++++++++-------
|
||||
pasta.c | 20 +++++++++++++-------
|
||||
2 files changed, 38 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/passt-repair.c b/passt-repair.c
|
||||
index 120f7aa7..86f02934 100644
|
||||
--- a/passt-repair.c
|
||||
+++ b/passt-repair.c
|
||||
@@ -111,14 +111,14 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
if ((sb.st_mode & S_IFMT) == S_IFDIR) {
|
||||
- char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
|
||||
+ char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
|
||||
+ __attribute__ ((aligned(__alignof__(struct inotify_event))));
|
||||
const struct inotify_event *ev;
|
||||
char path[PATH_MAX + 1];
|
||||
+ bool found = false;
|
||||
ssize_t n;
|
||||
int fd;
|
||||
|
||||
- ev = (struct inotify_event *)buf;
|
||||
-
|
||||
if ((fd = inotify_init1(IN_CLOEXEC)) < 0) {
|
||||
fprintf(stderr, "inotify_init1: %i\n", errno);
|
||||
_exit(1);
|
||||
@@ -130,6 +130,8 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
do {
|
||||
+ char *p;
|
||||
+
|
||||
n = read(fd, buf, sizeof(buf));
|
||||
if (n < 0) {
|
||||
fprintf(stderr, "inotify read: %i", errno);
|
||||
@@ -138,11 +140,27 @@ int main(int argc, char **argv)
|
||||
|
||||
if (n < (ssize_t)sizeof(*ev)) {
|
||||
fprintf(stderr, "Short inotify read: %zi", n);
|
||||
- _exit(1);
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ for (p = buf; p < buf + n; p += sizeof(*ev) + ev->len) {
|
||||
+ ev = (const struct inotify_event *)p;
|
||||
+
|
||||
+ if (ev->len >= REPAIR_EXT_LEN &&
|
||||
+ !memcmp(ev->name +
|
||||
+ strnlen(ev->name, ev->len) -
|
||||
+ REPAIR_EXT_LEN,
|
||||
+ REPAIR_EXT, REPAIR_EXT_LEN)) {
|
||||
+ found = true;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
- } while (ev->len < REPAIR_EXT_LEN ||
|
||||
- memcmp(ev->name + strlen(ev->name) - REPAIR_EXT_LEN,
|
||||
- REPAIR_EXT, REPAIR_EXT_LEN));
|
||||
+ } while (!found);
|
||||
+
|
||||
+ if (ev->len > NAME_MAX + 1 || ev->name[ev->len] != '\0') {
|
||||
+ fprintf(stderr, "Invalid filename from inotify\n");
|
||||
+ _exit(1);
|
||||
+ }
|
||||
|
||||
snprintf(path, sizeof(path), "%s/%s", argv[1], ev->name);
|
||||
if ((stat(path, &sb))) {
|
||||
diff --git a/pasta.c b/pasta.c
|
||||
index fa3e7de5..017fa322 100644
|
||||
--- a/pasta.c
|
||||
+++ b/pasta.c
|
||||
@@ -498,17 +498,23 @@ void pasta_netns_quit_init(const struct ctx *c)
|
||||
*/
|
||||
void pasta_netns_quit_inotify_handler(struct ctx *c, int inotify_fd)
|
||||
{
|
||||
- char buf[sizeof(struct inotify_event) + NAME_MAX + 1];
|
||||
- const struct inotify_event *in_ev = (struct inotify_event *)buf;
|
||||
+ char buf[sizeof(struct inotify_event) + NAME_MAX + 1]
|
||||
+ __attribute__ ((aligned(__alignof__(struct inotify_event))));
|
||||
+ const struct inotify_event *ev;
|
||||
+ ssize_t n;
|
||||
+ char *p;
|
||||
|
||||
- if (read(inotify_fd, buf, sizeof(buf)) < (ssize_t)sizeof(*in_ev))
|
||||
+ if ((n = read(inotify_fd, buf, sizeof(buf))) < (ssize_t)sizeof(*ev))
|
||||
return;
|
||||
|
||||
- if (strncmp(in_ev->name, c->netns_base, sizeof(c->netns_base)))
|
||||
- return;
|
||||
+ for (p = buf; p < buf + n; p += sizeof(*ev) + ev->len) {
|
||||
+ ev = (const struct inotify_event *)p;
|
||||
|
||||
- info("Namespace %s is gone, exiting", c->netns_base);
|
||||
- _exit(EXIT_SUCCESS);
|
||||
+ if (!strncmp(ev->name, c->netns_base, sizeof(c->netns_base))) {
|
||||
+ info("Namespace %s is gone, exiting", c->netns_base);
|
||||
+ _exit(EXIT_SUCCESS);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
/**
|
||||
--
|
||||
2.49.0
|
||||
|
||||
106
0020-migrate-tcp-bind-migrated-sockets-in-repair-mode.patch
Normal file
106
0020-migrate-tcp-bind-migrated-sockets-in-repair-mode.patch
Normal file
@ -0,0 +1,106 @@
|
||||
From dadf2aa2691976219b8272276860926427b97c04 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 2 Apr 2025 13:51:22 +1100
|
||||
Subject: [PATCH] migrate, tcp: bind() migrated sockets in repair mode
|
||||
|
||||
Currently on a migration target, we create then immediately bind() new
|
||||
sockets for the TCP connections we're reconstructing. Mostly, this works,
|
||||
since a socket() that is bound but hasn't had listen() or connect() called
|
||||
is essentially passive. However, this bind() is subject to the usual
|
||||
address conflict checking. In particular that means if we already have
|
||||
a listening socket on that port, we'll get an EADDRINUSE. This will happen
|
||||
for every connection we try to migrate that was initiated from outside to
|
||||
the guest, since we necessarily created a listening socket for that case.
|
||||
|
||||
We set SO_REUSEADDR on the socket in an attempt to avoid this, but that's
|
||||
not sufficient; even with SO_REUSEADDR address conflicts are still
|
||||
prohibited for listening sockets. Of course once these incoming sockets
|
||||
are fully repaired and connect()ed they'll no longer conflict, but that
|
||||
doesn't help us if we fail at the bind().
|
||||
|
||||
We can avoid this by not calling bind() until we're already in repair mode
|
||||
which suppresses this transient conflict. Because of the batching of
|
||||
setting repair mode, to do that we need to move the bind to a step in
|
||||
tcp_flow_migrate_target_ext().
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
[dwg: Fix trivial conflict for RHEL backport]
|
||||
---
|
||||
tcp.c | 38 +++++++++++++++++++++++++++-----------
|
||||
1 file changed, 27 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/tcp.c b/tcp.c
|
||||
index 74141cee..8f5ebc14 100644
|
||||
--- a/tcp.c
|
||||
+++ b/tcp.c
|
||||
@@ -3398,13 +3398,8 @@ fail:
|
||||
int tcp_flow_repair_socket(struct ctx *c, struct tcp_tap_conn *conn)
|
||||
{
|
||||
sa_family_t af = CONN_V4(conn) ? AF_INET : AF_INET6;
|
||||
- const struct flowside *sockside = HOSTFLOW(conn);
|
||||
- union sockaddr_inany a;
|
||||
- socklen_t sl;
|
||||
int s, rc;
|
||||
|
||||
- pif_sockaddr(c, &a, &sl, PIF_HOST, &sockside->oaddr, sockside->oport);
|
||||
-
|
||||
if ((conn->sock = socket(af, SOCK_STREAM | SOCK_NONBLOCK | SOCK_CLOEXEC,
|
||||
IPPROTO_TCP)) < 0) {
|
||||
rc = -errno;
|
||||
@@ -3418,12 +3413,6 @@ int tcp_flow_repair_socket(struct ctx *c, struct tcp_tap_conn *conn)
|
||||
|
||||
tcp_sock_set_nodelay(s);
|
||||
|
||||
- if (bind(s, &a.sa, sizeof(a))) {
|
||||
- rc = -errno;
|
||||
- err_perror("Failed to bind socket for migrated flow");
|
||||
- goto err;
|
||||
- }
|
||||
-
|
||||
if ((rc = tcp_flow_repair_on(c, conn)))
|
||||
goto err;
|
||||
|
||||
@@ -3435,6 +3424,30 @@ err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
+/**
|
||||
+ * tcp_flow_repair_bind() - Bind socket in repair mode
|
||||
+ * @c: Execution context
|
||||
+ * @conn: Pointer to the TCP connection structure
|
||||
+ *
|
||||
+ * Return: 0 on success, negative error code on failure
|
||||
+ */
|
||||
+static int tcp_flow_repair_bind(const struct ctx *c, struct tcp_tap_conn *conn)
|
||||
+{
|
||||
+ const struct flowside *sockside = HOSTFLOW(conn);
|
||||
+ union sockaddr_inany a;
|
||||
+ socklen_t sl;
|
||||
+
|
||||
+ pif_sockaddr(c, &a, &sl, PIF_HOST, &sockside->oaddr, sockside->oport);
|
||||
+
|
||||
+ if (bind(conn->sock, &a.sa, sizeof(a))) {
|
||||
+ int rc = -errno;
|
||||
+ flow_perror(conn, "Failed to bind socket for migrated flow");
|
||||
+ return rc;
|
||||
+ }
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
/**
|
||||
* tcp_flow_repair_connect() - Connect socket in repair mode, then turn it off
|
||||
* @c: Execution context
|
||||
@@ -3599,6 +3612,9 @@ int tcp_flow_migrate_target_ext(struct ctx *c, union flow *flow, int fd)
|
||||
/* We weren't able to create the socket, discard flow */
|
||||
goto fail;
|
||||
|
||||
+ if (tcp_flow_repair_bind(c, conn))
|
||||
+ goto fail;
|
||||
+
|
||||
if (tcp_flow_repair_timestamp(conn, &t))
|
||||
goto fail;
|
||||
|
||||
--
|
||||
2.49.0
|
||||
|
||||
@ -0,0 +1,37 @@
|
||||
From 8fed562e63f3bf7ab2ddabaf1b31de49e03c4083 Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 2 Apr 2025 15:28:04 +1100
|
||||
Subject: [PATCH] passt-repair: Correct off-by-one error verifying name
|
||||
|
||||
passt-repair will generate an error if the name it gets from the kernel is
|
||||
too long or not NUL terminated. Downstream testing has reported
|
||||
occasionally seeing this error in practice.
|
||||
|
||||
In turns out there is a trivial off-by-one error in the check: ev->len is
|
||||
the length of the name, including terminating \0 characters, so to check
|
||||
for a \0 at the end of the buffer we need to check ev->name[len - 1] not
|
||||
ev->name[len].
|
||||
|
||||
Fixes: 42a854a52 ("pasta, passt-repair: Support multiple events per...")
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
---
|
||||
passt-repair.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/passt-repair.c b/passt-repair.c
|
||||
index 86f02934..440c77ae 100644
|
||||
--- a/passt-repair.c
|
||||
+++ b/passt-repair.c
|
||||
@@ -157,7 +157,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
} while (!found);
|
||||
|
||||
- if (ev->len > NAME_MAX + 1 || ev->name[ev->len] != '\0') {
|
||||
+ if (ev->len > NAME_MAX + 1 || ev->name[ev->len - 1] != '\0') {
|
||||
fprintf(stderr, "Invalid filename from inotify\n");
|
||||
_exit(1);
|
||||
}
|
||||
--
|
||||
2.49.0
|
||||
|
||||
@ -0,0 +1,40 @@
|
||||
From 0d6aec2bdd7b14e4b8420c1bf3c393e5666a9125 Mon Sep 17 00:00:00 2001
|
||||
From: Stefano Brivio <sbrivio@redhat.com>
|
||||
Date: Thu, 3 Apr 2025 19:01:02 +0200
|
||||
Subject: [PATCH] passt-repair: Ensure that read buffer is NULL-terminated
|
||||
|
||||
After 3d41e4d83895 ("passt-repair: Correct off-by-one error verifying
|
||||
name"), Coverity Scan isn't convinced anymore about the fact that the
|
||||
ev->name used in the snprintf() is NULL-terminated.
|
||||
|
||||
It comes from a read() call, and read() of course doesn't terminate
|
||||
it, but we already check that the byte at ev->len - 1 is a NULL
|
||||
terminator, so this is actually a false positive.
|
||||
|
||||
In any case, the logic ensuring that ev->name is NULL-terminated isn't
|
||||
necessarily obvious, and additionally checking that the last byte in
|
||||
the buffer we read is a NULL terminator is harmless, so do that
|
||||
explicitly, even if it's redundant.
|
||||
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
(cherry picked from commit 06784d7fc6761528d587837b241d27c6d17c0842)
|
||||
---
|
||||
passt-repair.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/passt-repair.c b/passt-repair.c
|
||||
index 440c77a..256a8c9 100644
|
||||
--- a/passt-repair.c
|
||||
+++ b/passt-repair.c
|
||||
@@ -137,6 +137,7 @@ int main(int argc, char **argv)
|
||||
fprintf(stderr, "inotify read: %i", errno);
|
||||
_exit(1);
|
||||
}
|
||||
+ buf[n - 1] = '\0';
|
||||
|
||||
if (n < (ssize_t)sizeof(*ev)) {
|
||||
fprintf(stderr, "Short inotify read: %zi", n);
|
||||
--
|
||||
2.47.1
|
||||
|
||||
87
0023-tcp_splice-Don-t-double-count-bytes-read-on-EINTR.patch
Normal file
87
0023-tcp_splice-Don-t-double-count-bytes-read-on-EINTR.patch
Normal file
@ -0,0 +1,87 @@
|
||||
From d4192fd37453a706eda7212cb842f8815d88628e Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 9 Apr 2025 16:35:40 +1000
|
||||
Subject: [PATCH 23/24] tcp_splice: Don't double count bytes read on EINTR
|
||||
|
||||
In tcp_splice_sock_handler(), if we get an EINTR on our second splice()
|
||||
(pipe to output socket) we - as we should - go back and retry it. However,
|
||||
we do so *after* we've already updated our byte counters. That does no
|
||||
harm for the conn->written[] counter - since the second splice() returned
|
||||
an error it will be advanced by 0. However we also advance the
|
||||
conn->read[] counter, and then do so again when the splice() succeeds.
|
||||
This results in the counters being out of sync, and us thinking we have
|
||||
remaining data in the pipe when we don't, which can leave us in an
|
||||
infinite loop once the stream finishes.
|
||||
|
||||
Fix this by moving the EINTR handling to directly next to the splice()
|
||||
call (which is what we usually do for EINTR). As a bonus this removes one
|
||||
mildly confusing goto.
|
||||
|
||||
For symmetry, also rework the EINTR handling on the first splice() the same
|
||||
way, although that doesn't (as far as I can tell) have buggy side effects.
|
||||
|
||||
Link: https://github.com/containers/podman/issues/23686#issuecomment-2779347687
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit d3f33f3b8ec4646dae3584b648cba142a73d3208)
|
||||
---
|
||||
tcp_splice.c | 27 +++++++++++++--------------
|
||||
1 file changed, 13 insertions(+), 14 deletions(-)
|
||||
|
||||
diff --git a/tcp_splice.c b/tcp_splice.c
|
||||
index 0d10e3d..7c3b56f 100644
|
||||
--- a/tcp_splice.c
|
||||
+++ b/tcp_splice.c
|
||||
@@ -520,15 +520,14 @@ swap:
|
||||
int more = 0;
|
||||
|
||||
retry:
|
||||
- readlen = splice(conn->s[fromsidei], NULL,
|
||||
- conn->pipe[fromsidei][1], NULL,
|
||||
- c->tcp.pipe_size,
|
||||
- SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
|
||||
+ do
|
||||
+ readlen = splice(conn->s[fromsidei], NULL,
|
||||
+ conn->pipe[fromsidei][1], NULL,
|
||||
+ c->tcp.pipe_size,
|
||||
+ SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
|
||||
+ while (readlen < 0 && errno == EINTR);
|
||||
flow_trace(conn, "%zi from read-side call", readlen);
|
||||
if (readlen < 0) {
|
||||
- if (errno == EINTR)
|
||||
- goto retry;
|
||||
-
|
||||
if (errno != EAGAIN)
|
||||
goto close;
|
||||
} else if (!readlen) {
|
||||
@@ -543,10 +542,13 @@ retry:
|
||||
conn_flag(c, conn, lowat_act_flag);
|
||||
}
|
||||
|
||||
-eintr:
|
||||
- written = splice(conn->pipe[fromsidei][0], NULL,
|
||||
- conn->s[!fromsidei], NULL, c->tcp.pipe_size,
|
||||
- SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
|
||||
+ do
|
||||
+ written = splice(conn->pipe[fromsidei][0], NULL,
|
||||
+ conn->s[!fromsidei], NULL,
|
||||
+ c->tcp.pipe_size,
|
||||
+ SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
|
||||
+ while (written < 0 && errno == EINTR);
|
||||
+
|
||||
flow_trace(conn, "%zi from write-side call (passed %zi)",
|
||||
written, c->tcp.pipe_size);
|
||||
|
||||
@@ -578,9 +580,6 @@ eintr:
|
||||
conn->written[fromsidei] += written > 0 ? written : 0;
|
||||
|
||||
if (written < 0) {
|
||||
- if (errno == EINTR)
|
||||
- goto eintr;
|
||||
-
|
||||
if (errno != EAGAIN)
|
||||
goto close;
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
@ -0,0 +1,67 @@
|
||||
From 2887bf9c9a2be564478b259cd84d721e85a5129e Mon Sep 17 00:00:00 2001
|
||||
From: David Gibson <david@gibson.dropbear.id.au>
|
||||
Date: Wed, 9 Apr 2025 16:35:41 +1000
|
||||
Subject: [PATCH 24/24] tcp_splice: Don't clobber errno before checking for
|
||||
EAGAIN
|
||||
|
||||
Like many places, tcp_splice_sock_handler() needs to handle EAGAIN
|
||||
specially, in this case for both of its splice() calls. Unfortunately it
|
||||
tests for EAGAIN some time after those calls. In between there has been
|
||||
at least a flow_trace() which could have clobbered errno. Move the test on
|
||||
errno closer to the relevant system calls to avoid this problem.
|
||||
|
||||
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
|
||||
Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
|
||||
(cherry picked from commit 6693fa115824d198b7cde46c272514be194500a9)
|
||||
---
|
||||
tcp_splice.c | 18 ++++++++++--------
|
||||
1 file changed, 10 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/tcp_splice.c b/tcp_splice.c
|
||||
index 7c3b56f..60455d6 100644
|
||||
--- a/tcp_splice.c
|
||||
+++ b/tcp_splice.c
|
||||
@@ -526,13 +526,15 @@ retry:
|
||||
c->tcp.pipe_size,
|
||||
SPLICE_F_MOVE | SPLICE_F_NONBLOCK);
|
||||
while (readlen < 0 && errno == EINTR);
|
||||
+
|
||||
+ if (readlen < 0 && errno != EAGAIN)
|
||||
+ goto close;
|
||||
+
|
||||
flow_trace(conn, "%zi from read-side call", readlen);
|
||||
- if (readlen < 0) {
|
||||
- if (errno != EAGAIN)
|
||||
- goto close;
|
||||
- } else if (!readlen) {
|
||||
+
|
||||
+ if (!readlen) {
|
||||
eof = 1;
|
||||
- } else {
|
||||
+ } else if (readlen > 0) {
|
||||
never_read = 0;
|
||||
|
||||
if (readlen >= (long)c->tcp.pipe_size * 90 / 100)
|
||||
@@ -549,6 +551,9 @@ retry:
|
||||
SPLICE_F_MOVE | more | SPLICE_F_NONBLOCK);
|
||||
while (written < 0 && errno == EINTR);
|
||||
|
||||
+ if (written < 0 && errno != EAGAIN)
|
||||
+ goto close;
|
||||
+
|
||||
flow_trace(conn, "%zi from write-side call (passed %zi)",
|
||||
written, c->tcp.pipe_size);
|
||||
|
||||
@@ -580,9 +585,6 @@ retry:
|
||||
conn->written[fromsidei] += written > 0 ? written : 0;
|
||||
|
||||
if (written < 0) {
|
||||
- if (errno != EAGAIN)
|
||||
- goto close;
|
||||
-
|
||||
if (conn->read[fromsidei] == conn->written[fromsidei])
|
||||
break;
|
||||
|
||||
--
|
||||
2.47.1
|
||||
|
||||
32
passt.spec
32
passt.spec
@ -12,14 +12,36 @@
|
||||
|
||||
Name: passt
|
||||
Version: 0^20250217.ga1e48a0
|
||||
Release: 3%{?dist}
|
||||
Release: 5%{?dist}
|
||||
Summary: User-mode networking daemons for virtual machines and namespaces
|
||||
License: GPL-2.0-or-later AND BSD-3-Clause
|
||||
Group: System Environment/Daemons
|
||||
URL: https://passt.top/
|
||||
Source: https://passt.top/passt/snapshot/passt-%{git_hash}.tar.xz
|
||||
|
||||
Patch1: 0001-selinux-passt_repair_exec_t-needs-to-be-a-exec_type.patch
|
||||
Patch2: 0002-migrate-flow-Trivially-succeed-if-migrating-with-no-.patch
|
||||
Patch3: 0003-migrate-flow-Don-t-attempt-to-migrate-TCP-flows-with.patch
|
||||
Patch4: 0004-tcp-Correct-error-code-handling-from-tcp_flow_repair.patch
|
||||
Patch5: 0005-tcp-Unconditionally-move-to-CLOSED-state-on-tcp_rst.patch
|
||||
Patch6: 0006-migrate-tcp-Don-t-flow_alloc_cancel-during-incoming-.patch
|
||||
Patch7: 0007-ip-Helpers-to-access-IPv6-flow-label.patch
|
||||
Patch8: 0008-tap-Consider-IPv6-flow-label-when-building-packet-se.patch
|
||||
Patch9: 0009-tcp-Send-RST-in-response-to-guest-packets-that-match.patch
|
||||
Patch10: 0010-selinux-Fixes-workarounds-for-passt-and-passt-repair.patch
|
||||
Patch11: 0011-passt-repair-Add-directory-watch.patch
|
||||
Patch12: 0012-flow-repair-Wait-for-a-short-while-for-passt-repair-.patch
|
||||
Patch13: 0013-passt-repair-Fix-build-with-Werror-format-security.patch
|
||||
Patch14: 0014-migrate-tcp-More-careful-marshalling-of-mss-paramete.patch
|
||||
Patch15: 0015-flow-Add-flow_perror-helper.patch
|
||||
Patch16: 0016-migrate-tcp-Migrate-RFC-7323-timestamp.patch
|
||||
Patch17: 0017-migrate-Bump-migration-version-number.patch
|
||||
Patch18: 0018-tcp-Flush-socket-before-checking-for-more-data-in-ac.patch
|
||||
Patch19: 0019-pasta-passt-repair-Support-multiple-events-per-read-.patch
|
||||
Patch20: 0020-migrate-tcp-bind-migrated-sockets-in-repair-mode.patch
|
||||
Patch21: 0021-passt-repair-Correct-off-by-one-error-verifying-name.patch
|
||||
Patch22: 0022-passt-repair-Ensure-that-read-buffer-is-NULL-termina.patch
|
||||
Patch23: 0023-tcp_splice-Don-t-double-count-bytes-read-on-EINTR.patch
|
||||
Patch24: 0024-tcp_splice-Don-t-clobber-errno-before-checking-for-E.patch
|
||||
|
||||
BuildRequires: gcc, make, git, checkpolicy, selinux-policy-devel
|
||||
Requires: (%{name}-selinux = %{version}-%{release} if selinux-policy-%{selinuxtype})
|
||||
@ -132,6 +154,12 @@ fi
|
||||
%{_datadir}/selinux/packages/%{selinuxtype}/passt-repair.pp
|
||||
|
||||
%changelog
|
||||
* Thu Apr 10 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250217.ga1e48a0-5
|
||||
- Resolves: RHEL-83979 RHEL-84157 RHEL-86761
|
||||
|
||||
* Thu Mar 20 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250217.ga1e48a0-4
|
||||
- Resolves: RHEL-84249 RHEL-83979 RHEL-84157 RHEL-84248
|
||||
|
||||
* Fri Feb 28 2025 Stefano Brivio <sbrivio@redhat.com> - 0^20250217.ga1e48a0-3
|
||||
- Resolves: RHEL-80297
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user