kronosnet/SOURCES/bz1761711-fix-data-deliver-corruption-from-fragmented-packets.patch
2021-09-09 19:46:18 +00:00

416 lines
15 KiB
Diff

commit db21da87bba6017c8343f9c6f255b21813ffd5d0
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Oct 15 06:46:36 2019 +0200
[host] rename variables to make it easier to read the code
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/host.c b/libknet/host.c
index abb1f89..ac26b89 100644
--- a/libknet/host.c
+++ b/libknet/host.c
@@ -569,7 +569,7 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num)
int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, int clear_buf)
{
- size_t i, j; /* circular buffer indexes */
+ size_t head, tail; /* circular buffer indexes */
seq_num_t seq_dist;
char *dst_cbuf = host->circular_buffer;
char *dst_cbuf_defrag = host->circular_buffer_defrag;
@@ -585,13 +585,13 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i
seq_dist = *dst_seq_num - seq_num;
}
- j = seq_num % KNET_CBUFFER_SIZE;
+ head = seq_num % KNET_CBUFFER_SIZE;
if (seq_dist < KNET_CBUFFER_SIZE) { /* seq num is in ring buffer */
if (!defrag_buf) {
- return (dst_cbuf[j] == 0) ? 1 : 0;
+ return (dst_cbuf[head] == 0) ? 1 : 0;
} else {
- return (dst_cbuf_defrag[j] == 0) ? 1 : 0;
+ return (dst_cbuf_defrag[head] == 0) ? 1 : 0;
}
} else if (seq_dist <= SEQ_MAX - KNET_CBUFFER_SIZE) {
memset(dst_cbuf, 0, KNET_CBUFFER_SIZE);
@@ -600,16 +600,16 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i
}
/* cleaning up circular buffer */
- i = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE;
+ tail = (*dst_seq_num + 1) % KNET_CBUFFER_SIZE;
- if (i > j) {
- memset(dst_cbuf + i, 0, KNET_CBUFFER_SIZE - i);
- memset(dst_cbuf, 0, j + 1);
- memset(dst_cbuf_defrag + i, 0, KNET_CBUFFER_SIZE - i);
- memset(dst_cbuf_defrag, 0, j + 1);
+ if (tail > head) {
+ memset(dst_cbuf + tail, 0, KNET_CBUFFER_SIZE - tail);
+ memset(dst_cbuf, 0, head + 1);
+ memset(dst_cbuf_defrag + tail, 0, KNET_CBUFFER_SIZE - tail);
+ memset(dst_cbuf_defrag, 0, head + 1);
} else {
- memset(dst_cbuf + i, 0, j - i + 1);
- memset(dst_cbuf_defrag + i, 0, j - i + 1);
+ memset(dst_cbuf + tail, 0, head - tail + 1);
+ memset(dst_cbuf_defrag + tail, 0, head - tail + 1);
}
*dst_seq_num = seq_num;
commit 1e473cf26d55c2b6ff8d5bfaa5aa689554de803c
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Oct 15 06:53:24 2019 +0200
[host] fix defrag buffers reclaim logic
The problem:
- let's assume a 2 nodes (A and B) cluster setup
- node A sends fragmented packets to node B and there is
packet loss on the network.
- node B receives all those fragments and attempts to
reassemble them.
- node A sends packet seq_num X in Y fragments.
- node B receives only part of the fragments and stores
them in a defrag buf.
- packet loss stops.
- node A continues to send packets and a seq_num
roll-over takes place.
- node A sends a new packet seq_num X in Y fragments.
- node B gets confused here because the parts of the old
packet seq_num X are still stored and the buffer
has not been reclaimed.
- node B continues to rebuild packet seq_num X with
old stale data and new data from after the roll-over.
- node B completes reassembling the packet and delivers
junk to the application.
The solution:
Add a much stronger buffer reclaim logic that will apply
on each received packet and not only when defrag buffers
are needed, as there might be a mix of fragmented and not
fragmented packets in-flight.
The new logic creates a window of N packets that can be
handled at the same time (based on the number of buffers)
and clear everything else.
Fixes https://github.com/kronosnet/kronosnet/issues/261
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/host.c b/libknet/host.c
index ac26b89..85d4626 100644
--- a/libknet/host.c
+++ b/libknet/host.c
@@ -562,6 +562,35 @@ static void _clear_cbuffers(struct knet_host *host, seq_num_t rx_seq_num)
}
}
+static void _reclaim_old_defrag_bufs(struct knet_host *host, seq_num_t seq_num)
+{
+ seq_num_t head, tail; /* seq_num boundaries */
+ int i;
+
+ head = seq_num + 1;
+ tail = seq_num - (KNET_MAX_LINK + 1);
+
+ /*
+ * expire old defrag buffers
+ */
+ for (i = 0; i < KNET_MAX_LINK; i++) {
+ if (host->defrag_buf[i].in_use) {
+ /*
+ * head has done a rollover to 0+
+ */
+ if (tail > head) {
+ if ((host->defrag_buf[i].pckt_seq >= head) && (host->defrag_buf[i].pckt_seq <= tail)) {
+ host->defrag_buf[i].in_use = 0;
+ }
+ } else {
+ if ((host->defrag_buf[i].pckt_seq >= head) || (host->defrag_buf[i].pckt_seq <= tail)){
+ host->defrag_buf[i].in_use = 0;
+ }
+ }
+ }
+ }
+}
+
/*
* check if a given packet seq num is in the circular buffers
* defrag_buf = 0 -> use normal cbuf 1 -> use the defrag buffer lookup
@@ -579,6 +608,8 @@ int _seq_num_lookup(struct knet_host *host, seq_num_t seq_num, int defrag_buf, i
_clear_cbuffers(host, seq_num);
}
+ _reclaim_old_defrag_bufs(host, seq_num);
+
if (seq_num < *dst_seq_num) {
seq_dist = (SEQ_MAX - seq_num) + *dst_seq_num;
} else {
commit 5bd88ebd63af20577095c2c98975f0f1781ba46a
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Oct 15 07:02:05 2019 +0200
[rx] copy data into the defrag buffer only if we know the size of the frame
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_rx.c b/libknet/threads_rx.c
index b2a5dad..6c26cdc 100644
--- a/libknet/threads_rx.c
+++ b/libknet/threads_rx.c
@@ -186,8 +186,10 @@ static int pckt_defrag(knet_handle_t knet_h, struct knet_header *inbuf, ssize_t
defrag_buf->frag_size = *len;
}
- memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size),
- inbuf->khp_data_userdata, *len);
+ if (defrag_buf->frag_size) {
+ memmove(defrag_buf->buf + ((inbuf->khp_data_frag_seq - 1) * defrag_buf->frag_size),
+ inbuf->khp_data_userdata, *len);
+ }
defrag_buf->frag_recv++;
defrag_buf->frag_map[inbuf->khp_data_frag_seq] = 1;
commit cd59986900510119d8e7b63d33ad35466d480858
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Oct 15 07:16:22 2019 +0200
[test] add ability to knet_bench to specify a fixed packet size for perf test
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c
index dc04239..54b5303 100644
--- a/libknet/tests/knet_bench.c
+++ b/libknet/tests/knet_bench.c
@@ -67,6 +67,8 @@ static int test_type = TEST_PING;
static uint64_t perf_by_size_size = 1 * ONE_GIGABYTE;
static uint64_t perf_by_time_secs = 10;
+static uint32_t force_packet_size = 0;
+
struct node {
int nodeid;
int links;
@@ -109,6 +111,7 @@ static void print_help(void)
printf(" -s nodeid that will generate traffic for benchmarks\n");
printf(" -S [size|seconds] when used in combination with -T perf-by-size it indicates how many GB of traffic to generate for the test. (default: 1GB)\n");
printf(" when used in combination with -T perf-by-time it indicates how many Seconds of traffic to generate for the test. (default: 10 seconds)\n");
+ printf(" -x force packet size for perf-by-time or perf-by-size\n");
printf(" -C repeat the test continously (default: off)\n");
printf(" -X[XX] show stats at the end of the run (default: 1)\n");
printf(" 1: show handle stats, 2: show summary link stats\n");
@@ -250,7 +253,7 @@ static void setup_knet(int argc, char *argv[])
memset(nodes, 0, sizeof(nodes));
- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:X::P:z:h")) != EOF) {
+ while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) {
switch(rv) {
case 'h':
print_help();
@@ -406,6 +409,13 @@ static void setup_knet(int argc, char *argv[])
perf_by_size_size = (uint64_t)atoi(optarg) * ONE_GIGABYTE;
perf_by_time_secs = (uint64_t)atoi(optarg);
break;
+ case 'x':
+ force_packet_size = (uint32_t)atoi(optarg);
+ if ((force_packet_size < 1) || (force_packet_size > 65536)) {
+ printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size);
+ exit(FAIL);
+ }
+ break;
case 'C':
continous = 1;
break;
@@ -874,7 +884,7 @@ static int setup_send_buffers_common(struct knet_mmsghdr *msg, struct iovec *iov
printf("TXT: Unable to malloc!\n");
return -1;
}
- memset(tx_buf[i], 0, KNET_MAX_PACKET_SIZE);
+ memset(tx_buf[i], i, KNET_MAX_PACKET_SIZE);
iov_out[i].iov_base = (void *)tx_buf[i];
memset(&msg[i].msg_hdr, 0, sizeof(struct msghdr));
msg[i].msg_hdr.msg_iov = &iov_out[i];
@@ -898,6 +908,9 @@ static void send_perf_data_by_size(void)
setup_send_buffers_common(msg, iov_out, tx_buf);
while (packetsize <= KNET_MAX_PACKET_SIZE) {
+ if (force_packet_size) {
+ packetsize = force_packet_size;
+ }
for (i = 0; i < PCKT_FRAG_MAX; i++) {
iov_out[i].iov_len = packetsize;
}
@@ -926,7 +939,7 @@ static void send_perf_data_by_size(void)
knet_send(knet_h, ctrl_message, TEST_STOP, channel);
- if (packetsize == KNET_MAX_PACKET_SIZE) {
+ if ((packetsize == KNET_MAX_PACKET_SIZE) || (force_packet_size)) {
break;
}
@@ -1175,6 +1188,9 @@ static void send_perf_data_by_time(void)
memset(&clock_end, 0, sizeof(clock_start));
while (packetsize <= KNET_MAX_PACKET_SIZE) {
+ if (force_packet_size) {
+ packetsize = force_packet_size;
+ }
for (i = 0; i < PCKT_FRAG_MAX; i++) {
iov_out[i].iov_len = packetsize;
}
@@ -1205,7 +1221,7 @@ static void send_perf_data_by_time(void)
knet_send(knet_h, ctrl_message, TEST_STOP, channel);
- if (packetsize == KNET_MAX_PACKET_SIZE) {
+ if ((packetsize == KNET_MAX_PACKET_SIZE) || (force_packet_size)) {
break;
}
commit e28e2ea7c7e8139a6792ec1508215d4560b53e65
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Wed Oct 16 08:10:23 2019 +0200
[test] add packet verification option to knet_bench
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c
index 54b5303..c9e1c06 100644
--- a/libknet/tests/knet_bench.c
+++ b/libknet/tests/knet_bench.c
@@ -47,6 +47,7 @@ static char *compresscfg = NULL;
static char *cryptocfg = NULL;
static int machine_output = 0;
static int use_access_lists = 0;
+static int use_pckt_verification = 0;
static int bench_shutdown_in_progress = 0;
static pthread_mutex_t shutdown_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -76,6 +77,11 @@ struct node {
struct sockaddr_storage address[KNET_MAX_LINK];
};
+struct pckt_ver {
+ uint32_t len;
+ uint32_t chksum;
+};
+
static void print_help(void)
{
printf("knet_bench usage:\n");
@@ -117,6 +123,7 @@ static void print_help(void)
printf(" 1: show handle stats, 2: show summary link stats\n");
printf(" 3: show detailed link stats\n");
printf(" -a enable machine parsable output (default: off).\n");
+ printf(" -v enable packet verification for performance tests (default: off).\n");
}
static void parse_nodes(char *nodesinfo[MAX_NODES], int onidx, int port, struct node nodes[MAX_NODES], int *thisidx)
@@ -253,7 +260,7 @@ static void setup_knet(int argc, char *argv[])
memset(nodes, 0, sizeof(nodes));
- while ((rv = getopt(argc, argv, "aCT:S:s:ldfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) {
+ while ((rv = getopt(argc, argv, "aCT:S:s:lvdfom:wb:t:n:c:p:x:X::P:z:h")) != EOF) {
switch(rv) {
case 'h':
print_help();
@@ -411,11 +418,14 @@ static void setup_knet(int argc, char *argv[])
break;
case 'x':
force_packet_size = (uint32_t)atoi(optarg);
- if ((force_packet_size < 1) || (force_packet_size > 65536)) {
- printf("Unsupported packet size %u (accepted 1 - 65536)\n", force_packet_size);
+ if ((force_packet_size < 64) || (force_packet_size > 65536)) {
+ printf("Unsupported packet size %u (accepted 64 - 65536)\n", force_packet_size);
exit(FAIL);
}
break;
+ case 'v':
+ use_pckt_verification = 1;
+ break;
case 'C':
continous = 1;
break;
@@ -654,6 +664,24 @@ static void setup_knet(int argc, char *argv[])
}
}
+/*
+ * calculate weak chksum (stole from corosync for debugging purposes)
+ */
+static uint32_t compute_chsum(const unsigned char *data, uint32_t data_len)
+{
+ unsigned int i;
+ unsigned int checksum = 0;
+
+ for (i = 0; i < data_len; i++) {
+ if (checksum & 1) {
+ checksum |= 0x10000;
+ }
+
+ checksum = ((checksum >> 1) + (unsigned char)data[i]) & 0xffff;
+ }
+ return (checksum);
+}
+
static void *_rx_thread(void *args)
{
int rx_epoll;
@@ -766,6 +794,20 @@ static void *_rx_thread(void *args)
}
continue;
}
+ if (use_pckt_verification) {
+ struct pckt_ver *recv_pckt = (struct pckt_ver *)msg[i].msg_hdr.msg_iov->iov_base;
+ uint32_t chksum;
+
+ if (msg[i].msg_len != recv_pckt->len) {
+ printf("Wrong packet len received: %u expected: %u!\n", msg[i].msg_len, recv_pckt->len);
+ exit(FAIL);
+ }
+ chksum = compute_chsum((const unsigned char *)msg[i].msg_hdr.msg_iov->iov_base + sizeof(struct pckt_ver), msg[i].msg_len - sizeof(struct pckt_ver));
+ if (recv_pckt->chksum != chksum){
+ printf("Wrong packet checksum received: %u expected: %u!\n", recv_pckt->chksum, chksum);
+ exit(FAIL);
+ }
+ }
rx_pkts++;
rx_bytes = rx_bytes + msg[i].msg_len;
current_pckt_size = msg[i].msg_len;
@@ -913,6 +955,11 @@ static void send_perf_data_by_size(void)
}
for (i = 0; i < PCKT_FRAG_MAX; i++) {
iov_out[i].iov_len = packetsize;
+ if (use_pckt_verification) {
+ struct pckt_ver *tx_pckt = (struct pckt_ver *)&iov_out[i].iov_base;
+ tx_pckt->len = iov_out[i].iov_len;
+ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver));
+ }
}
total_pkts_to_tx = perf_by_size_size / packetsize;
@@ -1193,6 +1240,11 @@ static void send_perf_data_by_time(void)
}
for (i = 0; i < PCKT_FRAG_MAX; i++) {
iov_out[i].iov_len = packetsize;
+ if (use_pckt_verification) {
+ struct pckt_ver *tx_pckt = (struct pckt_ver *)iov_out[i].iov_base;
+ tx_pckt->len = iov_out[i].iov_len;
+ tx_pckt->chksum = compute_chsum((const unsigned char *)iov_out[i].iov_base + sizeof(struct pckt_ver), iov_out[i].iov_len - sizeof(struct pckt_ver));
+ }
}
printf("[info]: testing with %u bytes packet size for %" PRIu64 " seconds.\n", packetsize, perf_by_time_secs);