kronosnet/SOURCES/bz1736872-fix-mtu-calculati...

1752 lines
59 KiB
Diff

commit b67c63101246b400c7512cb1adbc590ac06cb6ee
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Jul 30 11:18:33 2019 +0200
[crypto] fix log information
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/crypto.c b/libknet/crypto.c
index 9f05fba..9d6757b 100644
--- a/libknet/crypto.c
+++ b/libknet/crypto.c
@@ -151,8 +151,6 @@ int crypto_init(
goto out;
}
- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
-
out:
if (!err) {
knet_h->crypto_instance = new;
@@ -161,6 +159,8 @@ out:
knet_h->sec_hash_size = new->sec_hash_size;
knet_h->sec_salt_size = new->sec_salt_size;
+ log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
+
if (current) {
if (crypto_modules_cmds[current->model].ops->fini != NULL) {
crypto_modules_cmds[current->model].ops->fini(knet_h, current);
commit a89c2cd6d3863abe0f3ae0165239177a7461ee5e
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Wed Jul 31 14:15:07 2019 +0200
[udp] log information about detected kernel MTU
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c
index 53d2ba0..be990bb 100644
--- a/libknet/transport_udp.c
+++ b/libknet/transport_udp.c
@@ -337,6 +337,7 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd)
break;
} else {
knet_h->kernel_mtu = sock_err->ee_info;
+ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "detected kernel MTU: %u", knet_h->kernel_mtu);
pthread_mutex_unlock(&knet_h->kmtu_mutex);
}
commit 650ef6d26e83dd7827b2e913c52a1fac67ea60d4
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Fri Aug 2 10:43:09 2019 +0200
[docs] add knet packet layout
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index 603f595..2cd48f9 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -91,6 +91,28 @@ restart:
failsafe++;
}
+ /*
+ * unencrypted packet looks like:
+ *
+ * | ip | protocol | knet_header | unencrypted data |
+ * | onwire_len |
+ * | overhead_len |
+ * | data_len |
+ * | app MTU |
+ *
+ * encrypted packet looks like (not to scale):
+ *
+ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash |
+ * | onwire_len |
+ * | overhead_len |
+ * | data_len |
+ * | app MTU |
+ *
+ * knet_h->sec_block_size is >= 0 if encryption will pad the data
+ * knet_h->sec_salt_size is >= 0 if encryption is enabled
+ * knet_h->sec_hash_size is >= 0 if signing is enabled
+ */
+
data_len = onwire_len - overhead_len;
if (knet_h->crypto_instance) {
commit dbed772f0cb9070826eac6524646bd2ea7cce8c0
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Fri Aug 2 10:44:23 2019 +0200
[PMTUd] fix MTU calculation when using crypto and add docs
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index 2cd48f9..1a19806 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -113,29 +113,68 @@ restart:
* knet_h->sec_hash_size is >= 0 if signing is enabled
*/
+ /*
+ * common to all packets
+ */
data_len = onwire_len - overhead_len;
if (knet_h->crypto_instance) {
+realign:
if (knet_h->sec_block_size) {
+
+ /*
+ * drop both salt and hash, that leaves only the crypto data and padding
+ * we need to calculate the padding based on the real encrypted data.
+ */
+ data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
+
+ /*
+ * if the crypto mechanism requires padding, calculate the padding
+ * and add it back to data_len because that's what the crypto layer
+ * would do.
+ */
pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
+
+ /*
+ * if are at the boundary, reset padding
+ */
if (pad_len == knet_h->sec_block_size) {
pad_len = 0;
}
data_len = data_len + pad_len;
- }
- data_len = data_len + (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
-
- if (knet_h->sec_block_size) {
+ /*
+ * if our current data_len is higher than max_mtu_len
+ * then we need to reduce by padding size (that is our
+ * increment / decrement value)
+ *
+ * this generally happens only on the first PMTUd run
+ */
while (data_len + overhead_len >= max_mtu_len) {
data_len = data_len - knet_h->sec_block_size;
}
+
+ /*
+ * add both hash and salt size back, similar to padding above,
+ * the crypto layer will add them to the data_len
+ */
+ data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
}
if (dst_link->last_bad_mtu) {
- while (data_len + overhead_len >= dst_link->last_bad_mtu) {
- data_len = data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size);
+ if (data_len + overhead_len >= dst_link->last_bad_mtu) {
+ /*
+ * reduce data_len to something lower than last_bad_mtu, overhead_len
+ * and sec_block_size (decrementing step) - 1 (granularity)
+ */
+ data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
+ if (knet_h->sec_block_size) {
+ /*
+ * make sure that data_len is aligned to the sec_block_size boundary
+ */
+ goto realign;
+ }
}
}
@@ -144,6 +183,10 @@ restart:
return -1;
}
+ /*
+ * recalculate onwire_len based on crypto information
+ * and place it in the PMTUd packet info
+ */
onwire_len = data_len + overhead_len;
knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
commit a9460c72fafe452b7cb584598aa43a87b44428f0
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Mon Aug 12 16:52:59 2019 +0200
[PMTUd] rework the whole math to calculate MTU
internal changes:
- drop the concept of sec_header_size that was completely wrong
and unnecessary
- bump crypto API to version 3 due to the above change
- clarify the difference between link->proto_overhead and
link->status->proto_overhead. We cannot rename the status
one as it would also change ABI.
- add onwire.c with documentation on the packet format
and what various len(s) mean in context.
- add 3 new functions to calculate MTUs back and forth
and use them around, hopefully with enough clarification
on why things are done in a given way.
- heavily change thread_pmtud.c to use those new facilities.
- fix major calculation issues when using crypto (non-crypto
was not affected by the problem).
- fix checks around to make sure they match the new math.
- fix padding calculation.
- add functional PMTUd crypto test
this test can take several hours (12+) and should be executed
in a controlled environment since it automatically changes
the loopback MTU to run tests.
- fix the way the lowest MTU is calculated during a PMTUd run
to avoid spurious double notifications.
- drop redundant checks.
user visible changes:
- Global MTU is now calculated properly when using crypto
and values will be in general bigger than before due
to incorrect padding calculation in the previous implementation.
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/Makefile.am b/libknet/Makefile.am
index d080732..2fa2416 100644
--- a/libknet/Makefile.am
+++ b/libknet/Makefile.am
@@ -36,6 +36,7 @@ sources = \
links_acl_loopback.c \
logging.c \
netutils.c \
+ onwire.c \
threads_common.c \
threads_dsthandler.c \
threads_heartbeat.c \
diff --git a/libknet/crypto.c b/libknet/crypto.c
index 9d6757b..afa4f88 100644
--- a/libknet/crypto.c
+++ b/libknet/crypto.c
@@ -154,12 +154,14 @@ int crypto_init(
out:
if (!err) {
knet_h->crypto_instance = new;
- knet_h->sec_header_size = new->sec_header_size;
knet_h->sec_block_size = new->sec_block_size;
knet_h->sec_hash_size = new->sec_hash_size;
knet_h->sec_salt_size = new->sec_salt_size;
- log_debug(knet_h, KNET_SUB_CRYPTO, "security network overhead: %zu", knet_h->sec_header_size);
+ log_debug(knet_h, KNET_SUB_CRYPTO, "Hash size: %zu salt size: %zu block size: %zu",
+ knet_h->sec_hash_size,
+ knet_h->sec_salt_size,
+ knet_h->sec_block_size);
if (current) {
if (crypto_modules_cmds[current->model].ops->fini != NULL) {
@@ -195,7 +197,6 @@ void crypto_fini(
crypto_modules_cmds[knet_h->crypto_instance->model].ops->fini(knet_h, knet_h->crypto_instance);
}
free(knet_h->crypto_instance);
- knet_h->sec_header_size = 0;
knet_h->sec_block_size = 0;
knet_h->sec_hash_size = 0;
knet_h->sec_salt_size = 0;
diff --git a/libknet/crypto_model.h b/libknet/crypto_model.h
index 70f6238..b05e49c 100644
--- a/libknet/crypto_model.h
+++ b/libknet/crypto_model.h
@@ -14,13 +14,12 @@
struct crypto_instance {
int model;
void *model_instance;
- size_t sec_header_size;
size_t sec_block_size;
size_t sec_hash_size;
size_t sec_salt_size;
};
-#define KNET_CRYPTO_MODEL_ABI 2
+#define KNET_CRYPTO_MODEL_ABI 3
/*
* see compress_model.h for explanation of the various lib related functions
diff --git a/libknet/crypto_nss.c b/libknet/crypto_nss.c
index 330b40c..c624a47 100644
--- a/libknet/crypto_nss.c
+++ b/libknet/crypto_nss.c
@@ -801,10 +801,7 @@ static int nsscrypto_init(
goto out_err;
}
- crypto_instance->sec_header_size = 0;
-
if (nsscrypto_instance->crypto_hash_type > 0) {
- crypto_instance->sec_header_size += nsshash_len[nsscrypto_instance->crypto_hash_type];
crypto_instance->sec_hash_size = nsshash_len[nsscrypto_instance->crypto_hash_type];
}
@@ -821,8 +818,6 @@ static int nsscrypto_init(
}
}
- crypto_instance->sec_header_size += (block_size * 2);
- crypto_instance->sec_header_size += SALT_SIZE;
crypto_instance->sec_salt_size = SALT_SIZE;
crypto_instance->sec_block_size = block_size;
}
diff --git a/libknet/crypto_openssl.c b/libknet/crypto_openssl.c
index 0cbc6f5..6571498 100644
--- a/libknet/crypto_openssl.c
+++ b/libknet/crypto_openssl.c
@@ -566,11 +566,8 @@ static int opensslcrypto_init(
memmove(opensslcrypto_instance->private_key, knet_handle_crypto_cfg->private_key, knet_handle_crypto_cfg->private_key_len);
opensslcrypto_instance->private_key_len = knet_handle_crypto_cfg->private_key_len;
- crypto_instance->sec_header_size = 0;
-
if (opensslcrypto_instance->crypto_hash_type) {
crypto_instance->sec_hash_size = EVP_MD_size(opensslcrypto_instance->crypto_hash_type);
- crypto_instance->sec_header_size += crypto_instance->sec_hash_size;
}
if (opensslcrypto_instance->crypto_cipher_type) {
@@ -578,8 +575,6 @@ static int opensslcrypto_init(
block_size = EVP_CIPHER_block_size(opensslcrypto_instance->crypto_cipher_type);
- crypto_instance->sec_header_size += (block_size * 2);
- crypto_instance->sec_header_size += SALT_SIZE;
crypto_instance->sec_salt_size = SALT_SIZE;
crypto_instance->sec_block_size = block_size;
}
diff --git a/libknet/internals.h b/libknet/internals.h
index 3f105a1..31840e4 100644
--- a/libknet/internals.h
+++ b/libknet/internals.h
@@ -71,7 +71,9 @@ struct knet_link {
uint8_t received_pong;
struct timespec ping_last;
/* used by PMTUD thread as temp per-link variables and should always contain the onwire_len value! */
- uint32_t proto_overhead;
+ uint32_t proto_overhead; /* IP + UDP/SCTP overhead. NOT to be confused
+ with stats.proto_overhead that includes also knet headers
+ and crypto headers */
struct timespec pmtud_last;
uint32_t last_ping_size;
uint32_t last_good_mtu;
@@ -197,7 +199,6 @@ struct knet_handle {
int pmtud_forcerun;
int pmtud_abort;
struct crypto_instance *crypto_instance;
- size_t sec_header_size;
size_t sec_block_size;
size_t sec_hash_size;
size_t sec_salt_size;
diff --git a/libknet/links.c b/libknet/links.c
index 51ead5a..03e0af9 100644
--- a/libknet/links.c
+++ b/libknet/links.c
@@ -265,7 +265,32 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
host->status.reachable = 1;
link->status.mtu = KNET_PMTUD_SIZE_V6;
} else {
- link->status.mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
+ /*
+ * calculate the minimum MTU that is safe to use,
+ * based on RFCs and that each network device should
+ * be able to support without any troubles
+ */
+ if (link->dynamic == KNET_LINK_STATIC) {
+ /*
+ * with static link we can be more precise than using
+ * the generic calc_min_mtu()
+ */
+ switch (link->dst_addr.ss_family) {
+ case AF_INET6:
+ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V6 - (KNET_PMTUD_OVERHEAD_V6 + link->proto_overhead));
+ break;
+ case AF_INET:
+ link->status.mtu = calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V4 + link->proto_overhead));
+ break;
+ }
+ } else {
+ /*
+ * for dynamic links we start with the minimum MTU
+ * possible and PMTUd will kick in immediately
+ * after connection status is 1
+ */
+ link->status.mtu = calc_min_mtu(knet_h);
+ }
link->has_valid_mtu = 1;
}
diff --git a/libknet/onwire.c b/libknet/onwire.c
new file mode 100644
index 0000000..143ac4b
--- /dev/null
+++ b/libknet/onwire.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2019 Red Hat, Inc. All rights reserved.
+ *
+ * Author: Fabio M. Di Nitto <fabbione@kronosnet.org>
+ *
+ * This software licensed under LGPL-2.0+
+ */
+
+#include "config.h"
+
+#include <sys/errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "crypto.h"
+#include "internals.h"
+#include "logging.h"
+#include "common.h"
+#include "transport_udp.h"
+#include "transport_sctp.h"
+
+/*
+ * unencrypted packet looks like:
+ *
+ * | ip | protocol | knet_header | unencrypted data |
+ * | onwire_len |
+ * | proto_overhead |
+ * | data_len |
+ * | app MTU |
+ *
+ * encrypted packet looks like (not to scale):
+ *
+ * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash |
+ * | onwire_len |
+ * | proto_overhead |
+ * | data_len |
+ * | app MTU |
+ *
+ * knet_h->sec_block_size is > 0 if encryption will pad the data
+ * knet_h->sec_salt_size is > 0 if encryption is enabled
+ * knet_h->sec_hash_size is > 0 if signing is enabled
+ */
+
+/*
+ * this function takes in the data that we would like to send
+ * and tells us the outgoing onwire data size with crypto and
+ * all the headers adjustment.
+ * calling thread needs to account for protocol overhead.
+ */
+
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen)
+{
+ size_t outlen = inlen, pad_len = 0;
+
+ if (knet_h->sec_block_size) {
+ /*
+ * if the crypto mechanism requires padding, calculate the padding
+ * and add it back to outlen because that's what the crypto layer
+ * would do.
+ */
+ pad_len = knet_h->sec_block_size - (outlen % knet_h->sec_block_size);
+
+ outlen = outlen + pad_len;
+ }
+
+ return outlen + knet_h->sec_salt_size + knet_h->sec_hash_size;
+}
+
+/*
+ * this function takes in the data that we would like to send
+ * and tells us what is the real maximum data we can send
+ * accounting for headers and crypto
+ * calling thread needs to account for protocol overhead.
+ */
+
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen)
+{
+ size_t outlen = inlen, pad_len = 0;
+
+ if (knet_h->sec_block_size) {
+ /*
+ * drop both salt and hash, that leaves only the crypto data and padding
+ * we need to calculate the padding based on the real encrypted data
+ * that includes the knet_header.
+ */
+ outlen = outlen - (knet_h->sec_salt_size + knet_h->sec_hash_size);
+
+ /*
+ * if the crypto mechanism requires padding, calculate the padding
+ * and remove it, to align the data.
+ * NOTE: we need to remove pad_len + 1 because, based on testing,
+ * if we send data that are already aligned to block_size, the
+ * crypto implementations will add another block_size!
+ * so we want to make sure that our data won't add an unnecessary
+ * block_size that we need to remove later.
+ */
+ pad_len = outlen % knet_h->sec_block_size;
+
+ outlen = outlen - (pad_len + 1);
+
+ /*
+ * add both hash and salt size back, similar to padding above,
+ * the crypto layer will add them to the outlen
+ */
+ outlen = outlen + (knet_h->sec_salt_size + knet_h->sec_hash_size);
+ }
+
+ /*
+ * drop KNET_HEADER_ALL_SIZE and the crypto salt and hash sizes
+ * to provide a clean application MTU
+ */
+ outlen = outlen - (KNET_HEADER_ALL_SIZE + knet_h->sec_salt_size + knet_h->sec_hash_size);
+
+ return outlen;
+}
+
+/*
+ * set the lowest possible value as failsafe for all links.
+ * KNET_PMTUD_MIN_MTU_V4 < KNET_PMTUD_MIN_MTU_V6
+ * KNET_PMTUD_OVERHEAD_V6 > KNET_PMTUD_OVERHEAD_V4
+ * KNET_PMTUD_SCTP_OVERHEAD > KNET_PMTUD_UDP_OVERHEAD
+ */
+
+size_t calc_min_mtu(knet_handle_t knet_h)
+{
+ return calc_max_data_outlen(knet_h, KNET_PMTUD_MIN_MTU_V4 - (KNET_PMTUD_OVERHEAD_V6 + KNET_PMTUD_SCTP_OVERHEAD));
+}
diff --git a/libknet/onwire.h b/libknet/onwire.h
index 9815bc3..74d4d09 100644
--- a/libknet/onwire.h
+++ b/libknet/onwire.h
@@ -120,7 +120,9 @@ struct knet_header_payload_ping {
#define KNET_PMTUD_SIZE_V4 65535
#define KNET_PMTUD_SIZE_V6 KNET_PMTUD_SIZE_V4
-/* These two get the protocol-specific overheads added to them */
+/*
+ * IPv4/IPv6 header size
+ */
#define KNET_PMTUD_OVERHEAD_V4 20
#define KNET_PMTUD_OVERHEAD_V6 40
@@ -199,4 +201,8 @@ struct knet_header {
#define KNET_HEADER_PMTUD_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_pmtud))
#define KNET_HEADER_DATA_SIZE (KNET_HEADER_SIZE + sizeof(struct knet_header_payload_data))
+size_t calc_data_outlen(knet_handle_t knet_h, size_t inlen);
+size_t calc_max_data_outlen(knet_handle_t knet_h, size_t inlen);
+size_t calc_min_mtu(knet_handle_t knet_h);
+
#endif
diff --git a/libknet/tests/Makefile.am b/libknet/tests/Makefile.am
index 3346596..9160780 100644
--- a/libknet/tests/Makefile.am
+++ b/libknet/tests/Makefile.am
@@ -38,6 +38,12 @@ int_checks = \
fun_checks =
+# checks below need to be executed manually
+# or with a specific environment
+
+long_run_checks = \
+ fun_pmtud_crypto_test
+
benchmarks = \
knet_bench_test
@@ -45,6 +51,7 @@ noinst_PROGRAMS = \
api_knet_handle_new_limit_test \
pckt_test \
$(benchmarks) \
+ $(long_run_checks) \
$(check_PROGRAMS)
noinst_SCRIPTS = \
@@ -71,6 +78,7 @@ int_links_acl_ip_test_SOURCES = int_links_acl_ip.c \
../logging.c \
../netutils.c \
../threads_common.c \
+ ../onwire.c \
../transports.c \
../transport_common.c \
../transport_loopback.c \
@@ -88,4 +96,9 @@ knet_bench_test_SOURCES = knet_bench.c \
../logging.c \
../compat.c \
../transport_common.c \
- ../threads_common.c
+ ../threads_common.c \
+ ../onwire.c
+
+fun_pmtud_crypto_test_SOURCES = fun_pmtud_crypto.c \
+ test-common.c \
+ ../onwire.c
diff --git a/libknet/tests/api_knet_send_crypto.c b/libknet/tests/api_knet_send_crypto.c
index 11de857..5fc5463 100644
--- a/libknet/tests/api_knet_send_crypto.c
+++ b/libknet/tests/api_knet_send_crypto.c
@@ -67,7 +67,7 @@ static void test(const char *model)
memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
strncpy(knet_handle_crypto_cfg.crypto_cipher_type, "aes128", sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
- strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha1", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
+ strncpy(knet_handle_crypto_cfg.crypto_hash_type, "sha256", sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
knet_handle_crypto_cfg.private_key_len = 2000;
if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
diff --git a/libknet/tests/fun_pmtud_crypto.c b/libknet/tests/fun_pmtud_crypto.c
new file mode 100644
index 0000000..91c062c
--- /dev/null
+++ b/libknet/tests/fun_pmtud_crypto.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (C) 2019 Red Hat, Inc. All rights reserved.
+ *
+ * Authors: Fabio M. Di Nitto <fabbione@kronosnet.org>
+ *
+ * This software licensed under GPL-2.0+
+ */
+
+#include "config.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <net/ethernet.h>
+#include <ifaddrs.h>
+#include <net/if.h>
+
+#include "libknet.h"
+
+#include "compress.h"
+#include "internals.h"
+#include "netutils.h"
+#include "onwire.h"
+#include "test-common.h"
+
+static int private_data;
+
+static void sock_notify(void *pvt_data,
+ int datafd,
+ int8_t channel,
+ uint8_t tx_rx,
+ int error,
+ int errorno)
+{
+ return;
+}
+
+static int iface_fd = 0;
+static int default_mtu = 0;
+
+#ifdef KNET_LINUX
+const char *loopback = "lo";
+#endif
+#ifdef KNET_BSD
+const char *loopback = "lo0";
+#endif
+
+static int fd_init(void)
+{
+#ifdef KNET_LINUX
+ return socket(AF_INET, SOCK_STREAM, 0);
+#endif
+#ifdef KNET_BSD
+ return socket(AF_LOCAL, SOCK_DGRAM, 0);
+#endif
+ return -1;
+}
+
+static int set_iface_mtu(uint32_t mtu)
+{
+ int err = 0;
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(struct ifreq));
+ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
+ ifr.ifr_mtu = mtu;
+
+ err = ioctl(iface_fd, SIOCSIFMTU, &ifr);
+
+ return err;
+}
+
+static int get_iface_mtu(void)
+{
+ int err = 0, savederrno = 0;
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(struct ifreq));
+ strncpy(ifr.ifr_name, loopback, IFNAMSIZ - 1);
+
+ err = ioctl(iface_fd, SIOCGIFMTU, &ifr);
+ if (err) {
+ savederrno = errno;
+ goto out_clean;
+ }
+
+ err = ifr.ifr_mtu;
+
+out_clean:
+ errno = savederrno;
+ return err;
+}
+
+static int exit_local(int code)
+{
+ set_iface_mtu(default_mtu);
+ close(iface_fd);
+ iface_fd = 0;
+ exit(code);
+}
+
+static void test_mtu(const char *model, const char *crypto, const char *hash)
+{
+ knet_handle_t knet_h;
+ int logfds[2];
+ int datafd = 0;
+ int8_t channel = 0;
+ struct sockaddr_storage lo;
+ struct knet_handle_crypto_cfg knet_handle_crypto_cfg;
+ unsigned int data_mtu, expected_mtu;
+ size_t calculated_iface_mtu = 0, detected_iface_mtu = 0;
+
+ if (make_local_sockaddr(&lo, 0) < 0) {
+ printf("Unable to convert loopback to sockaddr: %s\n", strerror(errno));
+ exit_local(FAIL);
+ }
+
+ setup_logpipes(logfds);
+
+ knet_h = knet_handle_start(logfds, KNET_LOG_DEBUG);
+
+ flush_logs(logfds[0], stdout);
+
+ printf("Test knet_send with %s and valid data\n", model);
+
+ memset(&knet_handle_crypto_cfg, 0, sizeof(struct knet_handle_crypto_cfg));
+ strncpy(knet_handle_crypto_cfg.crypto_model, model, sizeof(knet_handle_crypto_cfg.crypto_model) - 1);
+ strncpy(knet_handle_crypto_cfg.crypto_cipher_type, crypto, sizeof(knet_handle_crypto_cfg.crypto_cipher_type) - 1);
+ strncpy(knet_handle_crypto_cfg.crypto_hash_type, hash, sizeof(knet_handle_crypto_cfg.crypto_hash_type) - 1);
+ knet_handle_crypto_cfg.private_key_len = 2000;
+
+ if (knet_handle_crypto(knet_h, &knet_handle_crypto_cfg)) {
+ printf("knet_handle_crypto failed with correct config: %s\n", strerror(errno));
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (knet_handle_enable_sock_notify(knet_h, &private_data, sock_notify) < 0) {
+ printf("knet_handle_enable_sock_notify failed: %s\n", strerror(errno));
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ datafd = 0;
+ channel = -1;
+
+ if (knet_handle_add_datafd(knet_h, &datafd, &channel) < 0) {
+ printf("knet_handle_add_datafd failed: %s\n", strerror(errno));
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (knet_host_add(knet_h, 1) < 0) {
+ printf("knet_host_add failed: %s\n", strerror(errno));
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (knet_link_set_config(knet_h, 1, 0, KNET_TRANSPORT_UDP, &lo, &lo, 0) < 0) {
+ printf("Unable to configure link: %s\n", strerror(errno));
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (knet_link_set_pong_count(knet_h, 1, 0, 1) < 0) {
+ printf("knet_link_set_pong_count failed: %s\n", strerror(errno));
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (knet_link_set_enable(knet_h, 1, 0, 1) < 0) {
+ printf("knet_link_set_enable failed: %s\n", strerror(errno));
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if (wait_for_host(knet_h, 1, 4, logfds[0], stdout) < 0) {
+ printf("timeout waiting for host to be reachable");
+ knet_link_set_enable(knet_h, 1, 0, 0);
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ flush_logs(logfds[0], stdout);
+
+ if (knet_handle_pmtud_get(knet_h, &data_mtu) < 0) {
+ printf("knet_handle_pmtud_get failed error: %s\n", strerror(errno));
+ knet_link_set_enable(knet_h, 1, 0, 0);
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ calculated_iface_mtu = calc_data_outlen(knet_h, data_mtu + KNET_HEADER_ALL_SIZE) + 28;
+ detected_iface_mtu = get_iface_mtu();
+ /*
+ * 28 = 20 IP header + 8 UDP header
+ */
+ expected_mtu = calc_max_data_outlen(knet_h, detected_iface_mtu - 28);
+
+ if (expected_mtu != data_mtu) {
+ printf("Wrong MTU detected! interface mtu: %zu knet mtu: %u expected mtu: %u\n", detected_iface_mtu, data_mtu, expected_mtu);
+ knet_link_set_enable(knet_h, 1, 0, 0);
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ if ((detected_iface_mtu - calculated_iface_mtu) >= knet_h->sec_block_size) {
+ printf("Wrong MTU detected! real iface mtu: %zu calculated: %zu\n", detected_iface_mtu, calculated_iface_mtu);
+ knet_link_set_enable(knet_h, 1, 0, 0);
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+ exit_local(FAIL);
+ }
+
+ knet_link_set_enable(knet_h, 1, 0, 0);
+ knet_link_clear_config(knet_h, 1, 0);
+ knet_host_remove(knet_h, 1);
+ knet_handle_free(knet_h);
+ flush_logs(logfds[0], stdout);
+ close_logpipes(logfds);
+}
+
+static void test(const char *model, const char *crypto, const char *hash)
+{
+ int i = 576;
+ int max = 65535;
+
+ while (i <= max) {
+ printf("Setting interface MTU to: %i\n", i);
+ set_iface_mtu(i);
+ test_mtu(model, crypto, hash);
+ if (i == max) {
+ break;
+ }
+ i = i + 15;
+ if (i > max) {
+ i = max;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct knet_crypto_info crypto_list[16];
+ size_t crypto_list_entries;
+
+#ifdef KNET_BSD
+ if (is_memcheck() || is_helgrind()) {
+ printf("valgrind-freebsd cannot run this test properly. Skipping\n");
+ return SKIP;
+ }
+#endif
+
+ if (geteuid() != 0) {
+ printf("This test requires root privileges\n");
+ return SKIP;
+ }
+
+ iface_fd = fd_init();
+ if (iface_fd < 0) {
+ printf("fd_init failed: %s\n", strerror(errno));
+ return FAIL;
+ }
+
+ default_mtu = get_iface_mtu();
+ if (default_mtu < 0) {
+ printf("get_iface_mtu failed: %s\n", strerror(errno));
+ return FAIL;
+ }
+
+ memset(crypto_list, 0, sizeof(crypto_list));
+
+ if (knet_get_crypto_list(crypto_list, &crypto_list_entries) < 0) {
+ printf("knet_get_crypto_list failed: %s\n", strerror(errno));
+ return FAIL;
+ }
+
+ if (crypto_list_entries == 0) {
+ printf("no crypto modules detected. Skipping\n");
+ return SKIP;
+ }
+
+ test(crypto_list[0].name, "aes128", "sha1");
+ test(crypto_list[0].name, "aes128", "sha256");
+ test(crypto_list[0].name, "aes256", "sha1");
+ test(crypto_list[0].name, "aes256", "sha256");
+
+ exit_local(PASS);
+}
diff --git a/libknet/threads_common.c b/libknet/threads_common.c
index 1f3e1e3..03edfc4 100644
--- a/libknet/threads_common.c
+++ b/libknet/threads_common.c
@@ -161,7 +161,7 @@ void force_pmtud_run(knet_handle_t knet_h, uint8_t subsystem, uint8_t reset_mtu)
{
if (reset_mtu) {
log_debug(knet_h, subsystem, "PMTUd has been reset to default");
- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
+ knet_h->data_mtu = calc_min_mtu(knet_h);
if (knet_h->pmtud_notify_fn) {
knet_h->pmtud_notify_fn(knet_h->pmtud_notify_fn_private_data,
knet_h->data_mtu);
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index 1a19806..1dd1788 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -25,16 +25,16 @@
static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link)
{
int err, ret, savederrno, mutex_retry_limit, failsafe, use_kernel_mtu, warn_once;
- uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */
- size_t onwire_len; /* current packet onwire size */
- size_t overhead_len; /* onwire packet overhead (protocol based) */
- size_t max_mtu_len; /* max mtu for protocol */
- size_t data_len; /* how much data we can send in the packet
- * generally would be onwire_len - overhead_len
- * needs to be adjusted for crypto
- */
- size_t pad_len; /* crypto packet pad size, needs to move into crypto.c callbacks */
- ssize_t len; /* len of what we were able to sendto onwire */
+ uint32_t kernel_mtu; /* record kernel_mtu from EMSGSIZE */
+ size_t onwire_len; /* current packet onwire size */
+ size_t ipproto_overhead_len; /* onwire packet overhead (protocol based) */
+ size_t max_mtu_len; /* max mtu for protocol */
+ size_t data_len; /* how much data we can send in the packet
+ * generally would be onwire_len - ipproto_overhead_len
+ * needs to be adjusted for crypto
+ */
+ size_t app_mtu_len; /* real data that we can send onwire */
+ ssize_t len; /* len of what we were able to sendto onwire */
struct timespec ts;
unsigned long long pong_timeout_adj_tmp;
@@ -45,20 +45,16 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
mutex_retry_limit = 0;
failsafe = 0;
- dst_link->last_bad_mtu = 0;
-
knet_h->pmtudbuf->khp_pmtud_link = dst_link->link_id;
switch (dst_link->dst_addr.ss_family) {
case AF_INET6:
max_mtu_len = KNET_PMTUD_SIZE_V6;
- overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
+ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead;
break;
case AF_INET:
max_mtu_len = KNET_PMTUD_SIZE_V4;
- overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
- dst_link->last_good_mtu = dst_link->last_ping_size + overhead_len;
+ ipproto_overhead_len = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead;
break;
default:
log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted, unknown protocol");
@@ -66,6 +62,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
break;
}
+ dst_link->last_bad_mtu = 0;
+ dst_link->last_good_mtu = dst_link->last_ping_size + ipproto_overhead_len;
+
/*
* discovery starts from the top because kernel will
* refuse to send packets > current iface mtu.
@@ -92,107 +91,39 @@ restart:
}
/*
- * unencrypted packet looks like:
- *
- * | ip | protocol | knet_header | unencrypted data |
- * | onwire_len |
- * | overhead_len |
- * | data_len |
- * | app MTU |
- *
- * encrypted packet looks like (not to scale):
- *
- * | ip | protocol | salt | crypto(knet_header | data) | crypto_data_pad | hash |
- * | onwire_len |
- * | overhead_len |
- * | data_len |
- * | app MTU |
- *
- * knet_h->sec_block_size is >= 0 if encryption will pad the data
- * knet_h->sec_salt_size is >= 0 if encryption is enabled
- * knet_h->sec_hash_size is >= 0 if signing is enabled
+ * common to all packets
*/
/*
- * common to all packets
+ * calculate the application MTU based on current onwire_len minus ipproto_overhead_len
*/
- data_len = onwire_len - overhead_len;
-
- if (knet_h->crypto_instance) {
-realign:
- if (knet_h->sec_block_size) {
+ app_mtu_len = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
- /*
- * drop both salt and hash, that leaves only the crypto data and padding
- * we need to calculate the padding based on the real encrypted data.
- */
- data_len = data_len - (knet_h->sec_salt_size + knet_h->sec_hash_size);
-
- /*
- * if the crypto mechanism requires padding, calculate the padding
- * and add it back to data_len because that's what the crypto layer
- * would do.
- */
- pad_len = knet_h->sec_block_size - (data_len % knet_h->sec_block_size);
-
- /*
- * if are at the boundary, reset padding
- */
- if (pad_len == knet_h->sec_block_size) {
- pad_len = 0;
- }
- data_len = data_len + pad_len;
-
- /*
- * if our current data_len is higher than max_mtu_len
- * then we need to reduce by padding size (that is our
- * increment / decrement value)
- *
- * this generally happens only on the first PMTUd run
- */
- while (data_len + overhead_len >= max_mtu_len) {
- data_len = data_len - knet_h->sec_block_size;
- }
+ /*
+ * recalculate onwire len back that might be different based
+ * on data padding from crypto layer.
+ */
- /*
- * add both hash and salt size back, similar to padding above,
- * the crypto layer will add them to the data_len
- */
- data_len = data_len + (knet_h->sec_salt_size + knet_h->sec_hash_size);
- }
+ onwire_len = calc_data_outlen(knet_h, app_mtu_len + KNET_HEADER_ALL_SIZE) + ipproto_overhead_len;
- if (dst_link->last_bad_mtu) {
- if (data_len + overhead_len >= dst_link->last_bad_mtu) {
- /*
- * reduce data_len to something lower than last_bad_mtu, overhead_len
- * and sec_block_size (decrementing step) - 1 (granularity)
- */
- data_len = dst_link->last_bad_mtu - overhead_len - knet_h->sec_block_size - 1;
- if (knet_h->sec_block_size) {
- /*
- * make sure that data_len is aligned to the sec_block_size boundary
- */
- goto realign;
- }
- }
- }
+ /*
+ * calculate the size of what we need to send to sendto(2).
+ * see also onwire.c for packet format explanation.
+ */
+ data_len = app_mtu_len + knet_h->sec_hash_size + knet_h->sec_salt_size + KNET_HEADER_ALL_SIZE;
- if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size) + 1) {
+ if (knet_h->crypto_instance) {
+ if (data_len < (knet_h->sec_hash_size + knet_h->sec_salt_size) + 1) {
log_debug(knet_h, KNET_SUB_PMTUD, "Aborting PMTUD process: link mtu smaller than crypto header detected (link might have been disconnected)");
return -1;
}
- /*
- * recalculate onwire_len based on crypto information
- * and place it in the PMTUd packet info
- */
- onwire_len = data_len + overhead_len;
knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
if (crypto_encrypt_and_sign(knet_h,
(const unsigned char *)knet_h->pmtudbuf,
- data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size + knet_h->sec_block_size),
+ data_len - (knet_h->sec_hash_size + knet_h->sec_salt_size),
knet_h->pmtudbuf_crypt,
(ssize_t *)&data_len) < 0) {
log_debug(knet_h, KNET_SUB_PMTUD, "Unable to crypto pmtud packet");
@@ -201,11 +132,8 @@ realign:
outbuf = knet_h->pmtudbuf_crypt;
knet_h->stats_extra.tx_crypt_pmtu_packets++;
-
} else {
-
knet_h->pmtudbuf->khp_pmtud_size = onwire_len;
-
}
/* link has gone down, aborting pmtud */
@@ -417,7 +345,7 @@ retry:
/*
* account for IP overhead, knet headers and crypto in PMTU calculation
*/
- dst_link->status.mtu = onwire_len - dst_link->status.proto_overhead;
+ dst_link->status.mtu = calc_max_data_outlen(knet_h, onwire_len - ipproto_overhead_len);
pthread_mutex_unlock(&knet_h->pmtud_mutex);
return 0;
}
@@ -437,7 +365,7 @@ retry:
goto restart;
}
-static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, unsigned int *min_mtu, int force_run)
+static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host, struct knet_link *dst_link, int force_run)
{
uint8_t saved_valid_pmtud;
unsigned int saved_pmtud;
@@ -455,17 +383,22 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
timespec_diff(dst_link->pmtud_last, clock_now, &diff_pmtud);
if (diff_pmtud < interval) {
- *min_mtu = dst_link->status.mtu;
return dst_link->has_valid_mtu;
}
}
+ /*
+ * status.proto_overhead should include all IP/(UDP|SCTP)/knet headers
+ *
+ * please note that it is not the same as link->proto_overhead that
+ * includes only either UDP or SCTP (at the moment) overhead.
+ */
switch (dst_link->dst_addr.ss_family) {
case AF_INET6:
- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
+ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V6 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
break;
case AF_INET:
- dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_header_size;
+ dst_link->status.proto_overhead = KNET_PMTUD_OVERHEAD_V4 + dst_link->proto_overhead + KNET_HEADER_ALL_SIZE + knet_h->sec_hash_size + knet_h->sec_salt_size;
break;
}
@@ -486,26 +419,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
dst_link->has_valid_mtu = 0;
} else {
dst_link->has_valid_mtu = 1;
- switch (dst_link->dst_addr.ss_family) {
- case AF_INET6:
- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V6) ||
- ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V6)) {
- log_debug(knet_h, KNET_SUB_PMTUD,
- "PMTUD detected an IPv6 MTU out of bound value (%u) for host: %u link: %u.",
- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
- dst_link->has_valid_mtu = 0;
- }
- break;
- case AF_INET:
- if (((dst_link->status.mtu + dst_link->status.proto_overhead) < KNET_PMTUD_MIN_MTU_V4) ||
- ((dst_link->status.mtu + dst_link->status.proto_overhead) > KNET_PMTUD_SIZE_V4)) {
- log_debug(knet_h, KNET_SUB_PMTUD,
- "PMTUD detected an IPv4 MTU out of bound value (%u) for host: %u link: %u.",
- dst_link->status.mtu + dst_link->status.proto_overhead, dst_host->host_id, dst_link->link_id);
- dst_link->has_valid_mtu = 0;
- }
- break;
- }
if (dst_link->has_valid_mtu) {
if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",
@@ -513,9 +426,6 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
}
log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD completed for host: %u link: %u current link mtu: %u",
dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
- if (dst_link->status.mtu < *min_mtu) {
- *min_mtu = dst_link->status.mtu;
- }
/*
* set pmtud_last, if we can, after we are done with the PMTUd process
@@ -541,14 +451,14 @@ void *_handle_pmtud_link_thread(void *data)
struct knet_host *dst_host;
struct knet_link *dst_link;
int link_idx;
- unsigned int min_mtu, have_mtu;
+ unsigned int have_mtu;
unsigned int lower_mtu;
int link_has_mtu;
int force_run = 0;
set_thread_status(knet_h, KNET_THREAD_PMTUD, KNET_THREAD_STARTED);
- knet_h->data_mtu = KNET_PMTUD_MIN_MTU_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
+ knet_h->data_mtu = calc_min_mtu(knet_h);
/* preparing pmtu buffer */
knet_h->pmtudbuf->kh_version = KNET_HEADER_VERSION;
@@ -578,7 +488,6 @@ void *_handle_pmtud_link_thread(void *data)
}
lower_mtu = KNET_PMTUD_SIZE_V4;
- min_mtu = KNET_PMTUD_SIZE_V4 - KNET_HEADER_ALL_SIZE - knet_h->sec_header_size;
have_mtu = 0;
for (dst_host = knet_h->host_head; dst_host != NULL; dst_host = dst_host->next) {
@@ -593,14 +502,14 @@ void *_handle_pmtud_link_thread(void *data)
(dst_link->status.dynconnected != 1)))
continue;
- link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, &min_mtu, force_run);
+ link_has_mtu = _handle_check_pmtud(knet_h, dst_host, dst_link, force_run);
if (errno == EDEADLK) {
goto out_unlock;
}
if (link_has_mtu) {
have_mtu = 1;
- if (min_mtu < lower_mtu) {
- lower_mtu = min_mtu;
+ if (dst_link->status.mtu < lower_mtu) {
+ lower_mtu = dst_link->status.mtu;
}
}
}
commit 499f589404db791d8e68c84c8ba3a857aeea5083
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Aug 13 06:41:32 2019 +0200
[PMTUd] add dynamic pong timeout when using crypto
Problem originally reported by the Proxmox community: users
observed that, under pressure, the MTU would flap back and forth
between 2 values because the other node's response timed out.
implement a dynamic timeout multiplier when using crypto that
should solve the problem in a more flexible fashion.
When a timeout hits, those new logs will show:
[knet]: [info] host: host: 1 (passive) best link: 0 (pri: 0)
[knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (4) for host 1 link: 0
[knet]: [info] pmtud: PMTUD link change for host: 1 link: 0 from 469 to 65429
[knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
[knet]: [info] pmtud: Global data MTU changed to: 65429
[knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (8) for host 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (16) for host 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (32) for host 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (64) for host 1 link: 0
[knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
[knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
[knet]: [debug] pmtud: Increasing PMTUd response timeout multiplier to (128) for host 1 link: 0
[knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
and when the latency reduces and it is safe to be more responsive again:
[knet]: [debug] pmtud: Starting PMTUD for host: 1 link: 0
[knet]: [debug] pmtud: Decreasing PMTUd response timeout multiplier to (64) for host 1 link: 0
[knet]: [debug] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 65429
....
Testing this patch on normal hosts is a bit challenging, though.
The patch was tested by hardcoding a very low timeout
and using a long-running version of api_knet_send_crypto_test with a short PMTUd setfreq (10 sec).
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/internals.h b/libknet/internals.h
index 31840e4..d1a4757 100644
--- a/libknet/internals.h
+++ b/libknet/internals.h
@@ -80,6 +80,7 @@ struct knet_link {
uint32_t last_bad_mtu;
uint32_t last_sent_mtu;
uint32_t last_recv_mtu;
+ uint32_t pmtud_crypto_timeout_multiplier;/* used by PMTUd to adjust timeouts on high loads */
uint8_t has_valid_mtu;
};
diff --git a/libknet/links.c b/libknet/links.c
index 03e0af9..f7eccc3 100644
--- a/libknet/links.c
+++ b/libknet/links.c
@@ -219,6 +219,7 @@ int knet_link_set_config(knet_handle_t knet_h, knet_node_id_t host_id, uint8_t l
}
}
+ link->pmtud_crypto_timeout_multiplier = KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN;
link->pong_count = KNET_LINK_DEFAULT_PONG_COUNT;
link->has_valid_mtu = 0;
link->ping_interval = KNET_LINK_DEFAULT_PING_INTERVAL * 1000; /* microseconds */
diff --git a/libknet/links.h b/libknet/links.h
index e14958d..c8ca610 100644
--- a/libknet/links.h
+++ b/libknet/links.h
@@ -30,6 +30,16 @@
*/
#define KNET_LINK_PONG_TIMEOUT_LAT_MUL 2
+/*
+ * under heavy load with crypto enabled, it takes much
+ * longer time to receive a response from the other node.
+ *
+ * 128 is a somewhat arbitrary number, but we want to set a limit
+ * and report failures after that.
+ */
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN 2
+#define KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX 128
+
int _link_updown(knet_handle_t knet_h, knet_node_id_t node_id, uint8_t link_id,
unsigned int enabled, unsigned int connected);
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index 1dd1788..d342697 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -36,8 +36,9 @@ static int _handle_check_link_pmtud(knet_handle_t knet_h, struct knet_host *dst_
size_t app_mtu_len; /* real data that we can send onwire */
ssize_t len; /* len of what we were able to sendto onwire */
- struct timespec ts;
- unsigned long long pong_timeout_adj_tmp;
+ struct timespec ts, pmtud_crypto_start_ts, pmtud_crypto_stop_ts;
+ unsigned long long pong_timeout_adj_tmp, timediff;
+ int pmtud_crypto_reduce = 1;
unsigned char *outbuf = (unsigned char *)knet_h->pmtudbuf;
warn_once = 0;
@@ -242,6 +243,15 @@ retry:
return -1;
}
+ /*
+ * non-fatal: we can wait for the next round to reduce the
+ * multiplier
+ */
+ if (clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_start_ts) < 0) {
+ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
+ pmtud_crypto_reduce = 0;
+ }
+
/*
* set PMTUd reply timeout to match pong_timeout on a given link
*
@@ -261,7 +271,7 @@ retry:
/*
* crypto, under pressure, is a royal PITA
*/
- pong_timeout_adj_tmp = dst_link->pong_timeout_adj * 2;
+ pong_timeout_adj_tmp = dst_link->pong_timeout_adj * dst_link->pmtud_crypto_timeout_multiplier;
} else {
pong_timeout_adj_tmp = dst_link->pong_timeout_adj;
}
@@ -295,6 +305,17 @@ retry:
if (ret) {
if (ret == ETIMEDOUT) {
+ if ((knet_h->crypto_instance) && (dst_link->pmtud_crypto_timeout_multiplier < KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MAX)) {
+ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier * 2;
+ pmtud_crypto_reduce = 0;
+ log_debug(knet_h, KNET_SUB_PMTUD,
+ "Increasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
+ dst_link->pmtud_crypto_timeout_multiplier,
+ dst_host->host_id,
+ dst_link->link_id);
+ pthread_mutex_unlock(&knet_h->pmtud_mutex);
+ goto restart;
+ }
if (!warn_once) {
log_warn(knet_h, KNET_SUB_PMTUD,
"possible MTU misconfiguration detected. "
@@ -323,6 +344,23 @@ retry:
}
}
+ if ((knet_h->crypto_instance) && (pmtud_crypto_reduce == 1) &&
+ (dst_link->pmtud_crypto_timeout_multiplier > KNET_LINK_PMTUD_CRYPTO_TIMEOUT_MULTIPLIER_MIN)) {
+ if (!clock_gettime(CLOCK_MONOTONIC, &pmtud_crypto_stop_ts)) {
+ timespec_diff(pmtud_crypto_start_ts, pmtud_crypto_stop_ts, &timediff);
+ if (((pong_timeout_adj_tmp * 1000) / 2) > timediff) {
+ dst_link->pmtud_crypto_timeout_multiplier = dst_link->pmtud_crypto_timeout_multiplier / 2;
+ log_debug(knet_h, KNET_SUB_PMTUD,
+ "Decreasing PMTUd response timeout multiplier to (%u) for host %u link: %u",
+ dst_link->pmtud_crypto_timeout_multiplier,
+ dst_host->host_id,
+ dst_link->link_id);
+ }
+ } else {
+ log_debug(knet_h, KNET_SUB_PMTUD, "Unable to get current time: %s", strerror(errno));
+ }
+ }
+
if ((dst_link->last_recv_mtu != onwire_len) || (ret)) {
dst_link->last_bad_mtu = onwire_len;
} else {
commit 5f3476849523e9ee486481b429b471a1ab3cac20
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Thu Jul 18 07:50:37 2019 +0200
[handle] make sure that the pmtud buf contains at least knet header size
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/handle.c b/libknet/handle.c
index 4835e99..1fb9c9b 100644
--- a/libknet/handle.c
+++ b/libknet/handle.c
@@ -234,14 +234,14 @@ static int _init_buffers(knet_handle_t knet_h)
}
memset(knet_h->pingbuf, 0, KNET_HEADER_PING_SIZE);
- knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6);
+ knet_h->pmtudbuf = malloc(KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
if (!knet_h->pmtudbuf) {
savederrno = errno;
log_err(knet_h, KNET_SUB_HANDLE, "Unable to allocate memory for pmtud buffer: %s",
strerror(savederrno));
goto exit_fail;
}
- memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6);
+ memset(knet_h->pmtudbuf, 0, KNET_PMTUD_SIZE_V6 + KNET_HEADER_ALL_SIZE);
for (i = 0; i < PCKT_FRAG_MAX; i++) {
bufsize = ceil((float)KNET_MAX_PACKET_SIZE / (i + 1)) + KNET_HEADER_ALL_SIZE + KNET_DATABUFSIZE_CRYPT_PAD;
commit 3b3b6d2a7e1fee7eb41c6bacc1005ff90f7dd5cb
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Thu Jul 18 10:23:14 2019 +0200
[tests] fix knet_bench coverity errors
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/tests/knet_bench.c b/libknet/tests/knet_bench.c
index dfe5238..dc04239 100644
--- a/libknet/tests/knet_bench.c
+++ b/libknet/tests/knet_bench.c
@@ -277,22 +277,24 @@ static void setup_knet(int argc, char *argv[])
printf("Error: -p can only be specified once\n");
exit(FAIL);
}
- policystr = optarg;
- if (!strcmp(policystr, "active")) {
- policy = KNET_LINK_POLICY_ACTIVE;
- policyfound = 1;
- }
- /*
- * we can't use rr because clangs can't compile
- * an array of 3 strings, one of which is 2 bytes long
- */
- if (!strcmp(policystr, "round-robin")) {
- policy = KNET_LINK_POLICY_RR;
- policyfound = 1;
- }
- if (!strcmp(policystr, "passive")) {
- policy = KNET_LINK_POLICY_PASSIVE;
- policyfound = 1;
+ if (optarg) {
+ policystr = optarg;
+ if (!strcmp(policystr, "active")) {
+ policy = KNET_LINK_POLICY_ACTIVE;
+ policyfound = 1;
+ }
+ /*
+ * we can't use rr because clangs can't compile
+ * an array of 3 strings, one of which is 2 bytes long
+ */
+ if (!strcmp(policystr, "round-robin")) {
+ policy = KNET_LINK_POLICY_RR;
+ policyfound = 1;
+ }
+ if (!strcmp(policystr, "passive")) {
+ policy = KNET_LINK_POLICY_PASSIVE;
+ policyfound = 1;
+ }
}
if (!policyfound) {
printf("Error: invalid policy %s specified. -p accepts active|passive|rr\n", policystr);
@@ -304,14 +306,16 @@ static void setup_knet(int argc, char *argv[])
printf("Error: -P can only be specified once\n");
exit(FAIL);
}
- protostr = optarg;
- if (!strcmp(protostr, "UDP")) {
- protocol = KNET_TRANSPORT_UDP;
- protofound = 1;
- }
- if (!strcmp(protostr, "SCTP")) {
- protocol = KNET_TRANSPORT_SCTP;
- protofound = 1;
+ if (optarg) {
+ protostr = optarg;
+ if (!strcmp(protostr, "UDP")) {
+ protocol = KNET_TRANSPORT_UDP;
+ protofound = 1;
+ }
+ if (!strcmp(protostr, "SCTP")) {
+ protocol = KNET_TRANSPORT_SCTP;
+ protofound = 1;
+ }
}
if (!protofound) {
printf("Error: invalid protocol %s specified. -P accepts udp|sctp\n", policystr);
@@ -380,17 +384,22 @@ static void setup_knet(int argc, char *argv[])
}
break;
case 'T':
- if (!strcmp("ping", optarg)) {
- test_type = TEST_PING;
- }
- if (!strcmp("ping_data", optarg)) {
- test_type = TEST_PING_AND_DATA;
- }
- if (!strcmp("perf-by-size", optarg)) {
- test_type = TEST_PERF_BY_SIZE;
- }
- if (!strcmp("perf-by-time", optarg)) {
- test_type = TEST_PERF_BY_TIME;
+ if (optarg) {
+ if (!strcmp("ping", optarg)) {
+ test_type = TEST_PING;
+ }
+ if (!strcmp("ping_data", optarg)) {
+ test_type = TEST_PING_AND_DATA;
+ }
+ if (!strcmp("perf-by-size", optarg)) {
+ test_type = TEST_PERF_BY_SIZE;
+ }
+ if (!strcmp("perf-by-time", optarg)) {
+ test_type = TEST_PERF_BY_TIME;
+ }
+ } else {
+ printf("Error: -T requires an option\n");
+ exit(FAIL);
}
break;
case 'S':
@@ -957,15 +966,14 @@ static void display_stats(int level)
struct knet_link_stats total_link_stats;
knet_node_id_t host_list[KNET_MAX_HOST];
uint8_t link_list[KNET_MAX_LINK];
- int res;
unsigned int i,j;
size_t num_hosts, num_links;
- res = knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats));
- if (res) {
+ if (knet_handle_get_stats(knet_h, &handle_stats, sizeof(handle_stats)) < 0) {
perror("[info]: failed to get knet handle stats");
return;
}
+
if (compresscfg || cryptocfg) {
printf("\n");
printf("[stat]: handle stats\n");
@@ -1005,8 +1013,7 @@ static void display_stats(int level)
memset(&total_link_stats, 0, sizeof(struct knet_link_stats));
- res = knet_host_get_host_list(knet_h, host_list, &num_hosts);
- if (res) {
+ if (knet_host_get_host_list(knet_h, host_list, &num_hosts) < 0) {
perror("[info]: cannot get host list for stats");
return;
}
@@ -1015,18 +1022,16 @@ static void display_stats(int level)
qsort(host_list, num_hosts, sizeof(uint16_t), node_compare);
for (j=0; j<num_hosts; j++) {
- res = knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links);
- if (res) {
+ if (knet_link_get_link_list(knet_h, host_list[j], link_list, &num_links) < 0) {
perror("[info]: cannot get link list for stats");
return;
}
for (i=0; i < num_links; i++) {
- res = knet_link_get_status(knet_h,
- host_list[j],
- link_list[i],
- &link_status,
- sizeof(link_status));
+ if (knet_link_get_status(knet_h, host_list[j], link_list[i], &link_status, sizeof(link_status)) < 0) {
+ perror("[info]: cannot get link status");
+ return;
+ }
total_link_stats.tx_data_packets += link_status.stats.tx_data_packets;
total_link_stats.rx_data_packets += link_status.stats.rx_data_packets;
commit d74380a82c00716aafb780f5602182fce90d381f
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Wed Jul 24 08:38:56 2019 +0200
[PMTUd] do not double unlock global read lock
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index d342697..f884760 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -297,7 +297,11 @@ retry:
return -1;
}
- if (shutdown_in_progress(knet_h)) {
+ /*
+ * we cannot use shutdown_in_progress in here because
+ * we already hold the read lock
+ */
+ if (knet_h->fini_in_progress) {
pthread_mutex_unlock(&knet_h->pmtud_mutex);
log_debug(knet_h, KNET_SUB_PMTUD, "PMTUD aborted. shutdown in progress");
return -1;
commit 01242c683b18b813a67c13d3fc0546fec34f9f7c
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Mon Sep 9 15:11:25 2019 +0200
[pmtud] switch to use async version of dstcache update due to locking context (read vs write)
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index f884760..d10984f 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -481,7 +481,7 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
}
if (saved_valid_pmtud != dst_link->has_valid_mtu) {
- _host_dstcache_update_sync(knet_h, dst_host);
+ _host_dstcache_update_async(knet_h, dst_host);
}
return dst_link->has_valid_mtu;
commit a70f0adf0d4d38ed614bf2eef1a4e66fec2f2c92
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Fri Sep 13 07:28:55 2019 +0200
[tests] fix ip generation boundaries
https://ci.kronosnet.org/job/knet-build-all-voting/1450/knet-build-all-voting=rhel80z-s390x/console
and similar: when a byte of the pid is 255, the secondary IP would hit 256, which is of course invalid.
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libnozzle/tests/test-common.c b/libnozzle/tests/test-common.c
index b36be79..3afd2ec 100644
--- a/libnozzle/tests/test-common.c
+++ b/libnozzle/tests/test-common.c
@@ -124,7 +124,7 @@ void make_local_ips(char *testipv4_1, char *testipv4_2, char *testipv6_1, char *
pid = (uint8_t *)&mypid;
for (i = 0; i < sizeof(pid_t); i++) {
- if (pid[i] == 0) {
+ if ((pid[i] == 0) || (pid[i] == 255)) {
pid[i] = 128;
}
}
commit 63567e1e6b6ebb91fe1df43b910d6b9bd78d528f
Author: Fabio M. Di Nitto <fdinitto@redhat.com>
Date: Tue Oct 15 11:53:56 2019 +0200
[PMTUd] invalidate MTU for a link if the value is lower than minimum
Under heavy network load and packet loss, the calculated MTU can be
too small. In that case we need to invalidate the link MTU;
this removes the link from the rotation (and traffic) and
gives PMTUd time to get the right MTU in the next round.
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
diff --git a/libknet/threads_pmtud.c b/libknet/threads_pmtud.c
index d10984f..ab00b47 100644
--- a/libknet/threads_pmtud.c
+++ b/libknet/threads_pmtud.c
@@ -460,7 +460,14 @@ static int _handle_check_pmtud(knet_handle_t knet_h, struct knet_host *dst_host,
}
dst_link->has_valid_mtu = 0;
} else {
- dst_link->has_valid_mtu = 1;
+ if (dst_link->status.mtu < calc_min_mtu(knet_h)) {
+ log_info(knet_h, KNET_SUB_PMTUD,
+ "Invalid MTU detected for host: %u link: %u mtu: %u",
+ dst_host->host_id, dst_link->link_id, dst_link->status.mtu);
+ dst_link->has_valid_mtu = 0;
+ } else {
+ dst_link->has_valid_mtu = 1;
+ }
if (dst_link->has_valid_mtu) {
if ((saved_pmtud) && (saved_pmtud != dst_link->status.mtu)) {
log_info(knet_h, KNET_SUB_PMTUD, "PMTUD link change for host: %u link: %u from %u to %u",