From c02379b03e0ca61bc02afc76f3dd523ae41173df Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Tue, 9 Aug 2022 10:53:56 -0400 Subject: [PATCH] import kronosnet-1.22-4.el9_0 --- ...2923-icmp-faster-link-down-detection.patch | 82 +++++++++++++++++++ .../bz212923-fix-dst_seq_num_init-race.patch | 44 ++++++++++ SPECS/kronosnet.spec | 12 ++- 3 files changed, 136 insertions(+), 2 deletions(-) create mode 100644 SOURCES/bz2112923-icmp-faster-link-down-detection.patch create mode 100644 SOURCES/bz212923-fix-dst_seq_num_init-race.patch diff --git a/SOURCES/bz2112923-icmp-faster-link-down-detection.patch b/SOURCES/bz2112923-icmp-faster-link-down-detection.patch new file mode 100644 index 0000000..cf8f99b --- /dev/null +++ b/SOURCES/bz2112923-icmp-faster-link-down-detection.patch @@ -0,0 +1,82 @@ +commit 28ddb87a2f4562c5d1752a778744cc56136f81c1 +Author: Fabio M. Di Nitto +Date: Sun Nov 7 17:02:05 2021 +0100 + + [udp] use ICMP error messages to trigger faster link down detection + + this solves a possible race condition when: + + - node1 is running + - node2 restarts very fast + - node1 does NOT have enough time to detect that node2 has gone + and reset the local seq numbers / buffers + - node1 will start rejecting valid packets from node2 + + There is still a potential minor race condition where app + can restart so fast that kernel / network don't have time + to generate an ICMP error. This will be addressed using + instance id in onwire v2 protocol, as suggested by Jan F. + + Signed-off-by: Fabio M.
Di Nitto + +diff --git a/libknet/transport_udp.c b/libknet/transport_udp.c +index 963340d..32dd032 100644 +--- a/libknet/transport_udp.c ++++ b/libknet/transport_udp.c +@@ -364,6 +364,46 @@ static int read_errs_from_sock(knet_handle_t knet_h, int sockfd) + log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s destination unknown", addr_str, strerror(sock_err->ee_errno)); + } else { + log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Received ICMP error from %s: %s %s", addr_str, strerror(sock_err->ee_errno), addr_remote_str); ++ if ((sock_err->ee_errno == ECONNREFUSED) || /* knet is not running on the other node */ ++ (sock_err->ee_errno == ECONNABORTED) || /* local kernel closed the socket */ ++ (sock_err->ee_errno == ENONET) || /* network does not exist */ ++ (sock_err->ee_errno == ENETUNREACH) || /* network unreachable */ ++ (sock_err->ee_errno == EHOSTUNREACH) || /* host unreachable */ ++ (sock_err->ee_errno == EHOSTDOWN) || /* host down (from kernel/net/ipv4/icmp.c */ ++ (sock_err->ee_errno == ENETDOWN)) { /* network down */ ++ struct knet_host *host = NULL; ++ struct knet_link *kn_link = NULL; ++ int link_idx, found = 0; ++ ++ for (host = knet_h->host_head; host != NULL; host = host->next) { ++ for (link_idx = 0; link_idx < KNET_MAX_LINK; link_idx++) { ++ kn_link = &host->link[link_idx]; ++ if (kn_link->outsock == sockfd) { ++ if (!cmpaddr(&remote, &kn_link->dst_addr)) { ++ found = 1; ++ break; ++ } ++ } ++ } ++ if (found) { ++ break; ++ } ++ } ++ ++ if ((host) && (kn_link) && ++ (kn_link->status.connected)) { ++ log_debug(knet_h, KNET_SUB_TRANSP_UDP, "Setting down host %u link %i", host->host_id, kn_link->link_id); ++ /* ++ * setting transport_connected = 0 will trigger ++ * thread_heartbeat link_down process. 
++ * ++ * the process terminates calling into transport_link_down ++ * below that will set transport_connected = 1 ++ */ ++ kn_link->transport_connected = 0; ++ } ++ ++ } + } + } + break; +@@ -436,5 +476,9 @@ int udp_transport_link_dyn_connect(knet_handle_t knet_h, int sockfd, struct knet + + int udp_transport_link_is_down(knet_handle_t knet_h, struct knet_link *kn_link) + { ++ /* ++ * see comments about handling ICMP error messages ++ */ ++ kn_link->transport_connected = 1; + return 0; + } diff --git a/SOURCES/bz212923-fix-dst_seq_num_init-race.patch b/SOURCES/bz212923-fix-dst_seq_num_init-race.patch new file mode 100644 index 0000000..5126ffb --- /dev/null +++ b/SOURCES/bz212923-fix-dst_seq_num_init-race.patch @@ -0,0 +1,44 @@ +commit 62271c5c0b08041b24930310c04e3933720917c6 +Author: Fabio M. Di Nitto +Date: Mon Nov 8 09:14:22 2021 +0100 + + [host] fix dst_seq_num initialization race condition + + There is a potential race condition where the sender + is overloaded, sending data packets before pings + can kick in and set the correct dst_seq_num. + + if this node is starting up (dst_seq_num = 0), + it can start rejecting valid packets and get stuck. + + Set the dst_seq_num to the first seen packet and + use that as reference instead. + + Signed-off-by: Fabio M. Di Nitto + +diff --git a/libknet/host.c b/libknet/host.c +index f02ef02..54061fd 100644 +--- a/libknet/host.c ++++ b/libknet/host.c +@@ -573,6 +573,21 @@ int _seq_num_lookup(knet_handle_t knet_h, struct knet_host *host, seq_num_t seq_ + char *dst_cbuf_defrag = host->circular_buffer_defrag; + seq_num_t *dst_seq_num = &host->rx_seq_num; + ++ /* ++ * There is a potential race condition where the sender ++ * is overloaded, sending data packets before pings ++ * can kick in and set the correct dst_seq_num. ++ * ++ * if this node is starting up (dst_seq_num = 0), ++ * it can start rejecting valid packets and get stuck. ++ * ++ * Set the dst_seq_num to the first seen packet and ++ * use that as reference instead.
++ */ ++ if (!*dst_seq_num) { ++ *dst_seq_num = seq_num; ++ } ++ + if (clear_buf) { + _clear_cbuffers(host, seq_num); + } diff --git a/SPECS/kronosnet.spec b/SPECS/kronosnet.spec index 4a655f8..64f01fd 100644 --- a/SPECS/kronosnet.spec +++ b/SPECS/kronosnet.spec @@ -37,12 +37,13 @@ Name: kronosnet Summary: Multipoint-to-Multipoint VPN daemon Version: 1.22 -Release: 3%{?dist} +Release: 4%{?dist} License: GPLv2+ and LGPLv2+ URL: https://kronosnet.org Source0: https://kronosnet.org/releases/%{name}-%{version}.tar.xz -#Patch0: bz1954551-enable-stack-clash-protection.patch +Patch0: bz2112923-icmp-faster-link-down-detection.patch +Patch1: bz212923-fix-dst_seq_num_init-race.patch # Build dependencies BuildRequires: make @@ -87,6 +88,9 @@ BuildRequires: autoconf automake libtool %prep %setup -q -n %{name}-%{version} +%patch0 -p1 -b bz2112923-icmp-faster-link-down-detection +%patch1 -p1 -b bz212923-fix-dst_seq_num_init-race + #%patch0 -p1 -b .bz1954551-enable-stack-clash-protection %build @@ -439,6 +443,10 @@ Requires: libknet1%{_isa} = %{version}-%{release} %endif %changelog +* Wed Aug 03 2022 Christine Caulfield - 1.22-4 +- Fix "node randomly not rejoining cluster after a pcs cluster stop/start" + Resolves: rhbz#2112923 + * Wed Oct 06 2021 Christine Caulfield - 1.22-3 - Add back the test suite that got lost in the pull from Fedora Resolves: rhbz#1999980