From 9e8ddfdb4a81981cf107ecc072903cbfa9b4e587 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Tue, 15 Nov 2022 18:58:14 +0100 Subject: [PATCH] - Resolves: rhbz#2135861 --- .gitignore | 1 + ...cel_hold_on_retransmit-config-option.patch | 128 ------------------ ...ch-totempg-buffers-at-the-right-time.patch | 109 --------------- ...e-Use-copytruncate-method-by-default.patch | 60 -------- corosync.spec | 17 +-- sources | 2 +- 6 files changed, 9 insertions(+), 308 deletions(-) delete mode 100644 bz2024652-1-totem-Add-cancel_hold_on_retransmit-config-option.patch delete mode 100644 bz2024657-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch delete mode 100644 bz2070623-1-logrotate-Use-copytruncate-method-by-default.patch diff --git a/.gitignore b/.gitignore index 8e9ea1a..d7d50dc 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ corosync-1.2.7.tar.gz /corosync-3.1.3.tar.gz /corosync-3.1.4.tar.gz /corosync-3.1.5.tar.gz +/corosync-3.1.7.tar.gz diff --git a/bz2024652-1-totem-Add-cancel_hold_on_retransmit-config-option.patch b/bz2024652-1-totem-Add-cancel_hold_on_retransmit-config-option.patch deleted file mode 100644 index d8b82fd..0000000 --- a/bz2024652-1-totem-Add-cancel_hold_on_retransmit-config-option.patch +++ /dev/null @@ -1,128 +0,0 @@ -From cdf72925db5a81e546ca8e8d7d8291ee1fc77be4 Mon Sep 17 00:00:00 2001 -From: Jan Friesse -Date: Wed, 11 Aug 2021 17:34:05 +0200 -Subject: [PATCH] totem: Add cancel_hold_on_retransmit config option - -Previously, existence of retransmit messages canceled holding -of token (and never allowed representative to enter token hold -state). - -This makes token rotating maximum speed and keeps processor -resending messages over and over again - overloading network -and reducing chance to successfully deliver the messages. - -Also there were reports of various Antivirus / IPS / IDS which slows -down delivery of packets with certain sizes (packets bigger than token) -what make Corosync retransmit messages over and over again. - -Proposed solution is to allow representative to enter token hold -state when there are only retransmit messages. This allows network to -handle overload and/or gives Antivirus/IPS/IDS enough time scan and -deliver packets without corosync entering "FAILED TO RECEIVE" state and -adding more load to network. - -Signed-off-by: Jan Friesse -Reviewed-by: Christine Caulfield ---- - exec/totemconfig.c | 6 ++++++ - exec/totemsrp.c | 5 +++-- - include/corosync/totem/totem.h | 2 ++ - man/corosync.conf.5 | 15 ++++++++++++++- - 4 files changed, 25 insertions(+), 3 deletions(-) - -diff --git a/exec/totemconfig.c b/exec/totemconfig.c -index 57a1587a..46e09952 100644 ---- a/exec/totemconfig.c -+++ b/exec/totemconfig.c -@@ -81,6 +81,7 @@ - #define MAX_MESSAGES 17 - #define MISS_COUNT_CONST 5 - #define BLOCK_UNLISTED_IPS 1 -+#define CANCEL_TOKEN_HOLD_ON_RETRANSMIT 0 - /* This constant is not used for knet */ - #define UDP_NETMTU 1500 - -@@ -144,6 +145,8 @@ static void *totem_get_param_by_name(struct totem_config *totem_config, const ch - return totem_config->knet_compression_model; - if (strcmp(param_name, "totem.block_unlisted_ips") == 0) - return &totem_config->block_unlisted_ips; -+ if (strcmp(param_name, "totem.cancel_token_hold_on_retransmit") == 0) -+ return &totem_config->cancel_token_hold_on_retransmit; - - return NULL; - } -@@ -365,6 +368,9 @@ void totem_volatile_config_read (struct totem_config *totem_config, icmap_map_t - - totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.block_unlisted_ips", deleted_key, - BLOCK_UNLISTED_IPS); -+ -+ totem_volatile_config_set_boolean_value(totem_config, temp_map, "totem.cancel_token_hold_on_retransmit", -+ deleted_key, CANCEL_TOKEN_HOLD_ON_RETRANSMIT); - } - - int totem_volatile_config_validate ( -diff --git a/exec/totemsrp.c b/exec/totemsrp.c -index 949d367b..d24b11fa 100644 ---- a/exec/totemsrp.c -+++ b/exec/totemsrp.c -@@ -3981,8 +3981,9 @@ static int message_handler_orf_token ( - transmits_allowed = fcc_calculate (instance, token); - mcasted_retransmit = orf_token_rtr (instance, token, &transmits_allowed); - -- if (instance->my_token_held == 1 && -- (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { -+ if (instance->totem_config->cancel_token_hold_on_retransmit && -+ instance->my_token_held == 1 && -+ (token->rtr_list_entries > 0 || mcasted_retransmit > 0)) { - instance->my_token_held = 0; - forward_token = 1; - } -diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h -index 8b166566..bdb6a15f 100644 ---- a/include/corosync/totem/totem.h -+++ b/include/corosync/totem/totem.h -@@ -244,6 +244,8 @@ struct totem_config { - - unsigned int block_unlisted_ips; - -+ unsigned int cancel_token_hold_on_retransmit; -+ - void (*totem_memb_ring_id_create_or_load) ( - struct memb_ring_id *memb_ring_id, - unsigned int nodeid); -diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 -index 0588ad1e..a3771ea7 100644 ---- a/man/corosync.conf.5 -+++ b/man/corosync.conf.5 -@@ -32,7 +32,7 @@ - .\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF - .\" * THE POSSIBILITY OF SUCH DAMAGE. - .\" */ --.TH COROSYNC_CONF 5 2021-07-23 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" -+.TH COROSYNC_CONF 5 2021-08-11 "corosync Man Page" "Corosync Cluster Engine Programmer's Manual" - .SH NAME - corosync.conf - corosync executive configuration file - -@@ -584,6 +584,19 @@ with an old configuration. - - The default value is yes. - -+.TP -+cancel_token_hold_on_retransmit -+Allows Corosync to hold token by representative when there is too much -+retransmit messages. This allows network to process increased load without -+overloading it. Used mechanism is same as described for -+.B hold -+directive. -+ -+Some deployments may prefer to never hold token when there is -+retransmit messages. If so, option should be set to yes. -+ -+The default value is no. -+ - .PP - Within the - .B logging --- -2.27.0 - diff --git a/bz2024657-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch b/bz2024657-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch deleted file mode 100644 index 39f1e01..0000000 --- a/bz2024657-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch +++ /dev/null @@ -1,109 +0,0 @@ -From e7a82370a7b5d3ca342d5e42e25763fa2c938739 Mon Sep 17 00:00:00 2001 -From: Jan Friesse -Date: Tue, 26 Oct 2021 18:17:59 +0200 -Subject: [PATCH] totemsrp: Switch totempg buffers at the right time - -Commit 92e0f9c7bb9b4b6a0da8d64bdf3b2e47ae55b1cc added switching of -totempg buffers in sync phase. But because buffers got switch too early -there was a problem when delivering recovered messages (messages got -corrupted and/or lost). Solution is to switch buffers after recovered -messages got delivered. - -I think it is worth to describe complete history with reproducers so it -doesn't get lost. - -It all started with 402638929e5045ef520a7339696c687fbed0b31b (more info -about original problem is described in -https://bugzilla.redhat.com/show_bug.cgi?id=820821). This patch -solves problem which is way to be reproduced with following reproducer: -- 2 nodes -- Both nodes running corosync and testcpg -- Pause node 1 (SIGSTOP of corosync) -- On node 1, send some messages by testcpg - (it's not answering but this doesn't matter). Simply hit ENTER key - few times is enough) -- Wait till node 2 detects that node 1 left -- Unpause node 1 (SIGCONT of corosync) - -and on node 1 newly mcasted cpg messages got sent before sync barrier, -so node 2 logs "Unknown node -> we will not deliver message". - -Solution was to add switch of totemsrp new messages buffer. - -This patch was not enough so new one -(92e0f9c7bb9b4b6a0da8d64bdf3b2e47ae55b1cc) was created. Reproducer of -problem was similar, just cpgverify was used instead of testcpg. -Occasionally when node 1 was unpaused it hang in sync phase because -there was a partial message in totempg buffers. New sync message had -different frag cont so it was thrown away and never delivered. - -After many years problem was found which is solved by this patch -(original issue describe in -https://github.com/corosync/corosync/issues/660). -Reproducer is more complex: -- 2 nodes -- Node 1 is rate-limited (used script on the hypervisor side): - ``` - iface=tapXXXX - # ~0.1MB/s in bit/s - rate=838856 - # 1mb/s - burst=1048576 - tc qdisc add dev $iface root handle 1: htb default 1 - tc class add dev $iface parent 1: classid 1:1 htb rate ${rate}bps \ - burst ${burst}b - tc qdisc add dev $iface handle ffff: ingress - tc filter add dev $iface parent ffff: prio 50 basic police rate \ - ${rate}bps burst ${burst}b mtu 64kb "drop" - ``` -- Node 2 is running corosync and cpgverify -- Node 1 keeps restarting of corosync and running cpgverify in cycle - - Console 1: while true; do corosync; sleep 20; \ - kill $(pidof corosync); sleep 20; done - - Console 2: while true; do ./cpgverify;done - -And from time to time (reproduced usually in less than 5 minutes) -cpgverify reports corrupted message. - -Signed-off-by: Jan Friesse -Reviewed-by: Fabio M. Di Nitto ---- - exec/totemsrp.c | 16 +++++++++++++++- - 1 file changed, 15 insertions(+), 1 deletion(-) - -diff --git a/exec/totemsrp.c b/exec/totemsrp.c -index d24b11fa..fd71771b 100644 ---- a/exec/totemsrp.c -+++ b/exec/totemsrp.c -@@ -1989,13 +1989,27 @@ static void memb_state_operational_enter (struct totemsrp_instance *instance) - trans_memb_list_totemip, instance->my_trans_memb_entries, - left_list, instance->my_left_memb_entries, - 0, 0, &instance->my_ring_id); -+ /* -+ * Switch new totemsrp messages queue. Messages sent from now on are stored -+ * in different queue so synchronization messages are delivered first. Totempg -+ * buffers will be switched later. -+ */ - instance->waiting_trans_ack = 1; -- instance->totemsrp_waiting_trans_ack_cb_fn (1); - - // TODO we need to filter to ensure we only deliver those - // messages which are part of instance->my_deliver_memb - messages_deliver_to_app (instance, 1, instance->old_ring_state_high_seq_received); - -+ /* -+ * Switch totempg buffers. This used to be right after -+ * instance->waiting_trans_ack = 1; -+ * line. This was causing problem, because there may be not yet -+ * processed parts of messages in totempg buffers. -+ * So when buffers were switched and recovered messages -+ * got delivered it was not possible to assemble them. -+ */ -+ instance->totemsrp_waiting_trans_ack_cb_fn (1); -+ - instance->my_aru = aru_save; - - /* --- -2.27.0 - diff --git a/bz2070623-1-logrotate-Use-copytruncate-method-by-default.patch b/bz2070623-1-logrotate-Use-copytruncate-method-by-default.patch deleted file mode 100644 index 74c71df..0000000 --- a/bz2070623-1-logrotate-Use-copytruncate-method-by-default.patch +++ /dev/null @@ -1,60 +0,0 @@ -From 04362046c4a9d7307feb5b68341d567b7d0b94d6 Mon Sep 17 00:00:00 2001 -From: Jan Friesse -Date: Tue, 29 Mar 2022 17:09:22 +0200 -Subject: [PATCH] logrotate: Use copytruncate method by default - -The reopen lograte method has two main problems: -1. It does fail when corosync is not running (solvable by - adding "|| true") -2. If (for some reason, like SELinux) cfgtool -L fails, logrotate - fails and corosync keeps logging into old file. Added "|| true" - makes situation even worse because logrotate removes file but - corosync keeps logging into it. - -Solution is to install copytruncate logrotate snip by default (and -keep reopen config file only for reference). - -Signed-off-by: Jan Friesse -Reviewed-by: Christine Caulfield ---- - conf/logrotate/Makefile.am | 7 ------- - conf/logrotate/corosync-reopen.in | 5 +++++ - 2 files changed, 5 insertions(+), 7 deletions(-) - -diff --git a/conf/logrotate/Makefile.am b/conf/logrotate/Makefile.am -index 4f7b7536..35efa2de 100644 ---- a/conf/logrotate/Makefile.am -+++ b/conf/logrotate/Makefile.am -@@ -34,16 +34,9 @@ MAINTAINERCLEANFILES = Makefile.in - - EXTRA_DIST = corosync-reopen.in corosync-copytruncate.in - --if HAVE_QB_LOG_FILE_REOPEN --corosync: corosync-reopen.in -- $(SED) -e 's#@''LOGDIR@#${LOGDIR}#g' \ -- -e 's#@''SBINDIR@#$(sbindir)#g' \ -- $< > $@ --else - corosync: corosync-copytruncate.in - $(SED) -e 's#@''LOGDIR@#${LOGDIR}#g' \ - $< > $@ --endif - - logrotatecorosyncdir = ${LOGROTATEDIR} - logrotatecorosync_DATA = corosync -diff --git a/conf/logrotate/corosync-reopen.in b/conf/logrotate/corosync-reopen.in -index 839c5dae..730fb741 100644 ---- a/conf/logrotate/corosync-reopen.in -+++ b/conf/logrotate/corosync-reopen.in -@@ -1,3 +1,8 @@ -+# This logrotate method has two main problems and it's kept only for reference: -+# 1. It does fail when corosync is not running (solvable by adding "|| true") -+# 2. If (for some reason) cfgtool -L fails, logrotate fails and corosync keeps -+# logging into old file. Added "|| true" makes situation even worse -+# because logrotate removes file but corosync keeps logging into it. - @LOGDIR@/corosync.log { - missingok - compress --- -2.27.0 - diff --git a/corosync.spec b/corosync.spec index cc66093..0ceee38 100644 --- a/corosync.spec +++ b/corosync.spec @@ -17,16 +17,12 @@ Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces -Version: 3.1.5 -Release: 4%{?gitver}%{?dist} +Version: 3.1.7 +Release: 1%{?gitver}%{?dist} License: BSD URL: http://corosync.github.io/corosync/ Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz -Patch0: bz2024652-1-totem-Add-cancel_hold_on_retransmit-config-option.patch -Patch1: bz2024657-1-totemsrp-Switch-totempg-buffers-at-the-right-time.patch -Patch2: bz2070623-1-logrotate-Use-copytruncate-method-by-default.patch - # Runtime bits # The automatic dependency overridden in favor of explicit version lock Requires: corosynclib%{?_isa} = %{version}-%{release} @@ -76,10 +72,6 @@ BuildRequires: make %prep %setup -q -n %{name}-%{version}%{?gittarver} -%patch0 -p1 -b .bz2024652-1 -%patch1 -p1 -b .bz2024657-1 -%patch2 -p1 -b .bz2070623-1 - %build %if %{with runautogen} ./autogen.sh @@ -297,6 +289,11 @@ network splits) %endif %changelog +* Tue Nov 15 2022 Jan Friesse - 3.1.7-1 +- Resolves: rhbz#2135861 + +- New upstream release (rhbz#2135861) + * Thu Mar 31 2022 Jan Friesse - 3.1.5-4 - Resolves: rhbz#2070623 diff --git a/sources b/sources index 2a816ad..5abef83 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (corosync-3.1.5.tar.gz) = eb974a32f60c52564057ed41c1ebf31fe4332a5a082ebbd5fa2540af8fa9e8c0c42d4ef9066abcb9d7dd04c12b97cd13642289c65b5b6b65cfd30c12641ada1d +SHA512 (corosync-3.1.7.tar.gz) = a4d00f18a6dda07f36e77fc48f5bddff77e12a3e6ee40d9450734e281d20479b90cd0c653e255cfc46e0e42e4a0177291a3daba671e751d027e4317e601f0cd2