From f0be19d9dbf28c2dff0cefde0821f60398d59009 Mon Sep 17 00:00:00 2001 From: Jan Friesse Date: Thu, 8 Sep 2011 10:00:35 +0200 Subject: [PATCH] Import fixes from upstream Signed-off-by: Jan Friesse --- ...oin-messages-during-flush-operations.patch | 58 ++++++++ corosync.spec | 14 +- ...ndless-loop-if-all-ifaces-are-faulty.patch | 85 +++++++++++ ...-threshold-in-passive-mode-for-mcast.patch | 140 ++++++++++++++++++ ...mconfig-change-minimum-RRP-threshold.patch | 30 ++++ 5 files changed, 326 insertions(+), 1 deletion(-) create mode 100644 Ignore-memb_join-messages-during-flush-operations.patch create mode 100644 rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch create mode 100644 rrp-Higher-threshold-in-passive-mode-for-mcast.patch create mode 100644 totemconfig-change-minimum-RRP-threshold.patch diff --git a/Ignore-memb_join-messages-during-flush-operations.patch b/Ignore-memb_join-messages-during-flush-operations.patch new file mode 100644 index 0000000..906f28b --- /dev/null +++ b/Ignore-memb_join-messages-during-flush-operations.patch @@ -0,0 +1,58 @@ +From be608c050247e5f9c8266b8a0f9803cc0a3dc881 Mon Sep 17 00:00:00 2001 +From: Steven Dake +Date: Tue, 30 Aug 2011 22:25:21 -0700 +Subject: [PATCH] Ignore memb_join messages during flush operations + +a memb_join operation that occurs during flushing can result in an +entry into the GATHER state from the RECOVERY state. This results in the +regular sort queue being used instead of the recovery sort queue, resulting +in segfault. 
+ +Signed-off-by: Steven Dake +Reviewed-by: Jan Friesse +(cherry picked from commit 48ffa8892daac18935d96ae46a72aebe2fb70430) +--- + exec/totemudp.c | 13 +++++++++++++ + 1 files changed, 13 insertions(+), 0 deletions(-) + +diff --git a/exec/totemudp.c b/exec/totemudp.c +index 96849b7..0c12b56 100644 +--- a/exec/totemudp.c ++++ b/exec/totemudp.c +@@ -90,6 +90,8 @@ + #define BIND_STATE_REGULAR 1 + #define BIND_STATE_LOOPBACK 2 + ++#define MESSAGE_TYPE_MCAST 1 ++ + #define HMAC_HASH_SIZE 20 + struct security_header { + unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */ +@@ -1172,6 +1174,7 @@ static int net_deliver_fn ( + int res = 0; + unsigned char *msg_offset; + unsigned int size_delv; ++ char *message_type; + + if (instance->flushing == 1) { + iovec = &instance->totemudp_iov_recv_flush; +@@ -1234,6 +1237,16 @@ static int net_deliver_fn ( + } + + /* ++ * Drop all non-mcast messages (more specifically join ++ * messages should be dropped) ++ */ ++ message_type = (char *)msg_offset; ++ if (instance->flushing == 1 && *message_type != MESSAGE_TYPE_MCAST) { ++ iovec->iov_len = FRAME_SIZE_MAX; ++ return (0); ++ } ++ ++ /* + * Handle incoming message + */ + instance->totemudp_deliver_fn ( +-- +1.7.1 + diff --git a/corosync.spec b/corosync.spec index 7f3ff28..9bd7946 100644 --- a/corosync.spec +++ b/corosync.spec @@ -14,12 +14,17 @@ Name: corosync Summary: The Corosync Cluster Engine and Application Programming Interfaces Version: 1.4.1 -Release: 1%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 2%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: BSD Group: System Environment/Base URL: http://ftp.corosync.org Source0: ftp://ftp:user@ftp.corosync.org/downloads/%{name}-%{version}/%{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.gz +Patch0: rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch +Patch1: 
rrp-Higher-threshold-in-passive-mode-for-mcast.patch +Patch2: Ignore-memb_join-messages-during-flush-operations.patch +Patch3: totemconfig-change-minimum-RRP-threshold.patch + # Runtime bits Requires: corosynclib = %{version}-%{release} Requires(pre): /usr/sbin/useradd @@ -50,6 +55,10 @@ BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) %prep %setup -q -n %{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}} +%patch0 -p1 +%patch1 -p1 +%patch2 -p1 +%patch3 -p1 %build %if %{buildtrunk} @@ -277,6 +286,9 @@ The Corosync Cluster Engine APIs. %{_mandir}/man8/sam_overview.8* %changelog +* Thu Sep 08 2011 Jan Friesse - 1.4.1-2 +- Add upstream fixes + * Tue Jul 26 2011 Jan Friesse - 1.4.1-1 - New upstream release diff --git a/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch new file mode 100644 index 0000000..7c70e9b --- /dev/null +++ b/rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch @@ -0,0 +1,85 @@ +From dc862e15cc084926eccc5e1ff3241611c0cb54f0 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 29 Aug 2011 10:44:05 +0200 +Subject: [PATCH] rrp: Handle endless loop if all ifaces are faulty + +If all interfaces were faulty, passive_mcast_flush_send and related +functions ended in endless loop. This is now handled and if there is no +live interface, message is dropped. 
+ +Signed-off-by: Jan Friesse +Reviewed-by: Steven Dake +(cherry picked from commit 0eade8de79b6e5b28e91604d4d460627c7a61ddd) +--- + exec/totemrrp.c | 29 ++++++++++++++++++++--------- + 1 files changed, 20 insertions(+), 9 deletions(-) + +diff --git a/exec/totemrrp.c b/exec/totemrrp.c +index 83292ad..a5abb1b 100644 +--- a/exec/totemrrp.c ++++ b/exec/totemrrp.c +@@ -1015,12 +1015,16 @@ static void passive_mcast_flush_send ( + unsigned int msg_len) + { + struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; ++ int i = 0; + + do { + passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count; +- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1); ++ i++; ++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1)); + +- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ if (i <= instance->interface_count) { ++ totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ } + } + + static void passive_mcast_noflush_send ( +@@ -1029,13 +1033,16 @@ static void passive_mcast_noflush_send ( + unsigned int msg_len) + { + struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; ++ int i = 0; + + do { + passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count; +- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1); +- ++ i++; ++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1)); + +- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ if (i <= instance->interface_count) { ++ totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len); ++ } + } + + static void 
passive_token_recv ( +@@ -1070,14 +1077,18 @@ static void passive_token_send ( + unsigned int msg_len) + { + struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance; ++ int i = 0; + + do { + passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count; +- } while (passive_instance->faulty[passive_instance->token_xmit_iface] == 1); ++ i++; ++ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1)); + +- totemnet_token_send ( +- instance->net_handles[passive_instance->token_xmit_iface], +- msg, msg_len); ++ if (i <= instance->interface_count) { ++ totemnet_token_send ( ++ instance->net_handles[passive_instance->token_xmit_iface], ++ msg, msg_len); ++ } + + } + +-- +1.7.1 + diff --git a/rrp-Higher-threshold-in-passive-mode-for-mcast.patch b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch new file mode 100644 index 0000000..3b52bac --- /dev/null +++ b/rrp-Higher-threshold-in-passive-mode-for-mcast.patch @@ -0,0 +1,140 @@ +From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Mon, 29 Aug 2011 15:09:52 +0200 +Subject: [PATCH] rrp: Higher threshold in passive mode for mcast + +There were too many false positives with passive mode rrp when a high +number of messages were received. + +Patch adds new configurable variable rrp_problem_count_mcast_threshold +which is by default 10 times rrp_problem_count_threshold and this is +used as threshold for multicast packets in passive mode. Variable is +unused in active mode. 
+ +Signed-off-by: Jan Friesse +Reviewed-by: Steven Dake +(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd) +--- + exec/totemconfig.c | 11 +++++++++++ + exec/totemrrp.c | 6 ++++-- + exec/totemsrp.c | 3 +++ + include/corosync/totem/totem.h | 2 ++ + man/corosync.conf.5 | 8 ++++++++ + 5 files changed, 28 insertions(+), 2 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index 80ca182..f767f69 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -213,6 +213,8 @@ static void totem_volatile_config_read ( + + objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold); + ++ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold); ++ + objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout); + + objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed); +@@ -667,12 +669,21 @@ int totem_config_validate ( + if (totem_config->rrp_problem_count_threshold == 0) { + totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT; + } ++ if (totem_config->rrp_problem_count_mcast_threshold == 0) { ++ totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10; ++ } + if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { + snprintf (local_error_reason, sizeof(local_error_reason), + "The RRP problem count threshold (%d problem count) may not be less then (%d problem count).", + totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); + goto parse_error; + } ++ if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) { ++ snprintf (local_error_reason, sizeof(local_error_reason), ++ "The RRP multicast problem count threshold (%d problem count) may not be less 
then (%d problem count).", ++ totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN); ++ goto parse_error; ++ } + if (totem_config->rrp_token_expired_timeout == 0) { + totem_config->rrp_token_expired_timeout = + totem_config->token_retransmit_timeout; +diff --git a/exec/totemrrp.c b/exec/totemrrp.c +index a5abb1b..616d0d5 100644 +--- a/exec/totemrrp.c ++++ b/exec/totemrrp.c +@@ -890,14 +890,17 @@ static void passive_monitor ( + unsigned int max; + unsigned int i; + unsigned int min_all, min_active; ++ unsigned int threshold; + + /* + * Monitor for failures + */ + if (is_token_recv_count) { + recv_count = passive_instance->token_recv_count; ++ threshold = rrp_instance->totem_config->rrp_problem_count_threshold; + } else { + recv_count = passive_instance->mcast_recv_count; ++ threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold; + } + + recv_count[iface_no] += 1; +@@ -959,8 +962,7 @@ static void passive_monitor ( + + for (i = 0; i < rrp_instance->interface_count; i++) { + if ((passive_instance->faulty[i] == 0) && +- (max - recv_count[i] > +- rrp_instance->totem_config->rrp_problem_count_threshold)) { ++ (max - recv_count[i] > threshold)) { + passive_instance->faulty[i] = 1; + poll_timer_add (rrp_instance->poll_handle, + rrp_instance->totem_config->rrp_autorecovery_check_timeout, +diff --git a/exec/totemsrp.c b/exec/totemsrp.c +index 40460e0..6981ac1 100644 +--- a/exec/totemsrp.c ++++ b/exec/totemsrp.c +@@ -858,6 +858,9 @@ int totemsrp_initialize ( + "RRP threshold (%d problem count)\n", + totem_config->rrp_problem_count_threshold); + log_printf (instance->totemsrp_log_level_debug, ++ "RRP multicast threshold (%d problem count)\n", ++ totem_config->rrp_problem_count_mcast_threshold); ++ log_printf (instance->totemsrp_log_level_debug, + "RRP automatic recovery check timeout (%d ms)\n", + totem_config->rrp_autorecovery_check_timeout); + log_printf (instance->totemsrp_log_level_debug, +diff --git 
a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h +index f3ac9cc..4dce3b3 100644 +--- a/include/corosync/totem/totem.h ++++ b/include/corosync/totem/totem.h +@@ -143,6 +143,8 @@ struct totem_config { + + unsigned int rrp_problem_count_threshold; + ++ unsigned int rrp_problem_count_mcast_threshold; ++ + unsigned int rrp_autorecovery_check_timeout; + + char rrp_mode[TOTEM_RRP_MODE_BYTES]; +diff --git a/man/corosync.conf.5 b/man/corosync.conf.5 +index b6f769e..78eb2bb 100644 +--- a/man/corosync.conf.5 ++++ b/man/corosync.conf.5 +@@ -472,6 +472,14 @@ may occur. + The default is 10 problem counts. + + .TP ++rrp_problem_count_mcast_threshold ++This specifies the number of times a problem is detected with multicast before ++setting the link faulty for passive rrp mode. This variable is unused in active ++rrp mode. ++ ++The default is 10 times rrp_problem_count_threshold. ++ ++.TP + rrp_token_expired_timeout + This specifies the time in milliseconds to increment the problem counter for + the redundant ring protocol after not having received a token from all rings +-- +1.7.1 + diff --git a/totemconfig-change-minimum-RRP-threshold.patch b/totemconfig-change-minimum-RRP-threshold.patch new file mode 100644 index 0000000..0c70451 --- /dev/null +++ b/totemconfig-change-minimum-RRP-threshold.patch @@ -0,0 +1,30 @@ +From b1aba94732edc2ff084b7dd559a08b687f464ed0 Mon Sep 17 00:00:00 2001 +From: Jan Friesse +Date: Thu, 8 Sep 2011 09:40:04 +0200 +Subject: [PATCH] totemconfig: change minimum RRP threshold + +RRP threshold can be a value lower than 5. + +Signed-off-by: Jan Friesse +Reviewed-by: Fabio M. 
Di Nitto +(cherry picked from commit f6c2a8dab786c50ece36dd3424e258e93a1000d3) +--- + exec/totemconfig.c | 2 +- + 1 files changed, 1 insertions(+), 1 deletions(-) + +diff --git a/exec/totemconfig.c b/exec/totemconfig.c +index f767f69..a475bb3 100644 +--- a/exec/totemconfig.c ++++ b/exec/totemconfig.c +@@ -82,7 +82,7 @@ + #define MISS_COUNT_CONST 5 + #define RRP_PROBLEM_COUNT_TIMEOUT 2000 + #define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10 +-#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5 ++#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2 + #define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000 + + static char error_string_response[512]; +-- +1.7.1 +