Import fixes from upstream

Signed-off-by: Jan Friesse <jfriesse@redhat.com>
This commit is contained in:
Jan Friesse 2011-09-08 10:00:35 +02:00
parent a9111fec33
commit f0be19d9db
5 changed files with 326 additions and 1 deletions

View File

@ -0,0 +1,58 @@
From be608c050247e5f9c8266b8a0f9803cc0a3dc881 Mon Sep 17 00:00:00 2001
From: Steven Dake <sdake@redhat.com>
Date: Tue, 30 Aug 2011 22:25:21 -0700
Subject: [PATCH] Ignore memb_join messages during flush operations
A memb_join operation that occurs during flushing can result in an
entry into the GATHER state from the RECOVERY state. This results in the
regular sort queue being used instead of the recovery sort queue, resulting
in a segfault.
Signed-off-by: Steven Dake <sdake@redhat.com>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
(cherry picked from commit 48ffa8892daac18935d96ae46a72aebe2fb70430)
---
exec/totemudp.c | 13 +++++++++++++
1 files changed, 13 insertions(+), 0 deletions(-)
diff --git a/exec/totemudp.c b/exec/totemudp.c
index 96849b7..0c12b56 100644
--- a/exec/totemudp.c
+++ b/exec/totemudp.c
@@ -90,6 +90,8 @@
#define BIND_STATE_REGULAR 1
#define BIND_STATE_LOOPBACK 2
+#define MESSAGE_TYPE_MCAST 1
+
#define HMAC_HASH_SIZE 20
struct security_header {
unsigned char hash_digest[HMAC_HASH_SIZE]; /* The hash *MUST* be first in the data structure */
@@ -1172,6 +1174,7 @@ static int net_deliver_fn (
int res = 0;
unsigned char *msg_offset;
unsigned int size_delv;
+ char *message_type;
if (instance->flushing == 1) {
iovec = &instance->totemudp_iov_recv_flush;
@@ -1234,6 +1237,16 @@ static int net_deliver_fn (
}
/*
+ * Drop all non-mcast messages (more specifically join
+ * messages should be dropped)
+ */
+ message_type = (char *)msg_offset;
+ if (instance->flushing == 1 && *message_type != MESSAGE_TYPE_MCAST) {
+ iovec->iov_len = FRAME_SIZE_MAX;
+ return (0);
+ }
+
+ /*
* Handle incoming message
*/
instance->totemudp_deliver_fn (
--
1.7.1

View File

@ -14,12 +14,17 @@
Name: corosync
Summary: The Corosync Cluster Engine and Application Programming Interfaces
Version: 1.4.1
Release: 1%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
Release: 2%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: BSD
Group: System Environment/Base
URL: http://ftp.corosync.org
Source0: ftp://ftp:user@ftp.corosync.org/downloads/%{name}-%{version}/%{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}.tar.gz
Patch0: rrp-Handle-endless-loop-if-all-ifaces-are-faulty.patch
Patch1: rrp-Higher-threshold-in-passive-mode-for-mcast.patch
Patch2: Ignore-memb_join-messages-during-flush-operations.patch
Patch3: totemconfig-change-minimum-RRP-threshold.patch
# Runtime bits
Requires: corosynclib = %{version}-%{release}
Requires(pre): /usr/sbin/useradd
@ -50,6 +55,10 @@ BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
%prep
%setup -q -n %{name}-%{version}%{?numcomm:.%{numcomm}}%{?alphatag:-%{alphatag}}%{?dirty:-%{dirty}}
%patch0 -p1
%patch1 -p1
%patch2 -p1
%patch3 -p1
%build
%if %{buildtrunk}
@ -277,6 +286,9 @@ The Corosync Cluster Engine APIs.
%{_mandir}/man8/sam_overview.8*
%changelog
* Thu Sep 08 2011 Jan Friesse <jfriesse@redhat.com> - 1.4.1-2
- Add upstream fixes
* Tue Jul 26 2011 Jan Friesse <jfriesse@redhat.com> - 1.4.1-1
- New upstream release

View File

@ -0,0 +1,85 @@
From dc862e15cc084926eccc5e1ff3241611c0cb54f0 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 29 Aug 2011 10:44:05 +0200
Subject: [PATCH] rrp: Handle endless loop if all ifaces are faulty
If all interfaces were faulty, passive_mcast_flush_send and related
functions ended up in an endless loop. This is now handled: if there is no
live interface, the message is dropped.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
(cherry picked from commit 0eade8de79b6e5b28e91604d4d460627c7a61ddd)
---
exec/totemrrp.c | 29 ++++++++++++++++++++---------
1 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index 83292ad..a5abb1b 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -1015,12 +1015,16 @@ static void passive_mcast_flush_send (
unsigned int msg_len)
{
struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
+ int i = 0;
do {
passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
+ i++;
+ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
- totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ if (i <= instance->interface_count) {
+ totemnet_mcast_flush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ }
}
static void passive_mcast_noflush_send (
@@ -1029,13 +1033,16 @@ static void passive_mcast_noflush_send (
unsigned int msg_len)
{
struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
+ int i = 0;
do {
passive_instance->msg_xmit_iface = (passive_instance->msg_xmit_iface + 1) % instance->interface_count;
- } while (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1);
-
+ i++;
+ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->msg_xmit_iface] == 1));
- totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ if (i <= instance->interface_count) {
+ totemnet_mcast_noflush_send (instance->net_handles[passive_instance->msg_xmit_iface], msg, msg_len);
+ }
}
static void passive_token_recv (
@@ -1070,14 +1077,18 @@ static void passive_token_send (
unsigned int msg_len)
{
struct passive_instance *passive_instance = (struct passive_instance *)instance->rrp_algo_instance;
+ int i = 0;
do {
passive_instance->token_xmit_iface = (passive_instance->token_xmit_iface + 1) % instance->interface_count;
- } while (passive_instance->faulty[passive_instance->token_xmit_iface] == 1);
+ i++;
+ } while ((i <= instance->interface_count) && (passive_instance->faulty[passive_instance->token_xmit_iface] == 1));
- totemnet_token_send (
- instance->net_handles[passive_instance->token_xmit_iface],
- msg, msg_len);
+ if (i <= instance->interface_count) {
+ totemnet_token_send (
+ instance->net_handles[passive_instance->token_xmit_iface],
+ msg, msg_len);
+ }
}
--
1.7.1

View File

@ -0,0 +1,140 @@
From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 29 Aug 2011 15:09:52 +0200
Subject: [PATCH] rrp: Higher threshold in passive mode for mcast
There were too many false positives with passive mode rrp when a high
number of messages was received.
The patch adds a new configurable variable, rrp_problem_count_mcast_threshold,
which is by default 10 times rrp_problem_count_threshold and is
used as the threshold for multicast packets in passive mode. The variable is
unused in active mode.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd)
---
exec/totemconfig.c | 11 +++++++++++
exec/totemrrp.c | 6 ++++--
exec/totemsrp.c | 3 +++
include/corosync/totem/totem.h | 2 ++
man/corosync.conf.5 | 8 ++++++++
5 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index 80ca182..f767f69 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -213,6 +213,8 @@ static void totem_volatile_config_read (
objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
+ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold);
+
objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
@@ -667,12 +669,21 @@ int totem_config_validate (
if (totem_config->rrp_problem_count_threshold == 0) {
totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
}
+ if (totem_config->rrp_problem_count_mcast_threshold == 0) {
+ totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10;
+ }
if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
goto parse_error;
}
+ if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
+ snprintf (local_error_reason, sizeof(local_error_reason),
+ "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).",
+ totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
+ goto parse_error;
+ }
if (totem_config->rrp_token_expired_timeout == 0) {
totem_config->rrp_token_expired_timeout =
totem_config->token_retransmit_timeout;
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index a5abb1b..616d0d5 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -890,14 +890,17 @@ static void passive_monitor (
unsigned int max;
unsigned int i;
unsigned int min_all, min_active;
+ unsigned int threshold;
/*
* Monitor for failures
*/
if (is_token_recv_count) {
recv_count = passive_instance->token_recv_count;
+ threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
} else {
recv_count = passive_instance->mcast_recv_count;
+ threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
}
recv_count[iface_no] += 1;
@@ -959,8 +962,7 @@ static void passive_monitor (
for (i = 0; i < rrp_instance->interface_count; i++) {
if ((passive_instance->faulty[i] == 0) &&
- (max - recv_count[i] >
- rrp_instance->totem_config->rrp_problem_count_threshold)) {
+ (max - recv_count[i] > threshold)) {
passive_instance->faulty[i] = 1;
poll_timer_add (rrp_instance->poll_handle,
rrp_instance->totem_config->rrp_autorecovery_check_timeout,
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
index 40460e0..6981ac1 100644
--- a/exec/totemsrp.c
+++ b/exec/totemsrp.c
@@ -858,6 +858,9 @@ int totemsrp_initialize (
"RRP threshold (%d problem count)\n",
totem_config->rrp_problem_count_threshold);
log_printf (instance->totemsrp_log_level_debug,
+ "RRP multicast threshold (%d problem count)\n",
+ totem_config->rrp_problem_count_mcast_threshold);
+ log_printf (instance->totemsrp_log_level_debug,
"RRP automatic recovery check timeout (%d ms)\n",
totem_config->rrp_autorecovery_check_timeout);
log_printf (instance->totemsrp_log_level_debug,
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index f3ac9cc..4dce3b3 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -143,6 +143,8 @@ struct totem_config {
unsigned int rrp_problem_count_threshold;
+ unsigned int rrp_problem_count_mcast_threshold;
+
unsigned int rrp_autorecovery_check_timeout;
char rrp_mode[TOTEM_RRP_MODE_BYTES];
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
index b6f769e..78eb2bb 100644
--- a/man/corosync.conf.5
+++ b/man/corosync.conf.5
@@ -472,6 +472,14 @@ may occur.
The default is 10 problem counts.
.TP
+rrp_problem_count_mcast_threshold
+This specifies the number of times a problem is detected with multicast before
+setting the link faulty for passive rrp mode. This variable is unused in active
+rrp mode.
+
+The default is 10 times rrp_problem_count_threshold.
+
+.TP
rrp_token_expired_timeout
This specifies the time in milliseconds to increment the problem counter for
the redundant ring protocol after not having received a token from all rings
--
1.7.1

View File

@ -0,0 +1,30 @@
From b1aba94732edc2ff084b7dd559a08b687f464ed0 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Thu, 8 Sep 2011 09:40:04 +0200
Subject: [PATCH] totemconfig: change minimum RRP threshold
The RRP threshold can be set to a value lower than 5.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Fabio M. Di Nitto <fdinitto@redhat.com>
(cherry picked from commit f6c2a8dab786c50ece36dd3424e258e93a1000d3)
---
exec/totemconfig.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index f767f69..a475bb3 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -82,7 +82,7 @@
#define MISS_COUNT_CONST 5
#define RRP_PROBLEM_COUNT_TIMEOUT 2000
#define RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT 10
-#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 5
+#define RRP_PROBLEM_COUNT_THRESHOLD_MIN 2
#define RRP_AUTORECOVERY_CHECK_TIMEOUT 1000
static char error_string_response[512];
--
1.7.1