corosync/rrp-Higher-threshold-in-passive-mode-for-mcast.patch
Jan Friesse f0be19d9db Import fixes from upstream
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
2011-09-08 10:08:06 +02:00

141 lines
5.6 KiB
Diff

From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 29 Aug 2011 15:09:52 +0200
Subject: [PATCH] rrp: Higher threshold in passive mode for mcast
There were too many false positives with passive mode rrp when a high
number of messages was received.
The patch adds a new configurable variable, rrp_problem_count_mcast_threshold,
which defaults to 10 times rrp_problem_count_threshold and is used as the
threshold for multicast packets in passive mode. The variable is unused in
active mode.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Steven Dake <sdake@redhat.com>
(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd)
---
exec/totemconfig.c | 11 +++++++++++
exec/totemrrp.c | 6 ++++--
exec/totemsrp.c | 3 +++
include/corosync/totem/totem.h | 2 ++
man/corosync.conf.5 | 8 ++++++++
5 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
index 80ca182..f767f69 100644
--- a/exec/totemconfig.c
+++ b/exec/totemconfig.c
@@ -213,6 +213,8 @@ static void totem_volatile_config_read (
objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
+ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold);
+
objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
@@ -667,12 +669,21 @@ int totem_config_validate (
if (totem_config->rrp_problem_count_threshold == 0) {
totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
}
+ if (totem_config->rrp_problem_count_mcast_threshold == 0) {
+ totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10;
+ }
if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
snprintf (local_error_reason, sizeof(local_error_reason),
"The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
goto parse_error;
}
+ if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
+ snprintf (local_error_reason, sizeof(local_error_reason),
+ "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).",
+ totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
+ goto parse_error;
+ }
if (totem_config->rrp_token_expired_timeout == 0) {
totem_config->rrp_token_expired_timeout =
totem_config->token_retransmit_timeout;
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
index a5abb1b..616d0d5 100644
--- a/exec/totemrrp.c
+++ b/exec/totemrrp.c
@@ -890,14 +890,17 @@ static void passive_monitor (
unsigned int max;
unsigned int i;
unsigned int min_all, min_active;
+ unsigned int threshold;
/*
* Monitor for failures
*/
if (is_token_recv_count) {
recv_count = passive_instance->token_recv_count;
+ threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
} else {
recv_count = passive_instance->mcast_recv_count;
+ threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
}
recv_count[iface_no] += 1;
@@ -959,8 +962,7 @@ static void passive_monitor (
for (i = 0; i < rrp_instance->interface_count; i++) {
if ((passive_instance->faulty[i] == 0) &&
- (max - recv_count[i] >
- rrp_instance->totem_config->rrp_problem_count_threshold)) {
+ (max - recv_count[i] > threshold)) {
passive_instance->faulty[i] = 1;
poll_timer_add (rrp_instance->poll_handle,
rrp_instance->totem_config->rrp_autorecovery_check_timeout,
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
index 40460e0..6981ac1 100644
--- a/exec/totemsrp.c
+++ b/exec/totemsrp.c
@@ -858,6 +858,9 @@ int totemsrp_initialize (
"RRP threshold (%d problem count)\n",
totem_config->rrp_problem_count_threshold);
log_printf (instance->totemsrp_log_level_debug,
+ "RRP multicast threshold (%d problem count)\n",
+ totem_config->rrp_problem_count_mcast_threshold);
+ log_printf (instance->totemsrp_log_level_debug,
"RRP automatic recovery check timeout (%d ms)\n",
totem_config->rrp_autorecovery_check_timeout);
log_printf (instance->totemsrp_log_level_debug,
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
index f3ac9cc..4dce3b3 100644
--- a/include/corosync/totem/totem.h
+++ b/include/corosync/totem/totem.h
@@ -143,6 +143,8 @@ struct totem_config {
unsigned int rrp_problem_count_threshold;
+ unsigned int rrp_problem_count_mcast_threshold;
+
unsigned int rrp_autorecovery_check_timeout;
char rrp_mode[TOTEM_RRP_MODE_BYTES];
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
index b6f769e..78eb2bb 100644
--- a/man/corosync.conf.5
+++ b/man/corosync.conf.5
@@ -472,6 +472,14 @@ may occur.
The default is 10 problem counts.
.TP
+rrp_problem_count_mcast_threshold
+This specifies the number of times a problem is detected with multicast before
+setting the link faulty for passive rrp mode. This variable is unused in active
+rrp mode.
+
+The default is 10 times rrp_problem_count_threshold.
+
+.TP
rrp_token_expired_timeout
This specifies the time in milliseconds to increment the problem counter for
the redundant ring protocol after not having received a token from all rings
--
1.7.1