f0be19d9db
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
141 lines
5.6 KiB
Diff
141 lines
5.6 KiB
Diff
From 4e32c3112a2f13a302709d72b0ae989287a48563 Mon Sep 17 00:00:00 2001
|
|
From: Jan Friesse <jfriesse@redhat.com>
|
|
Date: Mon, 29 Aug 2011 15:09:52 +0200
|
|
Subject: [PATCH] rrp: Higher threshold in passive mode for mcast
|
|
|
|
There were too many false positives with passive mode rrp when high
|
|
number of messages were received.
|
|
|
|
Patch adds new configurable variable rrp_problem_count_mcast_threshold
|
|
which is by default 10 times rrp_problem_count_threshold and this is
|
|
used as threshold for multicast packets in passive mode. Variable is
|
|
unused in active mode.
|
|
|
|
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
|
Reviewed-by: Steven Dake <sdake@redhat.com>
|
|
(cherry picked from commit 752239eaa1edd68695a6e40bcde60471f34a02fd)
|
|
---
|
|
exec/totemconfig.c | 11 +++++++++++
|
|
exec/totemrrp.c | 6 ++++--
|
|
exec/totemsrp.c | 3 +++
|
|
include/corosync/totem/totem.h | 2 ++
|
|
man/corosync.conf.5 | 8 ++++++++
|
|
5 files changed, 28 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/exec/totemconfig.c b/exec/totemconfig.c
|
|
index 80ca182..f767f69 100644
|
|
--- a/exec/totemconfig.c
|
|
+++ b/exec/totemconfig.c
|
|
@@ -213,6 +213,8 @@ static void totem_volatile_config_read (
|
|
|
|
objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_threshold", &totem_config->rrp_problem_count_threshold);
|
|
|
|
+ objdb_get_int (objdb,object_totem_handle, "rrp_problem_count_mcast_threshold", &totem_config->rrp_problem_count_mcast_threshold);
|
|
+
|
|
objdb_get_int (objdb,object_totem_handle, "rrp_autorecovery_check_timeout", &totem_config->rrp_autorecovery_check_timeout);
|
|
|
|
objdb_get_int (objdb,object_totem_handle, "heartbeat_failures_allowed", &totem_config->heartbeat_failures_allowed);
|
|
@@ -667,12 +669,21 @@ int totem_config_validate (
|
|
if (totem_config->rrp_problem_count_threshold == 0) {
|
|
totem_config->rrp_problem_count_threshold = RRP_PROBLEM_COUNT_THRESHOLD_DEFAULT;
|
|
}
|
|
+ if (totem_config->rrp_problem_count_mcast_threshold == 0) {
|
|
+ totem_config->rrp_problem_count_mcast_threshold = totem_config->rrp_problem_count_threshold * 10;
|
|
+ }
|
|
if (totem_config->rrp_problem_count_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
|
|
snprintf (local_error_reason, sizeof(local_error_reason),
|
|
"The RRP problem count threshold (%d problem count) may not be less then (%d problem count).",
|
|
totem_config->rrp_problem_count_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
|
|
goto parse_error;
|
|
}
|
|
+ if (totem_config->rrp_problem_count_mcast_threshold < RRP_PROBLEM_COUNT_THRESHOLD_MIN) {
|
|
+ snprintf (local_error_reason, sizeof(local_error_reason),
|
|
+ "The RRP multicast problem count threshold (%d problem count) may not be less then (%d problem count).",
|
|
+ totem_config->rrp_problem_count_mcast_threshold, RRP_PROBLEM_COUNT_THRESHOLD_MIN);
|
|
+ goto parse_error;
|
|
+ }
|
|
if (totem_config->rrp_token_expired_timeout == 0) {
|
|
totem_config->rrp_token_expired_timeout =
|
|
totem_config->token_retransmit_timeout;
|
|
diff --git a/exec/totemrrp.c b/exec/totemrrp.c
|
|
index a5abb1b..616d0d5 100644
|
|
--- a/exec/totemrrp.c
|
|
+++ b/exec/totemrrp.c
|
|
@@ -890,14 +890,17 @@ static void passive_monitor (
|
|
unsigned int max;
|
|
unsigned int i;
|
|
unsigned int min_all, min_active;
|
|
+ unsigned int threshold;
|
|
|
|
/*
|
|
* Monitor for failures
|
|
*/
|
|
if (is_token_recv_count) {
|
|
recv_count = passive_instance->token_recv_count;
|
|
+ threshold = rrp_instance->totem_config->rrp_problem_count_threshold;
|
|
} else {
|
|
recv_count = passive_instance->mcast_recv_count;
|
|
+ threshold = rrp_instance->totem_config->rrp_problem_count_mcast_threshold;
|
|
}
|
|
|
|
recv_count[iface_no] += 1;
|
|
@@ -959,8 +962,7 @@ static void passive_monitor (
|
|
|
|
for (i = 0; i < rrp_instance->interface_count; i++) {
|
|
if ((passive_instance->faulty[i] == 0) &&
|
|
- (max - recv_count[i] >
|
|
- rrp_instance->totem_config->rrp_problem_count_threshold)) {
|
|
+ (max - recv_count[i] > threshold)) {
|
|
passive_instance->faulty[i] = 1;
|
|
poll_timer_add (rrp_instance->poll_handle,
|
|
rrp_instance->totem_config->rrp_autorecovery_check_timeout,
|
|
diff --git a/exec/totemsrp.c b/exec/totemsrp.c
|
|
index 40460e0..6981ac1 100644
|
|
--- a/exec/totemsrp.c
|
|
+++ b/exec/totemsrp.c
|
|
@@ -858,6 +858,9 @@ int totemsrp_initialize (
|
|
"RRP threshold (%d problem count)\n",
|
|
totem_config->rrp_problem_count_threshold);
|
|
log_printf (instance->totemsrp_log_level_debug,
|
|
+ "RRP multicast threshold (%d problem count)\n",
|
|
+ totem_config->rrp_problem_count_mcast_threshold);
|
|
+ log_printf (instance->totemsrp_log_level_debug,
|
|
"RRP automatic recovery check timeout (%d ms)\n",
|
|
totem_config->rrp_autorecovery_check_timeout);
|
|
log_printf (instance->totemsrp_log_level_debug,
|
|
diff --git a/include/corosync/totem/totem.h b/include/corosync/totem/totem.h
|
|
index f3ac9cc..4dce3b3 100644
|
|
--- a/include/corosync/totem/totem.h
|
|
+++ b/include/corosync/totem/totem.h
|
|
@@ -143,6 +143,8 @@ struct totem_config {
|
|
|
|
unsigned int rrp_problem_count_threshold;
|
|
|
|
+ unsigned int rrp_problem_count_mcast_threshold;
|
|
+
|
|
unsigned int rrp_autorecovery_check_timeout;
|
|
|
|
char rrp_mode[TOTEM_RRP_MODE_BYTES];
|
|
diff --git a/man/corosync.conf.5 b/man/corosync.conf.5
|
|
index b6f769e..78eb2bb 100644
|
|
--- a/man/corosync.conf.5
|
|
+++ b/man/corosync.conf.5
|
|
@@ -472,6 +472,14 @@ may occur.
|
|
The default is 10 problem counts.
|
|
|
|
.TP
|
|
+rrp_problem_count_mcast_threshold
|
|
+This specifies the number of times a problem is detected with multicast before
|
|
+setting the link faulty for passive rrp mode. This variable is unused in active
|
|
+rrp mode.
|
|
+
|
|
+The default is 10 times rrp_problem_count_threshold.
|
|
+
|
|
+.TP
|
|
rrp_token_expired_timeout
|
|
This specifies the time in milliseconds to increment the problem counter for
|
|
the redundant ring protocol after not having received a token from all rings
|
|
--
|
|
1.7.1
|
|
|