import corosync-3.0.3-4.el8
This commit is contained in:
parent
e62565ff01
commit
696a553662
@ -0,0 +1,73 @@
|
||||
From cddd62f972bca276c934e58f08da84071cec1ddb Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Mon, 25 Nov 2019 18:21:52 +0100
|
||||
Subject: [PATCH] votequorum: Ignore the icmap_get_* return value
|
||||
|
||||
Express intention to ignore icmap_get_* return
|
||||
value and rely on default behavior of not changing the output
|
||||
parameter on error.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
---
|
||||
exec/votequorum.c | 22 +++++++++++-----------
|
||||
1 file changed, 11 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/exec/votequorum.c b/exec/votequorum.c
|
||||
index f78b3f9..e1d7e73 100644
|
||||
--- a/exec/votequorum.c
|
||||
+++ b/exec/votequorum.c
|
||||
@@ -1271,10 +1271,10 @@ static char *votequorum_readconfig(int runtime)
|
||||
/*
|
||||
* gather basic data here
|
||||
*/
|
||||
- icmap_get_uint32("quorum.expected_votes", &expected_votes);
|
||||
+ (void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
|
||||
have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
|
||||
have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
|
||||
- icmap_get_uint8("quorum.two_node", &two_node);
|
||||
+ (void)icmap_get_uint8("quorum.two_node", &two_node);
|
||||
|
||||
/*
|
||||
* do config verification and enablement
|
||||
@@ -1319,13 +1319,13 @@ static char *votequorum_readconfig(int runtime)
|
||||
wait_for_all = 1;
|
||||
}
|
||||
|
||||
- icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
|
||||
- icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
|
||||
- icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
|
||||
- icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
|
||||
- icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
|
||||
- icmap_get_uint8("quorum.auto_tie_breaker", &atb);
|
||||
- icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
|
||||
+ (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
|
||||
+ (void)icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
|
||||
+ (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
|
||||
+ (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
|
||||
+ (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
|
||||
+ (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
|
||||
+ (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
|
||||
|
||||
/* auto_tie_breaker defaults to LOWEST */
|
||||
if (atb) {
|
||||
@@ -1517,7 +1517,7 @@ static char *votequorum_readconfig(int runtime)
|
||||
us->expected_votes = node_expected_votes;
|
||||
} else {
|
||||
us->votes = 1;
|
||||
- icmap_get_uint32("quorum.votes", &us->votes);
|
||||
+ (void)icmap_get_uint32("quorum.votes", &us->votes);
|
||||
}
|
||||
|
||||
if (expected_votes) {
|
||||
@@ -1568,7 +1568,7 @@ static void votequorum_refresh_config(
|
||||
return ;
|
||||
}
|
||||
|
||||
- icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
|
||||
+ (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
|
||||
if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
|
||||
cancel_wfa >= 1) {
|
||||
icmap_set_uint8("quorum.cancel_wait_for_all", 0);
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,80 @@
|
||||
From 8ce65bf951bc1e5b2d64b60ea027fbdc551d4fc8 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Thu, 16 Jan 2020 15:43:59 +0100
|
||||
Subject: [PATCH] votequorum: Reflect runtime change of 2Node to WFA
|
||||
|
||||
When 2Node mode is set, WFA is also set unless WFA is configured
|
||||
explicitly. This behavior was not reflected on runtime change, so
|
||||
restarted corosync behavior was different (WFA not set). Also when
|
||||
cluster is reduced from 3 nodes to 2 nodes during runtime, WFA was not
|
||||
set, what may result in two quorate partitions.
|
||||
|
||||
Solution is to set WFA depending on 2Node when WFA
|
||||
is not explicitly configured.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/votequorum.c | 24 +++++++++++++++++++-----
|
||||
1 file changed, 19 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/exec/votequorum.c b/exec/votequorum.c
|
||||
index 0cde8f8..52424fa 100644
|
||||
--- a/exec/votequorum.c
|
||||
+++ b/exec/votequorum.c
|
||||
@@ -80,6 +80,7 @@ static uint8_t two_node = 0;
|
||||
|
||||
static uint8_t wait_for_all = 0;
|
||||
static uint8_t wait_for_all_status = 0;
|
||||
+static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
|
||||
|
||||
static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
|
||||
static int lowest_node_id = -1;
|
||||
@@ -1315,12 +1316,10 @@ static char *votequorum_readconfig(int runtime)
|
||||
* Enable special features
|
||||
*/
|
||||
if (!runtime) {
|
||||
- if (two_node) {
|
||||
- wait_for_all = 1;
|
||||
- }
|
||||
-
|
||||
(void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
|
||||
- (void)icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
|
||||
+ if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
|
||||
+ wait_for_all_autoset = 1;
|
||||
+ }
|
||||
(void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
|
||||
(void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
|
||||
(void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
|
||||
@@ -1361,6 +1360,15 @@ static char *votequorum_readconfig(int runtime)
|
||||
|
||||
}
|
||||
|
||||
+ /*
|
||||
+ * Changing of wait_for_all during runtime is not supported, but changing of two_node is
|
||||
+ * and two_node may set wfa if not configured explicitly. It is safe to unset it
|
||||
+ * (or set it back) when two_node changes.
|
||||
+ */
|
||||
+ if (wait_for_all_autoset) {
|
||||
+ wait_for_all = two_node;
|
||||
+ }
|
||||
+
|
||||
/* two_node and auto_tie_breaker are not compatible as two_node uses
|
||||
* a fence race to decide quorum whereas ATB decides based on node id
|
||||
*/
|
||||
@@ -1540,6 +1548,12 @@ static char *votequorum_readconfig(int runtime)
|
||||
update_two_node();
|
||||
if (wait_for_all) {
|
||||
update_wait_for_all_status(1);
|
||||
+ } else if (wait_for_all_autoset && wait_for_all_status) {
|
||||
+ /*
|
||||
+ * Reset wait for all status for consistency when wfa is auto-unset by 2node.
|
||||
+ * wait_for_all_status would be ignored by are_we_quorate anyway.
|
||||
+ */
|
||||
+ update_wait_for_all_status(0);
|
||||
}
|
||||
|
||||
out:
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,901 @@
|
||||
From f1d36307e524f9440733f0b01a9fc627a0e1cac7 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Ferenc=20W=C3=A1gner?= <wferi@debian.org>
|
||||
Date: Sat, 4 Jan 2020 13:38:08 +0100
|
||||
Subject: [PATCH] man: move cmap_keys man page from section 8 to 7
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Section 8 is for "System administration commands", 7 is "Miscellaneous".
|
||||
|
||||
Signed-off-by: Ferenc Wágner <wferi@debian.org>
|
||||
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
|
||||
---
|
||||
corosync.spec.in | 2 +-
|
||||
man/Makefile.am | 2 +-
|
||||
man/cmap_keys.7 | 397 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
man/cmap_keys.8 | 397 -------------------------------------------------
|
||||
man/cmap_overview.3 | 4 +-
|
||||
man/corosync-cmapctl.8 | 2 +-
|
||||
man/index.html | 2 +-
|
||||
7 files changed, 403 insertions(+), 403 deletions(-)
|
||||
create mode 100644 man/cmap_keys.7
|
||||
delete mode 100644 man/cmap_keys.8
|
||||
|
||||
diff --git a/corosync.spec.in b/corosync.spec.in
|
||||
index c06675d..8ac3757 100644
|
||||
--- a/corosync.spec.in
|
||||
+++ b/corosync.spec.in
|
||||
@@ -217,7 +217,7 @@ fi
|
||||
%{_mandir}/man8/corosync-quorumtool.8*
|
||||
%{_mandir}/man5/corosync.conf.5*
|
||||
%{_mandir}/man5/votequorum.5*
|
||||
-%{_mandir}/man8/cmap_keys.8*
|
||||
+%{_mandir}/man7/cmap_keys.7*
|
||||
|
||||
# library
|
||||
#
|
||||
diff --git a/man/Makefile.am b/man/Makefile.am
|
||||
index 2ef5dcd..92a76ed 100644
|
||||
--- a/man/Makefile.am
|
||||
+++ b/man/Makefile.am
|
||||
@@ -140,7 +140,7 @@ dist_man_MANS = corosync.conf.5 \
|
||||
votequorum_overview.3 \
|
||||
sam_overview.3 \
|
||||
cmap_overview.3 \
|
||||
- cmap_keys.8
|
||||
+ cmap_keys.7
|
||||
|
||||
if BUILD_VQSIM
|
||||
dist_man_MANS += $(corosync_vqsim_man)
|
||||
diff --git a/man/cmap_keys.7 b/man/cmap_keys.7
|
||||
new file mode 100644
|
||||
index 0000000..6bc04fe
|
||||
--- /dev/null
|
||||
+++ b/man/cmap_keys.7
|
||||
@@ -0,0 +1,397 @@
|
||||
+.\"/*
|
||||
+.\" * Copyright (c) 2012-2018 Red Hat, Inc.
|
||||
+.\" *
|
||||
+.\" * All rights reserved.
|
||||
+.\" *
|
||||
+.\" * Author: Jan Friesse (jfriesse@redhat.com)
|
||||
+.\" *
|
||||
+.\" * This software licensed under BSD license, the text of which follows:
|
||||
+.\" *
|
||||
+.\" * Redistribution and use in source and binary forms, with or without
|
||||
+.\" * modification, are permitted provided that the following conditions are met:
|
||||
+.\" *
|
||||
+.\" * - Redistributions of source code must retain the above copyright notice,
|
||||
+.\" * this list of conditions and the following disclaimer.
|
||||
+.\" * - Redistributions in binary form must reproduce the above copyright notice,
|
||||
+.\" * this list of conditions and the following disclaimer in the documentation
|
||||
+.\" * and/or other materials provided with the distribution.
|
||||
+.\" * - Neither the name of the Red Hat, Inc. nor the names of its
|
||||
+.\" * contributors may be used to endorse or promote products derived from this
|
||||
+.\" * software without specific prior written permission.
|
||||
+.\" *
|
||||
+.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
+.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
+.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
+.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
+.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
+.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
+.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
+.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
+.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
+.\" * THE POSSIBILITY OF SUCH DAMAGE.
|
||||
+.\" */
|
||||
+.TH "CMAP_KEYS" 7 "2018-10-08" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
||||
+
|
||||
+.SH NAME
|
||||
+.P
|
||||
+cmap_keys \- Overview of keys stored in the Configuration Map
|
||||
+
|
||||
+.SH OVERVIEW
|
||||
+.P
|
||||
+There are 3 main types of keys stored in CMAP:
|
||||
+.PP
|
||||
+* Mapping of values stored in the config file.
|
||||
+.PP
|
||||
+* Runtime statistics.
|
||||
+.PP
|
||||
+* Other user created values.
|
||||
+
|
||||
+In this man page, wild-cards have the usual meaning.
|
||||
+
|
||||
+.SH ICMAP KEYS
|
||||
+These keys are in the icmap (default) map
|
||||
+.TP
|
||||
+internal_configuration.*
|
||||
+Internal configuration data. All keys in this prefix are read only.
|
||||
+It's only useful for getting a list of loaded services.
|
||||
+
|
||||
+.TP
|
||||
+logging.*
|
||||
+Values read from the configuration file. It's possible to change them at runtime.
|
||||
+If subsystem specific configuration is needed, the key must be in the form
|
||||
+logging.logger_subsys.SERVICE.key, where SERVICE is upper case name of the service and
|
||||
+key is same as in the configuration file. All values are of string type.
|
||||
+
|
||||
+.TP
|
||||
+nodelist.*
|
||||
+Values are read from the configuration file only (dynamic updates are not allowed).
|
||||
+Each node element in the configuration file gets
|
||||
+assigned its position starting from zero. So the first node from the config file has
|
||||
+nodelist.node.0. prefix. To be a valid entry, each node must have
|
||||
+.B ring0_addr
|
||||
+key.
|
||||
+To change the
|
||||
+.B nodeid
|
||||
+key, use a u32 data type.
|
||||
+
|
||||
+Local node position is stored in
|
||||
+.B local_node_pos
|
||||
+key (RO), so it's easy to find
|
||||
+out nodeid/ring addresses of the local node directly from cmap.
|
||||
+
|
||||
+.TP
|
||||
+runtime.blackbox.*
|
||||
+Trigger keys for storing fplay data. It's recommended that you use the corosync-blackbox command
|
||||
+to change keys in this prefix.
|
||||
+
|
||||
+.TP
|
||||
+runtime.force_gather
|
||||
+Set to 'yes' to force the processor to move into the GATHER state. This operation
|
||||
+is dangerous and is not recommended.
|
||||
+
|
||||
+.TP
|
||||
+runtime.config.*
|
||||
+Contains the values actually in use by the totem membership protocol.
|
||||
+Values here are either taken from the Corosync configuration file,
|
||||
+defaults or computed from entries in the config file. For information
|
||||
+on individual keys please refer to the man page
|
||||
+.BR corosync.conf (5).
|
||||
+
|
||||
+.TP
|
||||
+runtime.services.*
|
||||
+Prefix with statistics for service engines. Each service has its own
|
||||
+.B service_id
|
||||
+key in the prefix with the name runtime.services.SERVICE., where SERVICE is the lower case
|
||||
+name of the service. Inside the service prefix is the number of messages received and sent
|
||||
+by the corosync engine in the format runtime.services.SERVICE.EXEC_CALL.rx and
|
||||
+runtime.services.SERVICE.EXEC_CALL.tx, where EXEC_CALL is the internal id of the service
|
||||
+call (so for example 3 in cpg service is receive of multicast message from other
|
||||
+nodes).
|
||||
+
|
||||
+.TP
|
||||
+runtime.totem.members.*
|
||||
+Prefix containing members of the totem single ring protocol. Each member
|
||||
+key has the format runtime.totem.members.NODEID.KEY, where KEY is
|
||||
+one of:
|
||||
+
|
||||
+.B config_version
|
||||
+Config version of the member node.
|
||||
+
|
||||
+.TP
|
||||
+resources.process.PID.*
|
||||
+Prefix created by applications using SAM with CMAP integration.
|
||||
+It contains the following keys:
|
||||
+
|
||||
+.B recovery
|
||||
+Recovery policy of the process. Can be one of quit or restart.
|
||||
+
|
||||
+.B poll_period
|
||||
+Value passed in sam_initialize as a time_interval.
|
||||
+
|
||||
+.B last_updated
|
||||
+Last time SAM received a heartbeat from the client.
|
||||
+
|
||||
+.B state
|
||||
+State of the client. Can be one of failed, stopped, running and waiting for quorum.
|
||||
+
|
||||
+.TP
|
||||
+uidgid.*
|
||||
+Information about users/groups which are allowed to make IPC connections to
|
||||
+corosync. Entries loaded from configuration file are stored with
|
||||
+uidgid.config.* prefix and are pruned on configuration file reload. Dynamic
|
||||
+entries have the uidgid.* prefix and a configuration file reload doesn't affect them.
|
||||
+
|
||||
+.TP
|
||||
+quorum.cancel_wait_for_all
|
||||
+Tells votequorum to cancel waiting for all nodes at cluster startup. Can be used
|
||||
+to unblock quorum if nodes are known to be down. For pcs use only.
|
||||
+
|
||||
+.TP
|
||||
+config.reload_in_progress
|
||||
+This value will be set to 1 (or created) when a corosync.conf reload is started,
|
||||
+and set to 0 when the reload is completed. This allows interested subsystems
|
||||
+to do atomic reconfiguration rather than changing each key. Note that
|
||||
+individual add/change/delete notifications will still be sent during a reload.
|
||||
+
|
||||
+.TP
|
||||
+config.totemconfig_reload_in_progress
|
||||
+This key is similar to
|
||||
+.B config.reload_in_progress
|
||||
+but changed after the totem config trigger is processed. It is useful (mainly)
|
||||
+for situations when
|
||||
+.B nodelist.local_node_pos
|
||||
+must be correctly reinstated before anything else.
|
||||
+
|
||||
+.SH STATS KEYS
|
||||
+These keys are in the stats map. All keys in this map are read-only.
|
||||
+Modification tracking of individual keys is supported in the stats map, but not
|
||||
+prefixes. Add/Delete operations are supported on prefixes though so you can track
|
||||
+for new ipc connections or knet interfaces.
|
||||
+.TP
|
||||
+stats.srp.*
|
||||
+Prefix containing statistics about totem.
|
||||
+Typical key prefixes:
|
||||
+
|
||||
+.B commit_entered
|
||||
+Number of times the processor entered COMMIT state.
|
||||
+
|
||||
+.B commit_token_lost
|
||||
+Number of times the processor lost token in COMMIT state.
|
||||
+
|
||||
+.B consensus_timeouts
|
||||
+How many times the processor timed out forming a consensus about membership.
|
||||
+
|
||||
+.B continuous_gather
|
||||
+How many times the processor was not able to reach consensus.
|
||||
+
|
||||
+.B firewall_enabled_or_nic_failure
|
||||
+Set to 1 when processor was not able to reach consensus for long time. The usual
|
||||
+reason is a badly configured firewall or connection failure.
|
||||
+
|
||||
+.B gather_entered
|
||||
+Number of times the processor entered GATHER state.
|
||||
+
|
||||
+.B gather_token_lost
|
||||
+Number of times the processor lost token in GATHER state.
|
||||
+
|
||||
+.B mcast_retx
|
||||
+Number of retransmitted messages.
|
||||
+
|
||||
+.B mcast_rx
|
||||
+Number of received multicast messages.
|
||||
+
|
||||
+.B mcast_tx
|
||||
+Number of transmitted multicast messages.
|
||||
+
|
||||
+.B memb_commit_token_rx
|
||||
+Number of received commit tokens.
|
||||
+
|
||||
+.B memb_commit_token_tx
|
||||
+Number of transmitted commit tokens.
|
||||
+
|
||||
+.B memb_join_rx
|
||||
+Number of received join messages.
|
||||
+
|
||||
+.B memb_join_tx
|
||||
+Number of transmitted join messages.
|
||||
+
|
||||
+.B memb_merge_detect_rx
|
||||
+Number of received member merge messages.
|
||||
+
|
||||
+.B memb_merge_detect_tx
|
||||
+Number of transmitted member merge messages.
|
||||
+
|
||||
+.B orf_token_rx
|
||||
+Number of received orf tokens.
|
||||
+
|
||||
+.B orf_token_tx
|
||||
+Number of transmitted orf tokens.
|
||||
+
|
||||
+.B recovery_entered
|
||||
+Number of times the processor entered recovery.
|
||||
+
|
||||
+.B recovery_token_lost
|
||||
+Number of times the token was lost in recovery state.
|
||||
+
|
||||
+.B rx_msg_dropped
|
||||
+Number of received messages which were dropped because they were not expected
|
||||
+(for example, a multicast message in commit state).
|
||||
+
|
||||
+.B token_hold_cancel_rx
|
||||
+Number of received token hold cancel messages.
|
||||
+
|
||||
+.B token_hold_cancel_tx
|
||||
+Number of transmitted token hold cancel messages.
|
||||
+
|
||||
+.B mtt_rx_token
|
||||
+Mean transit time of token in milliseconds. In other words, time between
|
||||
+two consecutive token receives.
|
||||
+
|
||||
+.B avg_token_workload
|
||||
+Average time in milliseconds of holding time of token on the current processor.
|
||||
+
|
||||
+.B avg_backlog_calc
|
||||
+Average number of not yet sent messages on the current processor.
|
||||
+
|
||||
+.TP
|
||||
+stats.knet.nodeX.linkY.*
|
||||
+Statistics about the network traffic to and from each node and link when using
|
||||
+the kronosnet transport
|
||||
+
|
||||
+.B connected
|
||||
+Whether the link is connected or not
|
||||
+
|
||||
+.B up_count
|
||||
+Number of times this link has changed state to UP
|
||||
+
|
||||
+.B down_count
|
||||
+Number of times this link has changed state to DOWN
|
||||
+
|
||||
+.B latency_ave / latency_min / latency_max
|
||||
+Calculated latencies of this link. Note that if there has been no traffic
|
||||
+on the link then latency_min will show a very large number.
|
||||
+
|
||||
+.B latency_samples
|
||||
+The number of samples used to calculate the latency figures, so you have
|
||||
+some idea of their precision.
|
||||
+
|
||||
+.B rx_data_packets / tx_data_packets
|
||||
+The number of packets sent/received on this link
|
||||
+
|
||||
+.B rx_data_bytes / tx_data_bytes
|
||||
+The number of bytes sent/received on this link
|
||||
+
|
||||
+.B rx_pmtu_packets / tx_pmtu_packets
|
||||
+The number of packets sent/received by the PMTUd subsystem
|
||||
+
|
||||
+.B rx_pmtu_bytes / tx_pmtu_bytes
|
||||
+The number of bytes sent/received by the PMTUd subsystem
|
||||
+
|
||||
+.B rx_ping_packets / tx_ping_packets
|
||||
+The number of packets sent/received as pings
|
||||
+
|
||||
+.B rx_ping_bytes / tx_ping_bytes
|
||||
+The number of bytes sent/received as pings
|
||||
+
|
||||
+.B rx_pong_packets / tx_pong_packets
|
||||
+The number of packets sent/received as pongs
|
||||
+
|
||||
+.B rx_pong_bytes / tx_pong_bytes
|
||||
+The number of bytes sent/received as pongs
|
||||
+
|
||||
+.B rx_total_packets / tx_total_packets
|
||||
+The total number of packets sent/received. The aggregate of all of the above packet stats
|
||||
+
|
||||
+.B rx_total_bytes / tx_total_bytes
|
||||
+The total number of bytes sent/received. The aggregate of all of the above bytes stats
|
||||
+
|
||||
+.B tx_data_retries / tx_pmtu_retries / tx_ping_retries / tx_pong_retries / tx_total_retries
|
||||
+Number of times a transmit operation had to be retried due to the socket returning EAGAIN
|
||||
+
|
||||
+.TP
|
||||
+stats.ipcs.*
|
||||
+There is information about total number of active connections from client programs
|
||||
+at the time the request was made.
|
||||
+.B active
|
||||
+number of closed connections during whole runtime of corosync
|
||||
+.B closed
|
||||
+Total number of connections that have been made since corosync was started
|
||||
+
|
||||
+.TP
|
||||
+stats.ipcs.ID.*
|
||||
+Each IPC connection has a unique ID. This is in the form [serviceX:][PID:]internal_id.
|
||||
+
|
||||
+Typical keys in this prefix are:
|
||||
+
|
||||
+.B proc_name
|
||||
+process name of connected process (unavailable on some platforms)
|
||||
+
|
||||
+.B dispatched
|
||||
+number of dispatched messages.
|
||||
+
|
||||
+.B invalid_request
|
||||
+number of requests made by IPC which are invalid (calling non-existing call, ...).
|
||||
+
|
||||
+.B name
|
||||
+contains short name of the IPC connection (unavailable on some platforms).
|
||||
+
|
||||
+.B overload
|
||||
+is number of requests which were not processed because of overload.
|
||||
+
|
||||
+.B queue_size
|
||||
+contains the number of messages in the queue waiting for send.
|
||||
+
|
||||
+.B recv_retries
|
||||
+is the total number of interrupted receives.
|
||||
+
|
||||
+.B requests
|
||||
+contains the number of requests made by IPC.
|
||||
+
|
||||
+.B responses
|
||||
+is the number of responses sent to the IPC client.
|
||||
+
|
||||
+.B send_retries
|
||||
+contains the total number of interrupted sends.
|
||||
+
|
||||
+.B service_id
|
||||
+contains the ID of service which the IPC is connected to.
|
||||
+
|
||||
+.TP
|
||||
+stats.clear.*
|
||||
+These are write-only keys used to clear the stats for various subsystems
|
||||
+
|
||||
+.B totem
|
||||
+Clears the pg & srp totem stats.
|
||||
+
|
||||
+.B knet
|
||||
+Clears the knet stats
|
||||
+
|
||||
+.B ipc
|
||||
+Clears the ipc stats
|
||||
+
|
||||
+.B all
|
||||
+Clears all of the above stats
|
||||
+
|
||||
+
|
||||
+.SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC
|
||||
+Is the same as in the configuration file. eg: to add UID 500 use
|
||||
+
|
||||
+.br
|
||||
+# corosync-cmapctl -s uidgid.uid.500 u8 1
|
||||
+
|
||||
+GID is similar, so to add a GID use
|
||||
+
|
||||
+.br
|
||||
+# corosync-cmapctl -s uidgid.gid.500 u8 1
|
||||
+
|
||||
+For removal of permissions, simply delete the key
|
||||
+
|
||||
+.br
|
||||
+# corosync-cmapctl -d uidgid.gid.500
|
||||
+
|
||||
+
|
||||
+.SH "SEE ALSO"
|
||||
+.BR corosync_overview (7),
|
||||
+.BR corosync.conf (5),
|
||||
+.BR corosync-cmapctl (8)
|
||||
diff --git a/man/cmap_keys.8 b/man/cmap_keys.8
|
||||
deleted file mode 100644
|
||||
index e2ea1fb..0000000
|
||||
--- a/man/cmap_keys.8
|
||||
+++ /dev/null
|
||||
@@ -1,397 +0,0 @@
|
||||
-.\"/*
|
||||
-.\" * Copyright (c) 2012-2018 Red Hat, Inc.
|
||||
-.\" *
|
||||
-.\" * All rights reserved.
|
||||
-.\" *
|
||||
-.\" * Author: Jan Friesse (jfriesse@redhat.com)
|
||||
-.\" *
|
||||
-.\" * This software licensed under BSD license, the text of which follows:
|
||||
-.\" *
|
||||
-.\" * Redistribution and use in source and binary forms, with or without
|
||||
-.\" * modification, are permitted provided that the following conditions are met:
|
||||
-.\" *
|
||||
-.\" * - Redistributions of source code must retain the above copyright notice,
|
||||
-.\" * this list of conditions and the following disclaimer.
|
||||
-.\" * - Redistributions in binary form must reproduce the above copyright notice,
|
||||
-.\" * this list of conditions and the following disclaimer in the documentation
|
||||
-.\" * and/or other materials provided with the distribution.
|
||||
-.\" * - Neither the name of the Red Hat, Inc. nor the names of its
|
||||
-.\" * contributors may be used to endorse or promote products derived from this
|
||||
-.\" * software without specific prior written permission.
|
||||
-.\" *
|
||||
-.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
-.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
-.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
-.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
-.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
-.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
-.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
-.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
-.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
-.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
-.\" * THE POSSIBILITY OF SUCH DAMAGE.
|
||||
-.\" */
|
||||
-.TH "CMAP_KEYS" 8 "2018-10-08" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
|
||||
-
|
||||
-.SH NAME
|
||||
-.P
|
||||
-cmap_keys \- Overview of keys stored in the Configuration Map
|
||||
-
|
||||
-.SH OVERVIEW
|
||||
-.P
|
||||
-There are 3 main types of keys stored in CMAP:
|
||||
-.PP
|
||||
-* Mapping of values stored in the config file.
|
||||
-.PP
|
||||
-* Runtime statistics.
|
||||
-.PP
|
||||
-* Other user created values.
|
||||
-
|
||||
-In this man page, wild-cards have the usual meaning.
|
||||
-
|
||||
-.SH ICMAP KEYS
|
||||
-These keys are in the icmap (default) map
|
||||
-.TP
|
||||
-internal_configuration.*
|
||||
-Internal configuration data. All keys in this prefix are read only.
|
||||
-It's only useful for getting a list of loaded services.
|
||||
-
|
||||
-.TP
|
||||
-logging.*
|
||||
-Values read from the configuration file. It's possible to change them at runtime.
|
||||
-If subsystem specific configuration is needed, the key must be in the form
|
||||
-logging.logger_subsys.SERVICE.key, where SERVICE is upper case name of the service and
|
||||
-key is same as in the configuration file. All values are of string type.
|
||||
-
|
||||
-.TP
|
||||
-nodelist.*
|
||||
-Values are read from the configuration file only (dynamic updates are not allowed).
|
||||
-Each node element in the configuration file gets
|
||||
-assigned its position starting from zero. So the first node from the config file has
|
||||
-nodelist.node.0. prefix. To be a valid entry, each node must have
|
||||
-.B ring0_addr
|
||||
-key.
|
||||
-To change the
|
||||
-.B nodeid
|
||||
-key, use a u32 data type.
|
||||
-
|
||||
-Local node position is stored in
|
||||
-.B local_node_pos
|
||||
-key (RO), so it's easy to find
|
||||
-out nodeid/ring addresses of the local node directly from cmap.
|
||||
-
|
||||
-.TP
|
||||
-runtime.blackbox.*
|
||||
-Trigger keys for storing fplay data. It's recommended that you use the corosync-blackbox command
|
||||
-to change keys in this prefix.
|
||||
-
|
||||
-.TP
|
||||
-runtime.force_gather
|
||||
-Set to 'yes' to force the processor to move into the GATHER state. This operation
|
||||
-is dangerous and is not recommended.
|
||||
-
|
||||
-.TP
|
||||
-runtime.config.*
|
||||
-Contains the values actually in use by the totem membership protocol.
|
||||
-Values here are either taken from the Corosync configuration file,
|
||||
-defaults or computed from entries in the config file. For information
|
||||
-on individual keys please refer to the man page
|
||||
-.BR corosync.conf (5).
|
||||
-
|
||||
-.TP
|
||||
-runtime.services.*
|
||||
-Prefix with statistics for service engines. Each service has its own
|
||||
-.B service_id
|
||||
-key in the prefix with the name runtime.services.SERVICE., where SERVICE is the lower case
|
||||
-name of the service. Inside the service prefix is the number of messages received and sent
|
||||
-by the corosync engine in the format runtime.services.SERVICE.EXEC_CALL.rx and
|
||||
-runtime.services.SERVICE.EXEC_CALL.tx, where EXEC_CALL is the internal id of the service
|
||||
-call (so for example 3 in cpg service is receive of multicast message from other
|
||||
-nodes).
|
||||
-
|
||||
-.TP
|
||||
-runtime.totem.members.*
|
||||
-Prefix containing members of the totem single ring protocol. Each member
|
||||
-keys has format runtime.totem.members.NODEID.KEY, where key is
|
||||
-one of:
|
||||
-
|
||||
-.B config_version
|
||||
-Config version of the member node.
|
||||
-
|
||||
-.TP
|
||||
-resources.process.PID.*
|
||||
-Prefix created by applications using SAM with CMAP integration.
|
||||
-It contains the following keys:
|
||||
-
|
||||
-.B recovery
|
||||
-Recovery policy of the process. Can be one of quit or restart.
|
||||
-
|
||||
-.B poll_period
|
||||
-Value passed in sam_initialize as a time_interval.
|
||||
-
|
||||
-.B last_updated
|
||||
-Last time SAM received a heartbeat from the client.
|
||||
-
|
||||
-.B state
|
||||
-State of the client. Can be one of failed, stopped, running and waiting for quorum.
|
||||
-
|
||||
-.TP
|
||||
-uidgid.*
|
||||
-Information about users/groups which are allowed to make IPC connections to
|
||||
-corosync. Entries loaded from configuration file are stored with
|
||||
-uidgid.config.* prefix and are pruned on configuration file reload. Dynamic
|
||||
-entries has uidgid.* prefix and a configuration file reload doesn't affect them.
|
||||
-
|
||||
-.TP
|
||||
-quorum.cancel_wait_for_all
|
||||
-Tells votequorum to cancel waiting for all nodes at cluster startup. Can be used
|
||||
-to unblock quorum if notes are known to be down. For pcs use only.
|
||||
-
|
||||
-.TP
|
||||
-config.reload_in_progress
|
||||
-This value will be set to 1 (or created) when a corosync.conf reload is started,
|
||||
-and set to 0 when the reload is completed. This allows interested subsystems
|
||||
-to do atomic reconfiguration rather than changing each key. Note that
|
||||
-individual add/change/delete notifications will still be sent during a reload.
|
||||
-
|
||||
-.TP
|
||||
-config.totemconfig_reload_in_progress
|
||||
-This key is similar to
|
||||
-.B config.totemconfig_reload_in_progress
|
||||
-but changed after the totem config trigger is processed. It is useful (mainly)
|
||||
-for situations when
|
||||
-.B nodelist.local_node_pos
|
||||
-must be correctly reinstated before anything else.
|
||||
-
|
||||
-.SH STATS KEYS
|
||||
-These keys are in the stats map. All keys in this map are read-only.
|
||||
-Modification tracking of individual keys is supported in the stats map, but not
|
||||
-prefixes. Add/Delete operations are supported on prefixes though so you can track
|
||||
-for new ipc connections or knet interfaces.
|
||||
-.TP
|
||||
-stats.srp.*
|
||||
-Prefix containing statistics about totem.
|
||||
-Typical key prefixes:
|
||||
-
|
||||
-.B commit_entered
|
||||
-Number of times the processor entered COMMIT state.
|
||||
-
|
||||
-.B commit_token_lost
|
||||
-Number of times the processor lost token in COMMIT state.
|
||||
-
|
||||
-.B consensus_timeouts
|
||||
-How many times the processor timed out forming a consensus about membership.
|
||||
-
|
||||
-.B continuous_gather
|
||||
-How many times the processor was not able to reach consensus.
|
||||
-
|
||||
-.B firewall_enabled_or_nic_failure
|
||||
-Set to 1 when processor was not able to reach consensus for long time. The usual
|
||||
-reason is a badly configured firewall or connection failure.
|
||||
-
|
||||
-.B gather_entered
|
||||
-Number of times the processor entered GATHER state.
|
||||
-
|
||||
-.B gather_token_lost
|
||||
-Number of times the processor lost token in GATHER state.
|
||||
-
|
||||
-.B mcast_retx
|
||||
-Number of retransmitted messages.
|
||||
-
|
||||
-.B mcast_rx
|
||||
-Number of received multicast messages.
|
||||
-
|
||||
-.B mcast_tx
|
||||
-Number of transmitted multicast messages.
|
||||
-
|
||||
-.B memb_commit_token_rx
|
||||
-Number of received commit tokens.
|
||||
-
|
||||
-.B memb_commit_token_tx
|
||||
-Number of transmitted commit tokens.
|
||||
-
|
||||
-.B memb_join_rx
|
||||
-Number of received join messages.
|
||||
-
|
||||
-.B memb_join_tx
|
||||
-Number of transmitted join messages.
|
||||
-
|
||||
-.B memb_merge_detect_rx
|
||||
-Number of received member merge messages.
|
||||
-
|
||||
-.B memb_merge_detect_tx
|
||||
-Number of transmitted member merge messages.
|
||||
-
|
||||
-.B orf_token_rx
|
||||
-Number of received orf tokens.
|
||||
-
|
||||
-.B orf_token_tx
|
||||
-Number of transmitted orf tokens.
|
||||
-
|
||||
-.B recovery_entered
|
||||
-Number of times the processor entered recovery.
|
||||
-
|
||||
-.B recovery_token_lost
|
||||
-Number of times the token was lost in recovery state.
|
||||
-
|
||||
-.B rx_msg_dropped
|
||||
-Number of received messages which were dropped because they were not expected
|
||||
-(as example multicast message in commit state).
|
||||
-
|
||||
-.B token_hold_cancel_rx
|
||||
-Number of received token hold cancel messages.
|
||||
-
|
||||
-.B token_hold_cancel_tx
|
||||
-Number of transmitted token hold cancel messages.
|
||||
-
|
||||
-.B mtt_rx_token
|
||||
-Mean transit time of token in milliseconds. In other words, time between
|
||||
-two consecutive token receives.
|
||||
-
|
||||
-.B avg_token_workload
|
||||
-Average time in milliseconds of holding time of token on the current processor.
|
||||
-
|
||||
-.B avg_backlog_calc
|
||||
-Average number of not yet sent messages on the current processor.
|
||||
-
|
||||
-.TP
|
||||
-stats.knet.nodeX.linkY.*
|
||||
-Statistics about the network traffic to and from each node and link when using
|
||||
-tke kronosnet transport
|
||||
-
|
||||
-.B connected
|
||||
-Whether the link is connected or not
|
||||
-
|
||||
-.B up_count
|
||||
-Number of times this link has changed state to UP
|
||||
-
|
||||
-.B down_count
|
||||
-Number of times this link has changed state to DOWN
|
||||
-
|
||||
-.B latency_ave / latency_max / latency_max
|
||||
-Calculated latencies of this link. Note that if there has been no traffic
|
||||
-on the link then latency_min will show a very large number.
|
||||
-
|
||||
-.B latency_samples
|
||||
-The number of samples used to calculate the latency figures, so you have
|
||||
-some idea of their precision.
|
||||
-
|
||||
-.B rx_data_packets / tx_data_packets
|
||||
-The number of packets sent/received on this link
|
||||
-
|
||||
-.B rx_data_bytes / tx_data_bytes
|
||||
-The number of bytes sent/received on this link
|
||||
-
|
||||
-.B rx_pmtu_packets / tx_pmtu_packets
|
||||
-The number of packets sent/received by the PMTUd subsystem
|
||||
-
|
||||
-.B rx_pmtu_bytes / tx_pmtu_bytes
|
||||
-The number of bytes sent/received by the PMTUd subsystem
|
||||
-
|
||||
-.B rx_ping_packets / tx_ping_packets
|
||||
-The number of packets sent/received as pings
|
||||
-
|
||||
-.B rx_ping_bytes / tx_ping_bytes
|
||||
-The number of bytes sent/received as pings
|
||||
-
|
||||
-.B rx_pong_packets / tx_pong_packets
|
||||
-The number of packets sent/received as pongs
|
||||
-
|
||||
-.B rx_pong_bytes / tx_pong_bytes
|
||||
-The number of bytes sent/received as pongs
|
||||
-
|
||||
-.B rx_total_packets / tx_total_packets
|
||||
-The total number of packets sent/received. The aggregate of all of the above packet stats
|
||||
-
|
||||
-.B rx_total_bytes / tx_total_bytes
|
||||
-The total number of bytes sent/received. The aggregate of all of the above bytes stats
|
||||
-
|
||||
-.B tx_data_retries / tx_pmtu_retries / tx_ping_retries / tx_pong_retries / tx_total_retries
|
||||
-Number of times a transmit operation had to be retried due to the socket returning EAGAIN
|
||||
-
|
||||
-.TP
|
||||
-stats.ipcs.*
|
||||
-There is information about total number of active connections from client programs
|
||||
-at the time the request was made.
|
||||
-.B active
|
||||
-number of closed connections during whole runtime of corosync
|
||||
-.B closed
|
||||
-Total number of connections that have been made since corosync was started
|
||||
-
|
||||
-.TP
|
||||
-stats.ipcs.ID.*
|
||||
-Each IPC connection has a unique ID. This is in the form [[serviceX:][PID:]internal_id.
|
||||
-
|
||||
-Typical keys in this prefix are:
|
||||
-
|
||||
-.B proc_name
|
||||
-process name of connected process (unavailable on some platforms)
|
||||
-
|
||||
-.B dispatched
|
||||
-number of dispatched messages.
|
||||
-
|
||||
-.B invalid_request
|
||||
-number of requests made by IPC which are invalid (calling non-existing call, ...).
|
||||
-
|
||||
-.B name
|
||||
-contains short name of the IPC connection (unavailable on some platforms).
|
||||
-
|
||||
-.B overload
|
||||
-is number of requests which were not processed because of overload.
|
||||
-
|
||||
-.B queue_size
|
||||
-contains the number of messages in the queue waiting for send.
|
||||
-
|
||||
-.B recv_retries
|
||||
-is the total number of interrupted receives.
|
||||
-
|
||||
-.B requests
|
||||
-contains the number of requests made by IPC.
|
||||
-
|
||||
-.B responses
|
||||
-is the number of responses sent to the IPC client.
|
||||
-
|
||||
-.B send_retries
|
||||
-contains the total number of interrupted sends.
|
||||
-
|
||||
-.B service_id
|
||||
-contains the ID of service which the IPC is connected to.
|
||||
-
|
||||
-.TP
|
||||
-stats.clear.*
|
||||
-These are write-only keys used to clear the stats for various subsystems
|
||||
-
|
||||
-.B totem
|
||||
-Clears the pg & srp totem stats.
|
||||
-
|
||||
-.B knet
|
||||
-Clears the knet stats
|
||||
-
|
||||
-.B ipc
|
||||
-Clears the ipc stats
|
||||
-
|
||||
-.B all
|
||||
-Clears all of the above stats
|
||||
-
|
||||
-
|
||||
-.SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC
|
||||
-Is the same as in the configuration file. eg: to add UID 500 use
|
||||
-
|
||||
-.br
|
||||
-# corosync-cmapctl -s uidgid.uid.500 u8 1
|
||||
-
|
||||
-GID is similar, so to add a GID use
|
||||
-
|
||||
-.br
|
||||
-# corosync-cmapctl -s uidgid.gid.500 u8 1
|
||||
-
|
||||
-For removal of permissions, simply delete the key
|
||||
-
|
||||
-.br
|
||||
-# corosync-cmapctl -d uidgid.gid.500
|
||||
-
|
||||
-
|
||||
-.SH "SEE ALSO"
|
||||
-.BR corosync_overview (7),
|
||||
-.BR corosync.conf (5),
|
||||
-.BR corosync-cmapctl (8)
|
||||
diff --git a/man/cmap_overview.3 b/man/cmap_overview.3
|
||||
index cf4cabb..0aa3c14 100644
|
||||
--- a/man/cmap_overview.3
|
||||
+++ b/man/cmap_overview.3
|
||||
@@ -54,7 +54,7 @@ The library provides a mechanism to:
|
||||
.PP
|
||||
* Track changes on keys
|
||||
|
||||
-Description of most keys created by corosync itself can be found in cmap_keys (8).
|
||||
+Description of most keys created by corosync itself can be found in cmap_keys (7).
|
||||
|
||||
.SH BUGS
|
||||
.SH "SEE ALSO"
|
||||
@@ -75,4 +75,4 @@ Description of most keys created by corosync itself can be found in cmap_keys (8
|
||||
.BR cmap_iter_finalize (3),
|
||||
.BR cmap_track_add (3),
|
||||
.BR cmap_track_delete (3),
|
||||
-.BR cmap_keys (8)
|
||||
+.BR cmap_keys (7)
|
||||
diff --git a/man/corosync-cmapctl.8 b/man/corosync-cmapctl.8
|
||||
index 637e597..8826503 100644
|
||||
--- a/man/corosync-cmapctl.8
|
||||
+++ b/man/corosync-cmapctl.8
|
||||
@@ -96,4 +96,4 @@ corosync\-cmapctl \fB\-C\fR [ipc|totem|knet|all]
|
||||
|
||||
.SH "SEE ALSO"
|
||||
.BR cmap_overview (3),
|
||||
-.BR cmap_keys (8)
|
||||
+.BR cmap_keys (7)
|
||||
diff --git a/man/index.html b/man/index.html
|
||||
index f4819e5..21326dc 100644
|
||||
--- a/man/index.html
|
||||
+++ b/man/index.html
|
||||
@@ -63,7 +63,7 @@
|
||||
Description of corosync-cmapctl tool.
|
||||
<br>
|
||||
|
||||
- <a href="cmap_keys.8.html">cmap_keys(8)</a>:
|
||||
+ <a href="cmap_keys.7.html">cmap_keys(7)</a>:
|
||||
Overview of keys stored in the Configuration Map.
|
||||
<br>
|
||||
|
||||
--
|
||||
1.8.3.1
|
||||
|
320
SOURCES/bz1791792-2-stats-Add-stats-for-scheduler-misses.patch
Normal file
320
SOURCES/bz1791792-2-stats-Add-stats-for-scheduler-misses.patch
Normal file
@ -0,0 +1,320 @@
|
||||
From 48b6894ef41e9a06ccbb696d062d86ef60dc2c4b Mon Sep 17 00:00:00 2001
|
||||
From: Christine Caulfield <ccaulfie@redhat.com>
|
||||
Date: Fri, 17 Jan 2020 14:22:16 +0000
|
||||
Subject: [PATCH] stats: Add stats for scheduler misses
|
||||
|
||||
This patch adds a stats.schedmiss.* set of entries that
|
||||
are a record of the last 10 times corosync was not scheduled
|
||||
in time.
|
||||
|
||||
These entries are kept in reverse order (so stats.schedmiss.0.* is
|
||||
always the latest one kept) and the values, including the timestamp,
|
||||
are in milliseconds.
|
||||
|
||||
It's also possible to use a cmap tracker to follow these events, which
|
||||
might be useful.
|
||||
|
||||
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
|
||||
---
|
||||
exec/main.c | 2 +
|
||||
exec/stats.c | 113 +++++++++++++++++++++++++++++++++++++++++++----
|
||||
exec/stats.h | 2 +
|
||||
man/cmap_keys.7 | 26 ++++++++++-
|
||||
tools/corosync-cmapctl.c | 5 ++-
|
||||
5 files changed, 136 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/exec/main.c b/exec/main.c
|
||||
index 7a471a1..fb0486e 100644
|
||||
--- a/exec/main.c
|
||||
+++ b/exec/main.c
|
||||
@@ -835,6 +835,8 @@ static void timer_function_scheduler_timeout (void *data)
|
||||
log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
|
||||
"(threshold is %0.4f ms). Consider token timeout increase.",
|
||||
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
+
|
||||
+ stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
}
|
||||
|
||||
/*
|
||||
diff --git a/exec/stats.c b/exec/stats.c
|
||||
index e89504e..d5c1cbc 100644
|
||||
--- a/exec/stats.c
|
||||
+++ b/exec/stats.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (c) 2017 Red Hat, Inc.
|
||||
+ * Copyright (c) 2017-2020 Red Hat, Inc.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
@@ -60,9 +60,20 @@ LOGSYS_DECLARE_SUBSYS ("STATS");
|
||||
|
||||
static qb_map_t *stats_map;
|
||||
|
||||
+/* Structure of an element in the schedmiss array */
|
||||
+struct schedmiss_entry {
|
||||
+ uint64_t timestamp;
|
||||
+ float delay;
|
||||
+};
|
||||
+#define MAX_SCHEDMISS_EVENTS 10
|
||||
+static struct schedmiss_entry schedmiss_event[MAX_SCHEDMISS_EVENTS];
|
||||
+static unsigned int highest_schedmiss_event;
|
||||
+
|
||||
+#define SCHEDMISS_PREFIX "stats.schedmiss"
|
||||
+
|
||||
/* Convert iterator number to text and a stats pointer */
|
||||
struct cs_stats_conv {
|
||||
- enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG} type;
|
||||
+ enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG, STAT_SCHEDMISS} type;
|
||||
const char *name;
|
||||
const size_t offset;
|
||||
const icmap_value_types_t value_type;
|
||||
@@ -190,6 +201,10 @@ struct cs_stats_conv cs_ipcs_global_stats[] = {
|
||||
{ STAT_IPCSG, "global.active", offsetof(struct ipcs_global_stats, active), ICMAP_VALUETYPE_UINT64},
|
||||
{ STAT_IPCSG, "global.closed", offsetof(struct ipcs_global_stats, closed), ICMAP_VALUETYPE_UINT64},
|
||||
};
|
||||
+struct cs_stats_conv cs_schedmiss_stats[] = {
|
||||
+ { STAT_SCHEDMISS, "timestamp", offsetof(struct schedmiss_entry, timestamp), ICMAP_VALUETYPE_UINT64},
|
||||
+ { STAT_SCHEDMISS, "delay", offsetof(struct schedmiss_entry, delay), ICMAP_VALUETYPE_FLOAT},
|
||||
+};
|
||||
|
||||
#define NUM_PG_STATS (sizeof(cs_pg_stats) / sizeof(struct cs_stats_conv))
|
||||
#define NUM_SRP_STATS (sizeof(cs_srp_stats) / sizeof(struct cs_stats_conv))
|
||||
@@ -286,7 +301,7 @@ cs_error_t stats_map_init(const struct corosync_api_v1 *corosync_api)
|
||||
stats_add_entry(param, &cs_ipcs_global_stats[i]);
|
||||
}
|
||||
|
||||
- /* KNET and IPCS stats are added when appropriate */
|
||||
+ /* KNET, IPCS & SCHEDMISS stats are added when appropriate */
|
||||
return CS_OK;
|
||||
}
|
||||
|
||||
@@ -307,6 +322,8 @@ cs_error_t stats_map_get(const char *key_name,
|
||||
int link_no;
|
||||
int service_id;
|
||||
uint32_t pid;
|
||||
+ unsigned int sm_event;
|
||||
+ char *sm_type;
|
||||
void *conn_ptr;
|
||||
|
||||
item = qb_map_get(stats_map, key_name);
|
||||
@@ -363,17 +380,85 @@ cs_error_t stats_map_get(const char *key_name,
|
||||
cs_ipcs_get_global_stats(&ipcs_global_stats);
|
||||
stats_map_set_value(statinfo, &ipcs_global_stats, value, value_len, type);
|
||||
break;
|
||||
+ case STAT_SCHEDMISS:
|
||||
+ if (sscanf(key_name, SCHEDMISS_PREFIX ".%d", &sm_event) != 1) {
|
||||
+ return CS_ERR_NOT_EXIST;
|
||||
+ }
|
||||
+
|
||||
+ sm_type = strrchr(key_name, '.');
|
||||
+ if (sm_type == NULL) {
|
||||
+ return CS_ERR_NOT_EXIST;
|
||||
+ }
|
||||
+ sm_type++;
|
||||
+
|
||||
+ if (strcmp(sm_type, "timestamp") == 0) {
|
||||
+ memcpy(value, &schedmiss_event[sm_event].timestamp, sizeof(uint64_t));
|
||||
+ *value_len = sizeof(uint64_t);
|
||||
+ *type = ICMAP_VALUETYPE_UINT64;
|
||||
+ }
|
||||
+ if (strcmp(sm_type, "delay") == 0) {
|
||||
+ memcpy(value, &schedmiss_event[sm_event].delay, sizeof(float));
|
||||
+ *value_len = sizeof(float);
|
||||
+ *type = ICMAP_VALUETYPE_FLOAT;
|
||||
+ }
|
||||
+ break;
|
||||
default:
|
||||
return CS_ERR_LIBRARY;
|
||||
}
|
||||
return CS_OK;
|
||||
}
|
||||
|
||||
-#define STATS_CLEAR "stats.clear."
|
||||
-#define STATS_CLEAR_KNET "stats.clear.knet"
|
||||
-#define STATS_CLEAR_IPC "stats.clear.ipc"
|
||||
-#define STATS_CLEAR_TOTEM "stats.clear.totem"
|
||||
-#define STATS_CLEAR_ALL "stats.clear.all"
|
||||
+static void schedmiss_clear_stats(void)
|
||||
+{
|
||||
+ int i;
|
||||
+ char param[ICMAP_KEYNAME_MAXLEN];
|
||||
+
|
||||
+ for (i=0; i<MAX_SCHEDMISS_EVENTS; i++) {
|
||||
+ if (i < highest_schedmiss_event) {
|
||||
+ sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", i);
|
||||
+ stats_rm_entry(param);
|
||||
+ sprintf(param, SCHEDMISS_PREFIX ".%i.delay", i);
|
||||
+ stats_rm_entry(param);
|
||||
+ }
|
||||
+ schedmiss_event[i].timestamp = (uint64_t)0LL;
|
||||
+ schedmiss_event[i].delay = 0.0f;
|
||||
+ }
|
||||
+ highest_schedmiss_event = 0;
|
||||
+}
|
||||
+
|
||||
+/* Called from main.c */
|
||||
+void stats_add_schedmiss_event(uint64_t timestamp, float delay)
|
||||
+{
|
||||
+ char param[ICMAP_KEYNAME_MAXLEN];
|
||||
+ int i;
|
||||
+
|
||||
+ /* Move 'em all up */
|
||||
+ for (i=MAX_SCHEDMISS_EVENTS-2; i>=0; i--) {
|
||||
+ schedmiss_event[i+1].timestamp = schedmiss_event[i].timestamp;
|
||||
+ schedmiss_event[i+1].delay = schedmiss_event[i].delay;
|
||||
+ }
|
||||
+
|
||||
+ /* New entries are always at the front */
|
||||
+ schedmiss_event[0].timestamp = timestamp;
|
||||
+ schedmiss_event[0].delay = delay;
|
||||
+
|
||||
+ /* If we've not run off the end then add an entry in the trie for the new 'end' one */
|
||||
+ if (highest_schedmiss_event < MAX_SCHEDMISS_EVENTS) {
|
||||
+ sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", highest_schedmiss_event);
|
||||
+ stats_add_entry(param, &cs_schedmiss_stats[0]);
|
||||
+ sprintf(param, SCHEDMISS_PREFIX ".%i.delay", highest_schedmiss_event);
|
||||
+ stats_add_entry(param, &cs_schedmiss_stats[1]);
|
||||
+ highest_schedmiss_event++;
|
||||
+ }
|
||||
+ /* Notifications get sent by the stats_updater */
|
||||
+}
|
||||
+
|
||||
+#define STATS_CLEAR "stats.clear."
|
||||
+#define STATS_CLEAR_KNET "stats.clear.knet"
|
||||
+#define STATS_CLEAR_IPC "stats.clear.ipc"
|
||||
+#define STATS_CLEAR_TOTEM "stats.clear.totem"
|
||||
+#define STATS_CLEAR_ALL "stats.clear.all"
|
||||
+#define STATS_CLEAR_SCHEDMISS "stats.clear.schedmiss"
|
||||
|
||||
cs_error_t stats_map_set(const char *key_name,
|
||||
const void *value,
|
||||
@@ -394,9 +479,14 @@ cs_error_t stats_map_set(const char *key_name,
|
||||
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TOTEM);
|
||||
cleared = 1;
|
||||
}
|
||||
+ if (strncmp(key_name, STATS_CLEAR_SCHEDMISS, strlen(STATS_CLEAR_SCHEDMISS)) == 0) {
|
||||
+ schedmiss_clear_stats();
|
||||
+ cleared = 1;
|
||||
+ }
|
||||
if (strncmp(key_name, STATS_CLEAR_ALL, strlen(STATS_CLEAR_ALL)) == 0) {
|
||||
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT | TOTEMPG_STATS_CLEAR_TOTEM);
|
||||
cs_ipcs_clear_stats();
|
||||
+ schedmiss_clear_stats();
|
||||
cleared = 1;
|
||||
}
|
||||
if (!cleared) {
|
||||
@@ -500,6 +590,11 @@ static void stats_map_notify_fn(uint32_t event, char *key, void *old_value, void
|
||||
return ;
|
||||
}
|
||||
|
||||
+ /* Ignore schedmiss trackers as the values are read from the circular buffer */
|
||||
+ if (strncmp(key, SCHEDMISS_PREFIX, strlen(SCHEDMISS_PREFIX)) == 0 ) {
|
||||
+ return ;
|
||||
+ }
|
||||
+
|
||||
new_val.data = new_value;
|
||||
if (stats_map_get(key,
|
||||
&new_value,
|
||||
@@ -556,7 +651,7 @@ cs_error_t stats_map_track_add(const char *key_name,
|
||||
}
|
||||
/* Get initial value */
|
||||
if (stats_map_get(tracker->key_name,
|
||||
- &tracker->old_value, &value_len, &type) == CS_OK) {
|
||||
+ &tracker->old_value, &value_len, &type) != CS_OK) {
|
||||
tracker->old_value = 0ULL;
|
||||
}
|
||||
} else {
|
||||
diff --git a/exec/stats.h b/exec/stats.h
|
||||
index 45891ae..eac9e7c 100644
|
||||
--- a/exec/stats.h
|
||||
+++ b/exec/stats.h
|
||||
@@ -69,3 +69,5 @@ void stats_trigger_trackers(void);
|
||||
void stats_ipcs_add_connection(int service_id, uint32_t pid, void *ptr);
|
||||
void stats_ipcs_del_connection(int service_id, uint32_t pid, void *ptr);
|
||||
cs_error_t cs_ipcs_get_conn_stats(int service_id, uint32_t pid, void *conn_ptr, struct ipcs_conn_stats *ipcs_stats);
|
||||
+
|
||||
+void stats_add_schedmiss_event(uint64_t, float delay);
|
||||
diff --git a/man/cmap_keys.7 b/man/cmap_keys.7
|
||||
index 6bc04fe..da95c51 100644
|
||||
--- a/man/cmap_keys.7
|
||||
+++ b/man/cmap_keys.7
|
||||
@@ -1,5 +1,5 @@
|
||||
.\"/*
|
||||
-.\" * Copyright (c) 2012-2018 Red Hat, Inc.
|
||||
+.\" * Copyright (c) 2012-2020 Red Hat, Inc.
|
||||
.\" *
|
||||
.\" * All rights reserved.
|
||||
.\" *
|
||||
@@ -357,6 +357,27 @@ contains the total number of interrupted sends.
|
||||
.B service_id
|
||||
contains the ID of service which the IPC is connected to.
|
||||
|
||||
+
|
||||
+.TP
|
||||
+stats.schedmiss.<n>.*
|
||||
+If corosync is not scheduled after the required period of time it will
|
||||
+log this event and also write an entry to the stats cmap under this key.
|
||||
+There can be up to 10 entries (0..9) in here, when an 11th event happens
|
||||
+the earliest will be removed.
|
||||
+
|
||||
+These events will always be in reverse order, so stats.schedmiss.0.* will
|
||||
+always be the latest event kept and 9 the oldest. If you want to listen
|
||||
+for notifications then you are recommended to listen for changes
|
||||
+to stats.schedmiss.0.timestamp or stats.schedmiss.0.delay.
|
||||
+
|
||||
+.B timestamp
|
||||
+The time of the event in ms since the Epoch (ie time_t * 1000 but with
|
||||
+valid milliseconds).
|
||||
+
|
||||
+.B delay
|
||||
+The time that corosync was paused (in ms, float value).
|
||||
+
|
||||
+
|
||||
.TP
|
||||
stats.clear.*
|
||||
These are write-only keys used to clear the stats for various subsystems
|
||||
@@ -370,6 +391,9 @@ Clears the knet stats
|
||||
.B ipc
|
||||
Clears the ipc stats
|
||||
|
||||
+.B schedmiss
|
||||
+Clears the schedmiss stats
|
||||
+
|
||||
.B all
|
||||
Clears all of the above stats
|
||||
|
||||
diff --git a/tools/corosync-cmapctl.c b/tools/corosync-cmapctl.c
|
||||
index a4b61bd..ffca7e1 100644
|
||||
--- a/tools/corosync-cmapctl.c
|
||||
+++ b/tools/corosync-cmapctl.c
|
||||
@@ -115,7 +115,7 @@ static int print_help(void)
|
||||
printf(" about the networking and IPC traffic in some detail.\n");
|
||||
printf("\n");
|
||||
printf("Clear stats:\n");
|
||||
- printf(" corosync-cmapctl -C [knet|ipc|totem|all]\n");
|
||||
+ printf(" corosync-cmapctl -C [knet|ipc|totem|schedmiss|all]\n");
|
||||
printf(" The 'stats' map is implied\n");
|
||||
printf("\n");
|
||||
printf("Load settings from a file:\n");
|
||||
@@ -849,6 +849,7 @@ int main(int argc, char *argv[])
|
||||
if (strcmp(optarg, "knet") == 0 ||
|
||||
strcmp(optarg, "totem") == 0 ||
|
||||
strcmp(optarg, "ipc") == 0 ||
|
||||
+ strcmp(optarg, "schedmiss") == 0 ||
|
||||
strcmp(optarg, "all") == 0) {
|
||||
action = ACTION_CLEARSTATS;
|
||||
clear_opt = optarg;
|
||||
@@ -857,7 +858,7 @@ int main(int argc, char *argv[])
|
||||
map = CMAP_MAP_STATS;
|
||||
}
|
||||
else {
|
||||
- fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc' or 'all'\n");
|
||||
+ fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc', 'schedmiss' or 'all'\n");
|
||||
return (EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,31 @@
|
||||
From ebd05fa00826c366922e619b012a0684c6856539 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Thu, 23 Jan 2020 17:11:54 +0100
|
||||
Subject: [PATCH] stats: Use nanoseconds from epoch for schedmiss
|
||||
|
||||
Using monotonic time is not working because it doesn't have to match
|
||||
time from epoch.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/main.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/exec/main.c b/exec/main.c
|
||||
index fb0486e..821d97e 100644
|
||||
--- a/exec/main.c
|
||||
+++ b/exec/main.c
|
||||
@@ -836,7 +836,8 @@ static void timer_function_scheduler_timeout (void *data)
|
||||
"(threshold is %0.4f ms). Consider token timeout increase.",
|
||||
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
|
||||
- stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
+ stats_add_schedmiss_event(qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC,
|
||||
+ (float)tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,47 @@
|
||||
From 35662dd0ec53f456445c30c0ef92892f47b25aa2 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Mon, 24 Feb 2020 14:58:45 +0100
|
||||
Subject: [PATCH] main: Add schedmiss timestamp into message
|
||||
|
||||
This is useful for matching schedmiss event in stats map with logged
|
||||
event.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/main.c | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/exec/main.c b/exec/main.c
|
||||
index 821d97e..8c3df79 100644
|
||||
--- a/exec/main.c
|
||||
+++ b/exec/main.c
|
||||
@@ -817,6 +817,7 @@ static void timer_function_scheduler_timeout (void *data)
|
||||
struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
|
||||
unsigned long long tv_current;
|
||||
unsigned long long tv_diff;
|
||||
+ uint64_t schedmiss_event_tstamp;
|
||||
|
||||
tv_current = qb_util_nano_current_get ();
|
||||
|
||||
@@ -832,12 +833,14 @@ static void timer_function_scheduler_timeout (void *data)
|
||||
timeout_data->tv_prev = tv_current;
|
||||
|
||||
if (tv_diff > timeout_data->max_tv_diff) {
|
||||
- log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
|
||||
+ schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC;
|
||||
+
|
||||
+ log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled (@%" PRIu64 ") for %0.4f ms "
|
||||
"(threshold is %0.4f ms). Consider token timeout increase.",
|
||||
+ schedmiss_event_tstamp,
|
||||
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
|
||||
- stats_add_schedmiss_event(qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC,
|
||||
- (float)tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
+ stats_add_schedmiss_event(schedmiss_event_tstamp, (float)tv_diff / QB_TIME_NS_IN_MSEC);
|
||||
}
|
||||
|
||||
/*
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,51 @@
|
||||
From 0c16442f2d93f32a229b87d2672e2dc8025ec704 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Wed, 4 Mar 2020 11:42:15 +0100
|
||||
Subject: [PATCH] votequorum: Change check of expected_votes
|
||||
|
||||
Previously value of new expected_votes was checked so newly computed
|
||||
quorum value was in the interval <total_votes / 2, total_votes>. The
|
||||
upper range prevented the cluster from becoming unquorate, but bottom check
|
||||
was almost useless because it allowed to change expected_votes so it is
|
||||
smaller than total_votes.
|
||||
|
||||
Solution is to check if expected_votes is bigger or equal to total_votes
|
||||
and for quorate cluster only check if cluster doesn't become unquorate
|
||||
(for unquorate cluster one can set upper range freely - as it is
|
||||
perfectly possible when using config file)
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/votequorum.c | 10 +++++++---
|
||||
1 file changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/exec/votequorum.c b/exec/votequorum.c
|
||||
index 52424fa..b152425 100644
|
||||
--- a/exec/votequorum.c
|
||||
+++ b/exec/votequorum.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (c) 2009-2015 Red Hat, Inc.
|
||||
+ * Copyright (c) 2009-2020 Red Hat, Inc.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
@@ -2688,8 +2688,12 @@ static void message_handler_req_lib_votequorum_setexpected (void *conn, const vo
|
||||
*/
|
||||
newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
|
||||
allow_downscale = allow_downscale_status;
|
||||
- if (newquorum < total_votes / 2 ||
|
||||
- newquorum > total_votes) {
|
||||
+ /*
|
||||
+ * Setting expected_votes < total_votes doesn't make sense.
|
||||
+ * For quorate cluster prevent cluster to become unquorate.
|
||||
+ */
|
||||
+ if (req_lib_votequorum_setexpected->expected_votes < total_votes ||
|
||||
+ (cluster_is_quorate && (newquorum > total_votes))) {
|
||||
error = CS_ERR_INVALID_PARAM;
|
||||
goto error_exit;
|
||||
}
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,33 @@
|
||||
From 5f543465bb3506b7f4929a426f1c22a9c854cecd Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Wed, 4 Mar 2020 08:53:41 +0100
|
||||
Subject: [PATCH] quorumtool: exit on invalid expected votes
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
tools/corosync-quorumtool.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c
|
||||
index 9bef844..44bf181 100644
|
||||
--- a/tools/corosync-quorumtool.c
|
||||
+++ b/tools/corosync-quorumtool.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
- * Copyright (c) 2009-2019 Red Hat, Inc.
|
||||
+ * Copyright (c) 2009-2020 Red Hat, Inc.
|
||||
*
|
||||
* All rights reserved.
|
||||
*
|
||||
@@ -937,6 +937,7 @@ int main (int argc, char *argv[]) {
|
||||
votes = strtol(optarg, &endptr, 0);
|
||||
if ((votes == 0 && endptr == optarg) || votes <= 0) {
|
||||
fprintf(stderr, "New expected votes value was not valid, try a positive number\n");
|
||||
+ exit(EXIT_FAILURE);
|
||||
} else {
|
||||
command_opt = CMD_SETEXPECTED;
|
||||
}
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -0,0 +1,67 @@
|
||||
From ca320beac25f82c0c555799e647a47975a333c28 Mon Sep 17 00:00:00 2001
|
||||
From: Jan Friesse <jfriesse@redhat.com>
|
||||
Date: Tue, 10 Mar 2020 17:49:27 +0100
|
||||
Subject: [PATCH] votequorum: set wfa status only on startup
|
||||
|
||||
Previously reload of configuration with enabled wait_for_all resulted in
|
||||
set of wait_for_all_status which set cluster_is_quorate to 0 but didn't
|
||||
inform the quorum service so votequorum and quorum information may get
|
||||
out of sync.
|
||||
|
||||
Example is 1 node cluster, which is extended to 3 nodes. Quorum service
|
||||
reports cluster as a quorate (incorrect) and votequorum as not-quorate
|
||||
(correct). Similar behavior happens when extending cluster in general,
|
||||
but some configurations are less incorrect (3->4).
|
||||
|
||||
Discussed solution was to inform quorum service but that would mean
|
||||
every reload would cause loss of quorum until all nodes would be seen
|
||||
again.
|
||||
|
||||
Such behaviour is consistent but seems to be a bit too strict.
|
||||
|
||||
Proposed solution sets wait_for_all_status only on startup and
|
||||
doesn't touch it during reload.
|
||||
|
||||
This solution fulfills requirement of "cluster will be quorate for
|
||||
the first time only after all nodes have been visible at least
|
||||
once at the same time." because node clears wait_for_all_status only
|
||||
after it sees all other nodes or joins cluster which is quorate. It also
|
||||
solves problem with extending cluster, because when cluster becomes
|
||||
unquorate (1->3) wait_for_all_status is set.
|
||||
|
||||
Added assert is only to ensure that I haven't missed any case when
|
||||
quorate cluster may become unquorate.
|
||||
|
||||
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
|
||||
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
|
||||
---
|
||||
exec/votequorum.c | 6 ++++--
|
||||
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/exec/votequorum.c b/exec/votequorum.c
|
||||
index b152425..fb9f1cd 100644
|
||||
--- a/exec/votequorum.c
|
||||
+++ b/exec/votequorum.c
|
||||
@@ -1009,7 +1009,7 @@ static void are_we_quorate(unsigned int total_votes)
|
||||
"Waiting for all cluster members. "
|
||||
"Current votes: %d expected_votes: %d",
|
||||
total_votes, us->expected_votes);
|
||||
- cluster_is_quorate = 0;
|
||||
+ assert(!cluster_is_quorate);
|
||||
return;
|
||||
}
|
||||
update_wait_for_all_status(0);
|
||||
@@ -1547,7 +1547,9 @@ static char *votequorum_readconfig(int runtime)
|
||||
update_ev_barrier(us->expected_votes);
|
||||
update_two_node();
|
||||
if (wait_for_all) {
|
||||
- update_wait_for_all_status(1);
|
||||
+ if (!runtime) {
|
||||
+ update_wait_for_all_status(1);
|
||||
+ }
|
||||
} else if (wait_for_all_autoset && wait_for_all_status) {
|
||||
/*
|
||||
* Reset wait for all status for consistency when wfa is auto-unset by 2node.
|
||||
--
|
||||
1.8.3.1
|
||||
|
@ -23,11 +23,21 @@
|
||||
Name: corosync
|
||||
Summary: The Corosync Cluster Engine and Application Programming Interfaces
|
||||
Version: 3.0.3
|
||||
Release: 2%{?gitver}%{?dist}
|
||||
Release: 4%{?gitver}%{?dist}
|
||||
License: BSD
|
||||
URL: http://corosync.github.io/corosync/
|
||||
Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz
|
||||
|
||||
Patch0: bz1780137-1-votequorum-Ignore-the-icmap_get_-return-value.patch
|
||||
Patch1: bz1791792-1-man-move-cmap_keys-man-page-from-section-8-to-7.patch
|
||||
Patch2: bz1780137-2-votequorum-Reflect-runtime-change-of-2Node-to-WFA.patch
|
||||
Patch3: bz1791792-2-stats-Add-stats-for-scheduler-misses.patch
|
||||
Patch4: bz1791792-3-stats-Use-nanoseconds-from-epoch-for-schedmiss.patch
|
||||
Patch5: bz1791792-4-main-Add-schedmiss-timestamp-into-message.patch
|
||||
Patch6: bz1809864-1-votequorum-Change-check-of-expected_votes.patch
|
||||
Patch7: bz1809864-2-quorumtool-exit-on-invalid-expected-votes.patch
|
||||
Patch8: bz1816653-1-votequorum-set-wfa-status-only-on-startup.patch
|
||||
|
||||
%if %{with spausedd}
|
||||
Source1: https://github.com/jfriesse/spausedd/releases/download/%{spausedd_version}/spausedd-%{spausedd_version}.tar.gz
|
||||
# VMGuestLib exists only for x86_64 architecture
|
||||
@ -89,6 +99,16 @@ BuildRequires: pkgconfig(vmguestlib)
|
||||
%setup -q -n %{name}-%{version}%{?gittarver}
|
||||
%endif
|
||||
|
||||
%patch0 -p1 -b .bz1780137-1
|
||||
%patch1 -p1 -b .bz1791792-1
|
||||
%patch2 -p1 -b .bz1780137-2
|
||||
%patch3 -p1 -b .bz1791792-2
|
||||
%patch4 -p1 -b .bz1791792-3
|
||||
%patch5 -p1 -b .bz1791792-4
|
||||
%patch6 -p1 -b .bz1809864-1
|
||||
%patch7 -p1 -b .bz1809864-2
|
||||
%patch8 -p1 -b .bz1816653-1
|
||||
|
||||
%build
|
||||
%if %{with runautogen}
|
||||
./autogen.sh
|
||||
@ -254,7 +274,7 @@ fi
|
||||
%{_mandir}/man8/corosync-quorumtool.8*
|
||||
%{_mandir}/man5/corosync.conf.5*
|
||||
%{_mandir}/man5/votequorum.5*
|
||||
%{_mandir}/man8/cmap_keys.8*
|
||||
%{_mandir}/man7/cmap_keys.7*
|
||||
|
||||
# library
|
||||
#
|
||||
@ -387,6 +407,36 @@ fi
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Tue May 26 2020 Jan Friesse <jfriesse@redhat.com> - 3.0.3-4
|
||||
- Resolves: rhbz#1780137
|
||||
- Resolves: rhbz#1791792
|
||||
- Resolves: rhbz#1809864
|
||||
- Resolves: rhbz#1816653
|
||||
|
||||
- votequorum: Ignore the icmap_get_* return value (rhbz#1780137)
|
||||
- merge upstream commit cddd62f972bca276c934e58f08da84071cec1ddb (rhbz#1780137)
|
||||
- man: move cmap_keys man page from section 8 to 7 (rhbz#1791792)
|
||||
- merge upstream commit f1d36307e524f9440733f0b01a9fc627a0e1cac7 (rhbz#1791792)
|
||||
- votequorum: Reflect runtime change of 2Node to WFA (rhbz#1780137)
|
||||
- merge upstream commit 8ce65bf951bc1e5b2d64b60ea027fbdc551d4fc8 (rhbz#1780137)
|
||||
- stats: Add stats for scheduler misses (rhbz#1791792)
|
||||
- merge upstream commit 48b6894ef41e9a06ccbb696d062d86ef60dc2c4b (rhbz#1791792)
|
||||
- stats: Use nanoseconds from epoch for schedmiss (rhbz#1791792)
|
||||
- merge upstream commit ebd05fa00826c366922e619b012a0684c6856539 (rhbz#1791792)
|
||||
- main: Add schedmiss timestamp into message (rhbz#1791792)
|
||||
- merge upstream commit 35662dd0ec53f456445c30c0ef92892f47b25aa2 (rhbz#1791792)
|
||||
- votequorum: Change check of expected_votes (rhbz#1809864)
|
||||
- merge upstream commit 0c16442f2d93f32a229b87d2672e2dc8025ec704 (rhbz#1809864)
|
||||
- quorumtool: exit on invalid expected votes (rhbz#1809864)
|
||||
- merge upstream commit 5f543465bb3506b7f4929a426f1c22a9c854cecd (rhbz#1809864)
|
||||
- votequorum: set wfa status only on startup (rhbz#1816653)
|
||||
- merge upstream commit ca320beac25f82c0c555799e647a47975a333c28 (rhbz#1816653)
|
||||
|
||||
* Tue Apr 28 2020 Jan Friesse <jfriesse@redhat.com> - 3.0.3-3
|
||||
- Resolves: rhbz#1828295
|
||||
|
||||
- Add explicit spausedd dependency for revdeps CI test
|
||||
|
||||
* Mon Nov 25 2019 Jan Friesse <jfriesse@redhat.com> - 3.0.3-2
|
||||
- Related: rhbz#1745623
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user