import corosync-3.0.3-4.el8

This commit is contained in:
CentOS Sources 2020-07-28 05:28:03 -04:00 committed by Stepan Oksanichenko
parent e62565ff01
commit 696a553662
10 changed files with 1655 additions and 2 deletions

View File

@ -0,0 +1,73 @@
From cddd62f972bca276c934e58f08da84071cec1ddb Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 25 Nov 2019 18:21:52 +0100
Subject: [PATCH] votequorum: Ignore the icmap_get_* return value
Express intention to ignore icmap_get_* return
value and rely on default behavior of not changing the output
parameter on error.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
---
exec/votequorum.c | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/exec/votequorum.c b/exec/votequorum.c
index f78b3f9..e1d7e73 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1271,10 +1271,10 @@ static char *votequorum_readconfig(int runtime)
/*
* gather basic data here
*/
- icmap_get_uint32("quorum.expected_votes", &expected_votes);
+ (void)icmap_get_uint32("quorum.expected_votes", &expected_votes);
have_nodelist = votequorum_read_nodelist_configuration(&node_votes, &node_count, &node_expected_votes);
have_qdevice = votequorum_qdevice_is_configured(&qdevice_votes);
- icmap_get_uint8("quorum.two_node", &two_node);
+ (void)icmap_get_uint8("quorum.two_node", &two_node);
/*
* do config verification and enablement
@@ -1319,13 +1319,13 @@ static char *votequorum_readconfig(int runtime)
wait_for_all = 1;
}
- icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
- icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
- icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
- icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
- icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
- icmap_get_uint8("quorum.auto_tie_breaker", &atb);
- icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
+ (void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
+ (void)icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
+ (void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
+ (void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
+ (void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
+ (void)icmap_get_uint8("quorum.auto_tie_breaker", &atb);
+ (void)icmap_get_string("quorum.auto_tie_breaker_node", &atb_string);
/* auto_tie_breaker defaults to LOWEST */
if (atb) {
@@ -1517,7 +1517,7 @@ static char *votequorum_readconfig(int runtime)
us->expected_votes = node_expected_votes;
} else {
us->votes = 1;
- icmap_get_uint32("quorum.votes", &us->votes);
+ (void)icmap_get_uint32("quorum.votes", &us->votes);
}
if (expected_votes) {
@@ -1568,7 +1568,7 @@ static void votequorum_refresh_config(
return ;
}
- icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
+ (void)icmap_get_uint8("quorum.cancel_wait_for_all", &cancel_wfa);
if (strcmp(key_name, "quorum.cancel_wait_for_all") == 0 &&
cancel_wfa >= 1) {
icmap_set_uint8("quorum.cancel_wait_for_all", 0);
--
1.8.3.1

View File

@ -0,0 +1,80 @@
From 8ce65bf951bc1e5b2d64b60ea027fbdc551d4fc8 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Thu, 16 Jan 2020 15:43:59 +0100
Subject: [PATCH] votequorum: Reflect runtime change of 2Node to WFA
When 2Node mode is set, WFA is also set unless WFA is configured
explicitly. This behavior was not reflected on runtime change, so
restarted corosync behavior was different (WFA not set). Also when
cluster is reduced from 3 nodes to 2 nodes during runtime, WFA was not
set, which may result in two quorate partitions.
Solution is to set WFA depending on 2Node when WFA
is not explicitly configured.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
exec/votequorum.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/exec/votequorum.c b/exec/votequorum.c
index 0cde8f8..52424fa 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -80,6 +80,7 @@ static uint8_t two_node = 0;
static uint8_t wait_for_all = 0;
static uint8_t wait_for_all_status = 0;
+static uint8_t wait_for_all_autoset = 0; /* Wait for all is not set explicitly and follows two_node */
static enum {ATB_NONE, ATB_LOWEST, ATB_HIGHEST, ATB_LIST} auto_tie_breaker = ATB_NONE, initial_auto_tie_breaker = ATB_NONE;
static int lowest_node_id = -1;
@@ -1315,12 +1316,10 @@ static char *votequorum_readconfig(int runtime)
* Enable special features
*/
if (!runtime) {
- if (two_node) {
- wait_for_all = 1;
- }
-
(void)icmap_get_uint8("quorum.allow_downscale", &allow_downscale);
- (void)icmap_get_uint8("quorum.wait_for_all", &wait_for_all);
+ if (icmap_get_uint8("quorum.wait_for_all", &wait_for_all) != CS_OK) {
+ wait_for_all_autoset = 1;
+ }
(void)icmap_get_uint8("quorum.last_man_standing", &last_man_standing);
(void)icmap_get_uint32("quorum.last_man_standing_window", &last_man_standing_window);
(void)icmap_get_uint8("quorum.expected_votes_tracking", &ev_tracking);
@@ -1361,6 +1360,15 @@ static char *votequorum_readconfig(int runtime)
}
+ /*
+ * Changing of wait_for_all during runtime is not supported, but changing of two_node is
+ * and two_node may set wfa if not configured explicitly. It is safe to unset it
+ * (or set it back) when two_node changes.
+ */
+ if (wait_for_all_autoset) {
+ wait_for_all = two_node;
+ }
+
/* two_node and auto_tie_breaker are not compatible as two_node uses
* a fence race to decide quorum whereas ATB decides based on node id
*/
@@ -1540,6 +1548,12 @@ static char *votequorum_readconfig(int runtime)
update_two_node();
if (wait_for_all) {
update_wait_for_all_status(1);
+ } else if (wait_for_all_autoset && wait_for_all_status) {
+ /*
+ * Reset wait for all status for consistency when wfa is auto-unset by 2node.
+ * wait_for_all_status would be ignored by are_we_quorate anyway.
+ */
+ update_wait_for_all_status(0);
}
out:
--
1.8.3.1

View File

@ -0,0 +1,901 @@
From f1d36307e524f9440733f0b01a9fc627a0e1cac7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ferenc=20W=C3=A1gner?= <wferi@debian.org>
Date: Sat, 4 Jan 2020 13:38:08 +0100
Subject: [PATCH] man: move cmap_keys man page from section 8 to 7
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Section 8 is for "System administration commands", 7 is "Miscellaneous".
Signed-off-by: Ferenc Wágner <wferi@debian.org>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
---
corosync.spec.in | 2 +-
man/Makefile.am | 2 +-
man/cmap_keys.7 | 397 +++++++++++++++++++++++++++++++++++++++++++++++++
man/cmap_keys.8 | 397 -------------------------------------------------
man/cmap_overview.3 | 4 +-
man/corosync-cmapctl.8 | 2 +-
man/index.html | 2 +-
7 files changed, 403 insertions(+), 403 deletions(-)
create mode 100644 man/cmap_keys.7
delete mode 100644 man/cmap_keys.8
diff --git a/corosync.spec.in b/corosync.spec.in
index c06675d..8ac3757 100644
--- a/corosync.spec.in
+++ b/corosync.spec.in
@@ -217,7 +217,7 @@ fi
%{_mandir}/man8/corosync-quorumtool.8*
%{_mandir}/man5/corosync.conf.5*
%{_mandir}/man5/votequorum.5*
-%{_mandir}/man8/cmap_keys.8*
+%{_mandir}/man7/cmap_keys.7*
# library
#
diff --git a/man/Makefile.am b/man/Makefile.am
index 2ef5dcd..92a76ed 100644
--- a/man/Makefile.am
+++ b/man/Makefile.am
@@ -140,7 +140,7 @@ dist_man_MANS = corosync.conf.5 \
votequorum_overview.3 \
sam_overview.3 \
cmap_overview.3 \
- cmap_keys.8
+ cmap_keys.7
if BUILD_VQSIM
dist_man_MANS += $(corosync_vqsim_man)
diff --git a/man/cmap_keys.7 b/man/cmap_keys.7
new file mode 100644
index 0000000..6bc04fe
--- /dev/null
+++ b/man/cmap_keys.7
@@ -0,0 +1,397 @@
+.\"/*
+.\" * Copyright (c) 2012-2018 Red Hat, Inc.
+.\" *
+.\" * All rights reserved.
+.\" *
+.\" * Author: Jan Friesse (jfriesse@redhat.com)
+.\" *
+.\" * This software licensed under BSD license, the text of which follows:
+.\" *
+.\" * Redistribution and use in source and binary forms, with or without
+.\" * modification, are permitted provided that the following conditions are met:
+.\" *
+.\" * - Redistributions of source code must retain the above copyright notice,
+.\" * this list of conditions and the following disclaimer.
+.\" * - Redistributions in binary form must reproduce the above copyright notice,
+.\" * this list of conditions and the following disclaimer in the documentation
+.\" * and/or other materials provided with the distribution.
+.\" * - Neither the name of the Red Hat, Inc. nor the names of its
+.\" * contributors may be used to endorse or promote products derived from this
+.\" * software without specific prior written permission.
+.\" *
+.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+.\" * THE POSSIBILITY OF SUCH DAMAGE.
+.\" */
+.TH "CMAP_KEYS" 7 "2018-10-08" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
+
+.SH NAME
+.P
+cmap_keys \- Overview of keys stored in the Configuration Map
+
+.SH OVERVIEW
+.P
+There are 3 main types of keys stored in CMAP:
+.PP
+* Mapping of values stored in the config file.
+.PP
+* Runtime statistics.
+.PP
+* Other user created values.
+
+In this man page, wild-cards have the usual meaning.
+
+.SH ICMAP KEYS
+These keys are in the icmap (default) map
+.TP
+internal_configuration.*
+Internal configuration data. All keys in this prefix are read only.
+It's only useful for getting a list of loaded services.
+
+.TP
+logging.*
+Values read from the configuration file. It's possible to change them at runtime.
+If subsystem specific configuration is needed, the key must be in the form
+logging.logger_subsys.SERVICE.key, where SERVICE is upper case name of the service and
+key is same as in the configuration file. All values are of string type.
+
+.TP
+nodelist.*
+Values are read from the configuration file only (dynamic updates are not allowed).
+Each node element in the configuration file gets
+assigned its position starting from zero. So the first node from the config file has
+nodelist.node.0. prefix. To be a valid entry, each node must have
+.B ring0_addr
+key.
+To change the
+.B nodeid
+key, use a u32 data type.
+
+Local node position is stored in
+.B local_node_pos
+key (RO), so it's easy to find
+out nodeid/ring addresses of the local node directly from cmap.
+
+.TP
+runtime.blackbox.*
+Trigger keys for storing fplay data. It's recommended that you use the corosync-blackbox command
+to change keys in this prefix.
+
+.TP
+runtime.force_gather
+Set to 'yes' to force the processor to move into the GATHER state. This operation
+is dangerous and is not recommended.
+
+.TP
+runtime.config.*
+Contains the values actually in use by the totem membership protocol.
+Values here are either taken from the Corosync configuration file,
+defaults or computed from entries in the config file. For information
+on individual keys please refer to the man page
+.BR corosync.conf (5).
+
+.TP
+runtime.services.*
+Prefix with statistics for service engines. Each service has its own
+.B service_id
+key in the prefix with the name runtime.services.SERVICE., where SERVICE is the lower case
+name of the service. Inside the service prefix is the number of messages received and sent
+by the corosync engine in the format runtime.services.SERVICE.EXEC_CALL.rx and
+runtime.services.SERVICE.EXEC_CALL.tx, where EXEC_CALL is the internal id of the service
+call (so for example 3 in cpg service is receive of multicast message from other
+nodes).
+
+.TP
+runtime.totem.members.*
+Prefix containing members of the totem single ring protocol. Each member
+keys has format runtime.totem.members.NODEID.KEY, where key is
+one of:
+
+.B config_version
+Config version of the member node.
+
+.TP
+resources.process.PID.*
+Prefix created by applications using SAM with CMAP integration.
+It contains the following keys:
+
+.B recovery
+Recovery policy of the process. Can be one of quit or restart.
+
+.B poll_period
+Value passed in sam_initialize as a time_interval.
+
+.B last_updated
+Last time SAM received a heartbeat from the client.
+
+.B state
+State of the client. Can be one of failed, stopped, running and waiting for quorum.
+
+.TP
+uidgid.*
+Information about users/groups which are allowed to make IPC connections to
+corosync. Entries loaded from configuration file are stored with
+uidgid.config.* prefix and are pruned on configuration file reload. Dynamic
+entries have the uidgid.* prefix and a configuration file reload doesn't affect them.
+
+.TP
+quorum.cancel_wait_for_all
+Tells votequorum to cancel waiting for all nodes at cluster startup. Can be used
+to unblock quorum if nodes are known to be down. For pcs use only.
+
+.TP
+config.reload_in_progress
+This value will be set to 1 (or created) when a corosync.conf reload is started,
+and set to 0 when the reload is completed. This allows interested subsystems
+to do atomic reconfiguration rather than changing each key. Note that
+individual add/change/delete notifications will still be sent during a reload.
+
+.TP
+config.totemconfig_reload_in_progress
+This key is similar to
+.B config.reload_in_progress
+but changed after the totem config trigger is processed. It is useful (mainly)
+for situations when
+.B nodelist.local_node_pos
+must be correctly reinstated before anything else.
+
+.SH STATS KEYS
+These keys are in the stats map. All keys in this map are read-only.
+Modification tracking of individual keys is supported in the stats map, but not
+prefixes. Add/Delete operations are supported on prefixes though so you can track
+for new ipc connections or knet interfaces.
+.TP
+stats.srp.*
+Prefix containing statistics about totem.
+Typical key prefixes:
+
+.B commit_entered
+Number of times the processor entered COMMIT state.
+
+.B commit_token_lost
+Number of times the processor lost token in COMMIT state.
+
+.B consensus_timeouts
+How many times the processor timed out forming a consensus about membership.
+
+.B continuous_gather
+How many times the processor was not able to reach consensus.
+
+.B firewall_enabled_or_nic_failure
+Set to 1 when processor was not able to reach consensus for long time. The usual
+reason is a badly configured firewall or connection failure.
+
+.B gather_entered
+Number of times the processor entered GATHER state.
+
+.B gather_token_lost
+Number of times the processor lost token in GATHER state.
+
+.B mcast_retx
+Number of retransmitted messages.
+
+.B mcast_rx
+Number of received multicast messages.
+
+.B mcast_tx
+Number of transmitted multicast messages.
+
+.B memb_commit_token_rx
+Number of received commit tokens.
+
+.B memb_commit_token_tx
+Number of transmitted commit tokens.
+
+.B memb_join_rx
+Number of received join messages.
+
+.B memb_join_tx
+Number of transmitted join messages.
+
+.B memb_merge_detect_rx
+Number of received member merge messages.
+
+.B memb_merge_detect_tx
+Number of transmitted member merge messages.
+
+.B orf_token_rx
+Number of received orf tokens.
+
+.B orf_token_tx
+Number of transmitted orf tokens.
+
+.B recovery_entered
+Number of times the processor entered recovery.
+
+.B recovery_token_lost
+Number of times the token was lost in recovery state.
+
+.B rx_msg_dropped
+Number of received messages which were dropped because they were not expected
+(as example multicast message in commit state).
+
+.B token_hold_cancel_rx
+Number of received token hold cancel messages.
+
+.B token_hold_cancel_tx
+Number of transmitted token hold cancel messages.
+
+.B mtt_rx_token
+Mean transit time of token in milliseconds. In other words, time between
+two consecutive token receives.
+
+.B avg_token_workload
+Average time in milliseconds of holding time of token on the current processor.
+
+.B avg_backlog_calc
+Average number of not yet sent messages on the current processor.
+
+.TP
+stats.knet.nodeX.linkY.*
+Statistics about the network traffic to and from each node and link when using
+the kronosnet transport
+
+.B connected
+Whether the link is connected or not
+
+.B up_count
+Number of times this link has changed state to UP
+
+.B down_count
+Number of times this link has changed state to DOWN
+
+.B latency_ave / latency_min / latency_max
+Calculated latencies of this link. Note that if there has been no traffic
+on the link then latency_min will show a very large number.
+
+.B latency_samples
+The number of samples used to calculate the latency figures, so you have
+some idea of their precision.
+
+.B rx_data_packets / tx_data_packets
+The number of packets sent/received on this link
+
+.B rx_data_bytes / tx_data_bytes
+The number of bytes sent/received on this link
+
+.B rx_pmtu_packets / tx_pmtu_packets
+The number of packets sent/received by the PMTUd subsystem
+
+.B rx_pmtu_bytes / tx_pmtu_bytes
+The number of bytes sent/received by the PMTUd subsystem
+
+.B rx_ping_packets / tx_ping_packets
+The number of packets sent/received as pings
+
+.B rx_ping_bytes / tx_ping_bytes
+The number of bytes sent/received as pings
+
+.B rx_pong_packets / tx_pong_packets
+The number of packets sent/received as pongs
+
+.B rx_pong_bytes / tx_pong_bytes
+The number of bytes sent/received as pongs
+
+.B rx_total_packets / tx_total_packets
+The total number of packets sent/received. The aggregate of all of the above packet stats
+
+.B rx_total_bytes / tx_total_bytes
+The total number of bytes sent/received. The aggregate of all of the above bytes stats
+
+.B tx_data_retries / tx_pmtu_retries / tx_ping_retries / tx_pong_retries / tx_total_retries
+Number of times a transmit operation had to be retried due to the socket returning EAGAIN
+
+.TP
+stats.ipcs.*
+There is information about total number of active connections from client programs
+at the time the request was made.
+.B active
+number of closed connections during whole runtime of corosync
+.B closed
+Total number of connections that have been made since corosync was started
+
+.TP
+stats.ipcs.ID.*
+Each IPC connection has a unique ID. This is in the form [[serviceX:][PID:]internal_id.
+
+Typical keys in this prefix are:
+
+.B proc_name
+process name of connected process (unavailable on some platforms)
+
+.B dispatched
+number of dispatched messages.
+
+.B invalid_request
+number of requests made by IPC which are invalid (calling non-existing call, ...).
+
+.B name
+contains short name of the IPC connection (unavailable on some platforms).
+
+.B overload
+is number of requests which were not processed because of overload.
+
+.B queue_size
+contains the number of messages in the queue waiting for send.
+
+.B recv_retries
+is the total number of interrupted receives.
+
+.B requests
+contains the number of requests made by IPC.
+
+.B responses
+is the number of responses sent to the IPC client.
+
+.B send_retries
+contains the total number of interrupted sends.
+
+.B service_id
+contains the ID of service which the IPC is connected to.
+
+.TP
+stats.clear.*
+These are write-only keys used to clear the stats for various subsystems
+
+.B totem
+Clears the pg & srp totem stats.
+
+.B knet
+Clears the knet stats
+
+.B ipc
+Clears the ipc stats
+
+.B all
+Clears all of the above stats
+
+
+.SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC
+Is the same as in the configuration file. eg: to add UID 500 use
+
+.br
+# corosync-cmapctl -s uidgid.uid.500 u8 1
+
+GID is similar, so to add a GID use
+
+.br
+# corosync-cmapctl -s uidgid.gid.500 u8 1
+
+For removal of permissions, simply delete the key
+
+.br
+# corosync-cmapctl -d uidgid.gid.500
+
+
+.SH "SEE ALSO"
+.BR corosync_overview (7),
+.BR corosync.conf (5),
+.BR corosync-cmapctl (8)
diff --git a/man/cmap_keys.8 b/man/cmap_keys.8
deleted file mode 100644
index e2ea1fb..0000000
--- a/man/cmap_keys.8
+++ /dev/null
@@ -1,397 +0,0 @@
-.\"/*
-.\" * Copyright (c) 2012-2018 Red Hat, Inc.
-.\" *
-.\" * All rights reserved.
-.\" *
-.\" * Author: Jan Friesse (jfriesse@redhat.com)
-.\" *
-.\" * This software licensed under BSD license, the text of which follows:
-.\" *
-.\" * Redistribution and use in source and binary forms, with or without
-.\" * modification, are permitted provided that the following conditions are met:
-.\" *
-.\" * - Redistributions of source code must retain the above copyright notice,
-.\" * this list of conditions and the following disclaimer.
-.\" * - Redistributions in binary form must reproduce the above copyright notice,
-.\" * this list of conditions and the following disclaimer in the documentation
-.\" * and/or other materials provided with the distribution.
-.\" * - Neither the name of the Red Hat, Inc. nor the names of its
-.\" * contributors may be used to endorse or promote products derived from this
-.\" * software without specific prior written permission.
-.\" *
-.\" * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-.\" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-.\" * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-.\" * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-.\" * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-.\" * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-.\" * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-.\" * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-.\" * THE POSSIBILITY OF SUCH DAMAGE.
-.\" */
-.TH "CMAP_KEYS" 8 "2018-10-08" "corosync Man Page" "Corosync Cluster Engine Programmer's Manual"
-
-.SH NAME
-.P
-cmap_keys \- Overview of keys stored in the Configuration Map
-
-.SH OVERVIEW
-.P
-There are 3 main types of keys stored in CMAP:
-.PP
-* Mapping of values stored in the config file.
-.PP
-* Runtime statistics.
-.PP
-* Other user created values.
-
-In this man page, wild-cards have the usual meaning.
-
-.SH ICMAP KEYS
-These keys are in the icmap (default) map
-.TP
-internal_configuration.*
-Internal configuration data. All keys in this prefix are read only.
-It's only useful for getting a list of loaded services.
-
-.TP
-logging.*
-Values read from the configuration file. It's possible to change them at runtime.
-If subsystem specific configuration is needed, the key must be in the form
-logging.logger_subsys.SERVICE.key, where SERVICE is upper case name of the service and
-key is same as in the configuration file. All values are of string type.
-
-.TP
-nodelist.*
-Values are read from the configuration file only (dynamic updates are not allowed).
-Each node element in the configuration file gets
-assigned its position starting from zero. So the first node from the config file has
-nodelist.node.0. prefix. To be a valid entry, each node must have
-.B ring0_addr
-key.
-To change the
-.B nodeid
-key, use a u32 data type.
-
-Local node position is stored in
-.B local_node_pos
-key (RO), so it's easy to find
-out nodeid/ring addresses of the local node directly from cmap.
-
-.TP
-runtime.blackbox.*
-Trigger keys for storing fplay data. It's recommended that you use the corosync-blackbox command
-to change keys in this prefix.
-
-.TP
-runtime.force_gather
-Set to 'yes' to force the processor to move into the GATHER state. This operation
-is dangerous and is not recommended.
-
-.TP
-runtime.config.*
-Contains the values actually in use by the totem membership protocol.
-Values here are either taken from the Corosync configuration file,
-defaults or computed from entries in the config file. For information
-on individual keys please refer to the man page
-.BR corosync.conf (5).
-
-.TP
-runtime.services.*
-Prefix with statistics for service engines. Each service has its own
-.B service_id
-key in the prefix with the name runtime.services.SERVICE., where SERVICE is the lower case
-name of the service. Inside the service prefix is the number of messages received and sent
-by the corosync engine in the format runtime.services.SERVICE.EXEC_CALL.rx and
-runtime.services.SERVICE.EXEC_CALL.tx, where EXEC_CALL is the internal id of the service
-call (so for example 3 in cpg service is receive of multicast message from other
-nodes).
-
-.TP
-runtime.totem.members.*
-Prefix containing members of the totem single ring protocol. Each member
-keys has format runtime.totem.members.NODEID.KEY, where key is
-one of:
-
-.B config_version
-Config version of the member node.
-
-.TP
-resources.process.PID.*
-Prefix created by applications using SAM with CMAP integration.
-It contains the following keys:
-
-.B recovery
-Recovery policy of the process. Can be one of quit or restart.
-
-.B poll_period
-Value passed in sam_initialize as a time_interval.
-
-.B last_updated
-Last time SAM received a heartbeat from the client.
-
-.B state
-State of the client. Can be one of failed, stopped, running and waiting for quorum.
-
-.TP
-uidgid.*
-Information about users/groups which are allowed to make IPC connections to
-corosync. Entries loaded from configuration file are stored with
-uidgid.config.* prefix and are pruned on configuration file reload. Dynamic
-entries has uidgid.* prefix and a configuration file reload doesn't affect them.
-
-.TP
-quorum.cancel_wait_for_all
-Tells votequorum to cancel waiting for all nodes at cluster startup. Can be used
-to unblock quorum if notes are known to be down. For pcs use only.
-
-.TP
-config.reload_in_progress
-This value will be set to 1 (or created) when a corosync.conf reload is started,
-and set to 0 when the reload is completed. This allows interested subsystems
-to do atomic reconfiguration rather than changing each key. Note that
-individual add/change/delete notifications will still be sent during a reload.
-
-.TP
-config.totemconfig_reload_in_progress
-This key is similar to
-.B config.totemconfig_reload_in_progress
-but changed after the totem config trigger is processed. It is useful (mainly)
-for situations when
-.B nodelist.local_node_pos
-must be correctly reinstated before anything else.
-
-.SH STATS KEYS
-These keys are in the stats map. All keys in this map are read-only.
-Modification tracking of individual keys is supported in the stats map, but not
-prefixes. Add/Delete operations are supported on prefixes though so you can track
-for new ipc connections or knet interfaces.
-.TP
-stats.srp.*
-Prefix containing statistics about totem.
-Typical key prefixes:
-
-.B commit_entered
-Number of times the processor entered COMMIT state.
-
-.B commit_token_lost
-Number of times the processor lost token in COMMIT state.
-
-.B consensus_timeouts
-How many times the processor timed out forming a consensus about membership.
-
-.B continuous_gather
-How many times the processor was not able to reach consensus.
-
-.B firewall_enabled_or_nic_failure
-Set to 1 when processor was not able to reach consensus for long time. The usual
-reason is a badly configured firewall or connection failure.
-
-.B gather_entered
-Number of times the processor entered GATHER state.
-
-.B gather_token_lost
-Number of times the processor lost token in GATHER state.
-
-.B mcast_retx
-Number of retransmitted messages.
-
-.B mcast_rx
-Number of received multicast messages.
-
-.B mcast_tx
-Number of transmitted multicast messages.
-
-.B memb_commit_token_rx
-Number of received commit tokens.
-
-.B memb_commit_token_tx
-Number of transmitted commit tokens.
-
-.B memb_join_rx
-Number of received join messages.
-
-.B memb_join_tx
-Number of transmitted join messages.
-
-.B memb_merge_detect_rx
-Number of received member merge messages.
-
-.B memb_merge_detect_tx
-Number of transmitted member merge messages.
-
-.B orf_token_rx
-Number of received orf tokens.
-
-.B orf_token_tx
-Number of transmitted orf tokens.
-
-.B recovery_entered
-Number of times the processor entered recovery.
-
-.B recovery_token_lost
-Number of times the token was lost in recovery state.
-
-.B rx_msg_dropped
-Number of received messages which were dropped because they were not expected
-(as example multicast message in commit state).
-
-.B token_hold_cancel_rx
-Number of received token hold cancel messages.
-
-.B token_hold_cancel_tx
-Number of transmitted token hold cancel messages.
-
-.B mtt_rx_token
-Mean transit time of token in milliseconds. In other words, time between
-two consecutive token receives.
-
-.B avg_token_workload
-Average time in milliseconds of holding time of token on the current processor.
-
-.B avg_backlog_calc
-Average number of not yet sent messages on the current processor.
-
-.TP
-stats.knet.nodeX.linkY.*
-Statistics about the network traffic to and from each node and link when using
-tke kronosnet transport
-
-.B connected
-Whether the link is connected or not
-
-.B up_count
-Number of times this link has changed state to UP
-
-.B down_count
-Number of times this link has changed state to DOWN
-
-.B latency_ave / latency_max / latency_max
-Calculated latencies of this link. Note that if there has been no traffic
-on the link then latency_min will show a very large number.
-
-.B latency_samples
-The number of samples used to calculate the latency figures, so you have
-some idea of their precision.
-
-.B rx_data_packets / tx_data_packets
-The number of packets sent/received on this link
-
-.B rx_data_bytes / tx_data_bytes
-The number of bytes sent/received on this link
-
-.B rx_pmtu_packets / tx_pmtu_packets
-The number of packets sent/received by the PMTUd subsystem
-
-.B rx_pmtu_bytes / tx_pmtu_bytes
-The number of bytes sent/received by the PMTUd subsystem
-
-.B rx_ping_packets / tx_ping_packets
-The number of packets sent/received as pings
-
-.B rx_ping_bytes / tx_ping_bytes
-The number of bytes sent/received as pings
-
-.B rx_pong_packets / tx_pong_packets
-The number of packets sent/received as pongs
-
-.B rx_pong_bytes / tx_pong_bytes
-The number of bytes sent/received as pongs
-
-.B rx_total_packets / tx_total_packets
-The total number of packets sent/received. The aggregate of all of the above packet stats
-
-.B rx_total_bytes / tx_total_bytes
-The total number of bytes sent/received. The aggregate of all of the above bytes stats
-
-.B tx_data_retries / tx_pmtu_retries / tx_ping_retries / tx_pong_retries / tx_total_retries
-Number of times a transmit operation had to be retried due to the socket returning EAGAIN
-
-.TP
-stats.ipcs.*
-There is information about total number of active connections from client programs
-at the time the request was made.
-.B active
-number of closed connections during whole runtime of corosync
-.B closed
-Total number of connections that have been made since corosync was started
-
-.TP
-stats.ipcs.ID.*
-Each IPC connection has a unique ID. This is in the form [[serviceX:][PID:]internal_id.
-
-Typical keys in this prefix are:
-
-.B proc_name
-process name of connected process (unavailable on some platforms)
-
-.B dispatched
-number of dispatched messages.
-
-.B invalid_request
-number of requests made by IPC which are invalid (calling non-existing call, ...).
-
-.B name
-contains short name of the IPC connection (unavailable on some platforms).
-
-.B overload
-is number of requests which were not processed because of overload.
-
-.B queue_size
-contains the number of messages in the queue waiting for send.
-
-.B recv_retries
-is the total number of interrupted receives.
-
-.B requests
-contains the number of requests made by IPC.
-
-.B responses
-is the number of responses sent to the IPC client.
-
-.B send_retries
-contains the total number of interrupted sends.
-
-.B service_id
-contains the ID of service which the IPC is connected to.
-
-.TP
-stats.clear.*
-These are write-only keys used to clear the stats for various subsystems
-
-.B totem
-Clears the pg & srp totem stats.
-
-.B knet
-Clears the knet stats
-
-.B ipc
-Clears the ipc stats
-
-.B all
-Clears all of the above stats
-
-
-.SH DYNAMIC CHANGE USER/GROUP PERMISSION TO USE COROSYNC IPC
-Is the same as in the configuration file. eg: to add UID 500 use
-
-.br
-# corosync-cmapctl -s uidgid.uid.500 u8 1
-
-GID is similar, so to add a GID use
-
-.br
-# corosync-cmapctl -s uidgid.gid.500 u8 1
-
-For removal of permissions, simply delete the key
-
-.br
-# corosync-cmapctl -d uidgid.gid.500
-
-
-.SH "SEE ALSO"
-.BR corosync_overview (7),
-.BR corosync.conf (5),
-.BR corosync-cmapctl (8)
diff --git a/man/cmap_overview.3 b/man/cmap_overview.3
index cf4cabb..0aa3c14 100644
--- a/man/cmap_overview.3
+++ b/man/cmap_overview.3
@@ -54,7 +54,7 @@ The library provides a mechanism to:
.PP
* Track changes on keys
-Description of most keys created by corosync itself can be found in cmap_keys (8).
+Description of most keys created by corosync itself can be found in cmap_keys (7).
.SH BUGS
.SH "SEE ALSO"
@@ -75,4 +75,4 @@ Description of most keys created by corosync itself can be found in cmap_keys (8
.BR cmap_iter_finalize (3),
.BR cmap_track_add (3),
.BR cmap_track_delete (3),
-.BR cmap_keys (8)
+.BR cmap_keys (7)
diff --git a/man/corosync-cmapctl.8 b/man/corosync-cmapctl.8
index 637e597..8826503 100644
--- a/man/corosync-cmapctl.8
+++ b/man/corosync-cmapctl.8
@@ -96,4 +96,4 @@ corosync\-cmapctl \fB\-C\fR [ipc|totem|knet|all]
.SH "SEE ALSO"
.BR cmap_overview (3),
-.BR cmap_keys (8)
+.BR cmap_keys (7)
diff --git a/man/index.html b/man/index.html
index f4819e5..21326dc 100644
--- a/man/index.html
+++ b/man/index.html
@@ -63,7 +63,7 @@
Description of corosync-cmapctl tool.
<br>
- <a href="cmap_keys.8.html">cmap_keys(8)</a>:
+ <a href="cmap_keys.7.html">cmap_keys(7)</a>:
Overview of keys stored in the Configuration Map.
<br>
--
1.8.3.1

View File

@ -0,0 +1,320 @@
From 48b6894ef41e9a06ccbb696d062d86ef60dc2c4b Mon Sep 17 00:00:00 2001
From: Christine Caulfield <ccaulfie@redhat.com>
Date: Fri, 17 Jan 2020 14:22:16 +0000
Subject: [PATCH] stats: Add stats for scheduler misses
This patch adds a stats.schedmiss.* set of entries that
are a record of the last 10 times corosync was not scheduled
in time.
These entries are kept in reverse order (so stats.schedmiss.0.* is
always the latest one kept) and the values, including the timestamp,
are in milliseconds.
It's also possible to use a cmap tracker to follow these events, which
might be useful.
Signed-off-by: Christine Caulfield <ccaulfie@redhat.com>
Reviewed-by: Jan Friesse <jfriesse@redhat.com>
---
exec/main.c | 2 +
exec/stats.c | 113 +++++++++++++++++++++++++++++++++++++++++++----
exec/stats.h | 2 +
man/cmap_keys.7 | 26 ++++++++++-
tools/corosync-cmapctl.c | 5 ++-
5 files changed, 136 insertions(+), 12 deletions(-)
diff --git a/exec/main.c b/exec/main.c
index 7a471a1..fb0486e 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -835,6 +835,8 @@ static void timer_function_scheduler_timeout (void *data)
log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
"(threshold is %0.4f ms). Consider token timeout increase.",
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
+
+ stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC);
}
/*
diff --git a/exec/stats.c b/exec/stats.c
index e89504e..d5c1cbc 100644
--- a/exec/stats.c
+++ b/exec/stats.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Red Hat, Inc.
+ * Copyright (c) 2017-2020 Red Hat, Inc.
*
* All rights reserved.
*
@@ -60,9 +60,20 @@ LOGSYS_DECLARE_SUBSYS ("STATS");
static qb_map_t *stats_map;
+/* Structure of an element in the schedmiss array */
+struct schedmiss_entry {
+ uint64_t timestamp;
+ float delay;
+};
+#define MAX_SCHEDMISS_EVENTS 10
+static struct schedmiss_entry schedmiss_event[MAX_SCHEDMISS_EVENTS];
+static unsigned int highest_schedmiss_event;
+
+#define SCHEDMISS_PREFIX "stats.schedmiss"
+
/* Convert iterator number to text and a stats pointer */
struct cs_stats_conv {
- enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG} type;
+ enum {STAT_PG, STAT_SRP, STAT_KNET, STAT_KNET_HANDLE, STAT_IPCSC, STAT_IPCSG, STAT_SCHEDMISS} type;
const char *name;
const size_t offset;
const icmap_value_types_t value_type;
@@ -190,6 +201,10 @@ struct cs_stats_conv cs_ipcs_global_stats[] = {
{ STAT_IPCSG, "global.active", offsetof(struct ipcs_global_stats, active), ICMAP_VALUETYPE_UINT64},
{ STAT_IPCSG, "global.closed", offsetof(struct ipcs_global_stats, closed), ICMAP_VALUETYPE_UINT64},
};
+struct cs_stats_conv cs_schedmiss_stats[] = {
+ { STAT_SCHEDMISS, "timestamp", offsetof(struct schedmiss_entry, timestamp), ICMAP_VALUETYPE_UINT64},
+ { STAT_SCHEDMISS, "delay", offsetof(struct schedmiss_entry, delay), ICMAP_VALUETYPE_FLOAT},
+};
#define NUM_PG_STATS (sizeof(cs_pg_stats) / sizeof(struct cs_stats_conv))
#define NUM_SRP_STATS (sizeof(cs_srp_stats) / sizeof(struct cs_stats_conv))
@@ -286,7 +301,7 @@ cs_error_t stats_map_init(const struct corosync_api_v1 *corosync_api)
stats_add_entry(param, &cs_ipcs_global_stats[i]);
}
- /* KNET and IPCS stats are added when appropriate */
+ /* KNET, IPCS & SCHEDMISS stats are added when appropriate */
return CS_OK;
}
@@ -307,6 +322,8 @@ cs_error_t stats_map_get(const char *key_name,
int link_no;
int service_id;
uint32_t pid;
+ unsigned int sm_event;
+ char *sm_type;
void *conn_ptr;
item = qb_map_get(stats_map, key_name);
@@ -363,17 +380,85 @@ cs_error_t stats_map_get(const char *key_name,
cs_ipcs_get_global_stats(&ipcs_global_stats);
stats_map_set_value(statinfo, &ipcs_global_stats, value, value_len, type);
break;
+ case STAT_SCHEDMISS:
+ if (sscanf(key_name, SCHEDMISS_PREFIX ".%d", &sm_event) != 1) {
+ return CS_ERR_NOT_EXIST;
+ }
+
+ sm_type = strrchr(key_name, '.');
+ if (sm_type == NULL) {
+ return CS_ERR_NOT_EXIST;
+ }
+ sm_type++;
+
+ if (strcmp(sm_type, "timestamp") == 0) {
+ memcpy(value, &schedmiss_event[sm_event].timestamp, sizeof(uint64_t));
+ *value_len = sizeof(uint64_t);
+ *type = ICMAP_VALUETYPE_UINT64;
+ }
+ if (strcmp(sm_type, "delay") == 0) {
+ memcpy(value, &schedmiss_event[sm_event].delay, sizeof(float));
+ *value_len = sizeof(float);
+ *type = ICMAP_VALUETYPE_FLOAT;
+ }
+ break;
default:
return CS_ERR_LIBRARY;
}
return CS_OK;
}
-#define STATS_CLEAR "stats.clear."
-#define STATS_CLEAR_KNET "stats.clear.knet"
-#define STATS_CLEAR_IPC "stats.clear.ipc"
-#define STATS_CLEAR_TOTEM "stats.clear.totem"
-#define STATS_CLEAR_ALL "stats.clear.all"
+static void schedmiss_clear_stats(void)
+{
+ int i;
+ char param[ICMAP_KEYNAME_MAXLEN];
+
+ for (i=0; i<MAX_SCHEDMISS_EVENTS; i++) {
+ if (i < highest_schedmiss_event) {
+ sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", i);
+ stats_rm_entry(param);
+ sprintf(param, SCHEDMISS_PREFIX ".%i.delay", i);
+ stats_rm_entry(param);
+ }
+ schedmiss_event[i].timestamp = (uint64_t)0LL;
+ schedmiss_event[i].delay = 0.0f;
+ }
+ highest_schedmiss_event = 0;
+}
+
+/* Called from main.c */
+void stats_add_schedmiss_event(uint64_t timestamp, float delay)
+{
+ char param[ICMAP_KEYNAME_MAXLEN];
+ int i;
+
+ /* Move 'em all up */
+ for (i=MAX_SCHEDMISS_EVENTS-2; i>=0; i--) {
+ schedmiss_event[i+1].timestamp = schedmiss_event[i].timestamp;
+ schedmiss_event[i+1].delay = schedmiss_event[i].delay;
+ }
+
+ /* New entries are always at the front */
+ schedmiss_event[0].timestamp = timestamp;
+ schedmiss_event[0].delay = delay;
+
+ /* If we've not run off the end then add an entry in the trie for the new 'end' one */
+ if (highest_schedmiss_event < MAX_SCHEDMISS_EVENTS) {
+ sprintf(param, SCHEDMISS_PREFIX ".%i.timestamp", highest_schedmiss_event);
+ stats_add_entry(param, &cs_schedmiss_stats[0]);
+ sprintf(param, SCHEDMISS_PREFIX ".%i.delay", highest_schedmiss_event);
+ stats_add_entry(param, &cs_schedmiss_stats[1]);
+ highest_schedmiss_event++;
+ }
+ /* Notifications get sent by the stats_updater */
+}
+
+#define STATS_CLEAR "stats.clear."
+#define STATS_CLEAR_KNET "stats.clear.knet"
+#define STATS_CLEAR_IPC "stats.clear.ipc"
+#define STATS_CLEAR_TOTEM "stats.clear.totem"
+#define STATS_CLEAR_ALL "stats.clear.all"
+#define STATS_CLEAR_SCHEDMISS "stats.clear.schedmiss"
cs_error_t stats_map_set(const char *key_name,
const void *value,
@@ -394,9 +479,14 @@ cs_error_t stats_map_set(const char *key_name,
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TOTEM);
cleared = 1;
}
+ if (strncmp(key_name, STATS_CLEAR_SCHEDMISS, strlen(STATS_CLEAR_SCHEDMISS)) == 0) {
+ schedmiss_clear_stats();
+ cleared = 1;
+ }
if (strncmp(key_name, STATS_CLEAR_ALL, strlen(STATS_CLEAR_ALL)) == 0) {
totempg_stats_clear(TOTEMPG_STATS_CLEAR_TRANSPORT | TOTEMPG_STATS_CLEAR_TOTEM);
cs_ipcs_clear_stats();
+ schedmiss_clear_stats();
cleared = 1;
}
if (!cleared) {
@@ -500,6 +590,11 @@ static void stats_map_notify_fn(uint32_t event, char *key, void *old_value, void
return ;
}
+ /* Ignore schedmiss trackers as the values are read from the circular buffer */
+ if (strncmp(key, SCHEDMISS_PREFIX, strlen(SCHEDMISS_PREFIX)) == 0 ) {
+ return ;
+ }
+
new_val.data = new_value;
if (stats_map_get(key,
&new_value,
@@ -556,7 +651,7 @@ cs_error_t stats_map_track_add(const char *key_name,
}
/* Get initial value */
if (stats_map_get(tracker->key_name,
- &tracker->old_value, &value_len, &type) == CS_OK) {
+ &tracker->old_value, &value_len, &type) != CS_OK) {
tracker->old_value = 0ULL;
}
} else {
diff --git a/exec/stats.h b/exec/stats.h
index 45891ae..eac9e7c 100644
--- a/exec/stats.h
+++ b/exec/stats.h
@@ -69,3 +69,5 @@ void stats_trigger_trackers(void);
void stats_ipcs_add_connection(int service_id, uint32_t pid, void *ptr);
void stats_ipcs_del_connection(int service_id, uint32_t pid, void *ptr);
cs_error_t cs_ipcs_get_conn_stats(int service_id, uint32_t pid, void *conn_ptr, struct ipcs_conn_stats *ipcs_stats);
+
+void stats_add_schedmiss_event(uint64_t, float delay);
diff --git a/man/cmap_keys.7 b/man/cmap_keys.7
index 6bc04fe..da95c51 100644
--- a/man/cmap_keys.7
+++ b/man/cmap_keys.7
@@ -1,5 +1,5 @@
.\"/*
-.\" * Copyright (c) 2012-2018 Red Hat, Inc.
+.\" * Copyright (c) 2012-2020 Red Hat, Inc.
.\" *
.\" * All rights reserved.
.\" *
@@ -357,6 +357,27 @@ contains the total number of interrupted sends.
.B service_id
contains the ID of service which the IPC is connected to.
+
+.TP
+stats.schedmiss.<n>.*
+If corosync is not scheduled after the required period of time it will
+log this event and also write an entry to the stats cmap under this key.
+There can be up to 10 entries (0..9) in here, when an 11th event happens
+the earliest will be removed.
+
+These events will always be in reverse order, so stats.schedmiss.0.* will
+always be the latest event kept and 9 the oldest. If you want to listen
+for notifications then you are recommended to listen for changes
+to stats.schedmiss.0.timestamp or stats.schedmiss.0.delay.
+
+.B timestamp
+The time of the event in ms since the Epoch (ie time_t * 1000 but with
+valid milliseconds).
+
+.B delay
+The time that corosync was paused (in ms, float value).
+
+
.TP
stats.clear.*
These are write-only keys used to clear the stats for various subsystems
@@ -370,6 +391,9 @@ Clears the knet stats
.B ipc
Clears the ipc stats
+.B schedmiss
+Clears the schedmiss stats
+
.B all
Clears all of the above stats
diff --git a/tools/corosync-cmapctl.c b/tools/corosync-cmapctl.c
index a4b61bd..ffca7e1 100644
--- a/tools/corosync-cmapctl.c
+++ b/tools/corosync-cmapctl.c
@@ -115,7 +115,7 @@ static int print_help(void)
printf(" about the networking and IPC traffic in some detail.\n");
printf("\n");
printf("Clear stats:\n");
- printf(" corosync-cmapctl -C [knet|ipc|totem|all]\n");
+ printf(" corosync-cmapctl -C [knet|ipc|totem|schedmiss|all]\n");
printf(" The 'stats' map is implied\n");
printf("\n");
printf("Load settings from a file:\n");
@@ -849,6 +849,7 @@ int main(int argc, char *argv[])
if (strcmp(optarg, "knet") == 0 ||
strcmp(optarg, "totem") == 0 ||
strcmp(optarg, "ipc") == 0 ||
+ strcmp(optarg, "schedmiss") == 0 ||
strcmp(optarg, "all") == 0) {
action = ACTION_CLEARSTATS;
clear_opt = optarg;
@@ -857,7 +858,7 @@ int main(int argc, char *argv[])
map = CMAP_MAP_STATS;
}
else {
- fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc' or 'all'\n");
+ fprintf(stderr, "argument to -C should be 'knet', 'totem', 'ipc', 'schedmiss' or 'all'\n");
return (EXIT_FAILURE);
}
break;
--
1.8.3.1

View File

@ -0,0 +1,31 @@
From ebd05fa00826c366922e619b012a0684c6856539 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Thu, 23 Jan 2020 17:11:54 +0100
Subject: [PATCH] stats: Use nanoseconds from epoch for schedmiss
Using monotonic time is not working because it doesn't have to match
time from epoch.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
exec/main.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/exec/main.c b/exec/main.c
index fb0486e..821d97e 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -836,7 +836,8 @@ static void timer_function_scheduler_timeout (void *data)
"(threshold is %0.4f ms). Consider token timeout increase.",
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
- stats_add_schedmiss_event(tv_current / 1000, (float)tv_diff / QB_TIME_NS_IN_MSEC);
+ stats_add_schedmiss_event(qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC,
+ (float)tv_diff / QB_TIME_NS_IN_MSEC);
}
/*
--
1.8.3.1

View File

@ -0,0 +1,47 @@
From 35662dd0ec53f456445c30c0ef92892f47b25aa2 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Mon, 24 Feb 2020 14:58:45 +0100
Subject: [PATCH] main: Add schedmiss timestamp into message
This is useful for matching schedmiss event in stats map with logged
event.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
exec/main.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/exec/main.c b/exec/main.c
index 821d97e..8c3df79 100644
--- a/exec/main.c
+++ b/exec/main.c
@@ -817,6 +817,7 @@ static void timer_function_scheduler_timeout (void *data)
struct scheduler_pause_timeout_data *timeout_data = (struct scheduler_pause_timeout_data *)data;
unsigned long long tv_current;
unsigned long long tv_diff;
+ uint64_t schedmiss_event_tstamp;
tv_current = qb_util_nano_current_get ();
@@ -832,12 +833,14 @@ static void timer_function_scheduler_timeout (void *data)
timeout_data->tv_prev = tv_current;
if (tv_diff > timeout_data->max_tv_diff) {
- log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled for %0.4f ms "
+ schedmiss_event_tstamp = qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC;
+
+ log_printf (LOGSYS_LEVEL_WARNING, "Corosync main process was not scheduled (@%" PRIu64 ") for %0.4f ms "
"(threshold is %0.4f ms). Consider token timeout increase.",
+ schedmiss_event_tstamp,
(float)tv_diff / QB_TIME_NS_IN_MSEC, (float)timeout_data->max_tv_diff / QB_TIME_NS_IN_MSEC);
- stats_add_schedmiss_event(qb_util_nano_from_epoch_get() / QB_TIME_NS_IN_MSEC,
- (float)tv_diff / QB_TIME_NS_IN_MSEC);
+ stats_add_schedmiss_event(schedmiss_event_tstamp, (float)tv_diff / QB_TIME_NS_IN_MSEC);
}
/*
--
1.8.3.1

View File

@ -0,0 +1,51 @@
From 0c16442f2d93f32a229b87d2672e2dc8025ec704 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Wed, 4 Mar 2020 11:42:15 +0100
Subject: [PATCH] votequorum: Change check of expected_votes
Previously, the new expected_votes value was checked so that the newly
computed quorum value was in the interval <total_votes / 2, total_votes>.
The upper bound prevented the cluster from becoming unquorate, but the lower
bound was almost useless because it still allowed expected_votes to be set
to a value smaller than total_votes.
Solution is to check if expected_votes is bigger or equal to total_votes
and for quorate cluster only check if cluster doesn't become unquorate
(for unquorate cluster one can set upper range freely - as it is
perfectly possible when using config file)
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
exec/votequorum.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/exec/votequorum.c b/exec/votequorum.c
index 52424fa..b152425 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2015 Red Hat, Inc.
+ * Copyright (c) 2009-2020 Red Hat, Inc.
*
* All rights reserved.
*
@@ -2688,8 +2688,12 @@ static void message_handler_req_lib_votequorum_setexpected (void *conn, const vo
*/
newquorum = calculate_quorum(1, req_lib_votequorum_setexpected->expected_votes, &total_votes);
allow_downscale = allow_downscale_status;
- if (newquorum < total_votes / 2 ||
- newquorum > total_votes) {
+ /*
+ * Setting expected_votes < total_votes doesn't make sense.
+ * For quorate cluster prevent cluster to become unquorate.
+ */
+ if (req_lib_votequorum_setexpected->expected_votes < total_votes ||
+ (cluster_is_quorate && (newquorum > total_votes))) {
error = CS_ERR_INVALID_PARAM;
goto error_exit;
}
--
1.8.3.1

View File

@ -0,0 +1,33 @@
From 5f543465bb3506b7f4929a426f1c22a9c854cecd Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Wed, 4 Mar 2020 08:53:41 +0100
Subject: [PATCH] quorumtool: exit on invalid expected votes
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
tools/corosync-quorumtool.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/corosync-quorumtool.c b/tools/corosync-quorumtool.c
index 9bef844..44bf181 100644
--- a/tools/corosync-quorumtool.c
+++ b/tools/corosync-quorumtool.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009-2019 Red Hat, Inc.
+ * Copyright (c) 2009-2020 Red Hat, Inc.
*
* All rights reserved.
*
@@ -937,6 +937,7 @@ int main (int argc, char *argv[]) {
votes = strtol(optarg, &endptr, 0);
if ((votes == 0 && endptr == optarg) || votes <= 0) {
fprintf(stderr, "New expected votes value was not valid, try a positive number\n");
+ exit(EXIT_FAILURE);
} else {
command_opt = CMD_SETEXPECTED;
}
--
1.8.3.1

View File

@ -0,0 +1,67 @@
From ca320beac25f82c0c555799e647a47975a333c28 Mon Sep 17 00:00:00 2001
From: Jan Friesse <jfriesse@redhat.com>
Date: Tue, 10 Mar 2020 17:49:27 +0100
Subject: [PATCH] votequorum: set wfa status only on startup
Previously, reloading the configuration with wait_for_all enabled resulted
in wait_for_all_status being set, which set cluster_is_quorate to 0 but
didn't inform the quorum service, so votequorum and quorum information
could get out of sync.
Example is 1 node cluster, which is extended to 3 nodes. Quorum service
reports cluster as a quorate (incorrect) and votequorum as not-quorate
(correct). Similar behavior happens when extending cluster in general,
but some configurations are less incorrect (3->4).
Discussed solution was to inform quorum service but that would mean
every reload would cause loss of quorum until all nodes would be seen
again.
Such behaviour is consistent but seems to be a bit too strict.
Proposed solution sets wait_for_all_status only on startup and
doesn't touch it during reload.
This solution fulfills requirement of "cluster will be quorate for
the first time only after all nodes have been visible at least
once at the same time." because node clears wait_for_all_status only
after it sees all other nodes or joins cluster which is quorate. It also
solves problem with extending cluster, because when cluster becomes
unquorate (1->3) wait_for_all_status is set.
The added assert is only there to ensure that I haven't missed any case
in which a quorate cluster may become unquorate.
Signed-off-by: Jan Friesse <jfriesse@redhat.com>
Reviewed-by: Christine Caulfield <ccaulfie@redhat.com>
---
exec/votequorum.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/exec/votequorum.c b/exec/votequorum.c
index b152425..fb9f1cd 100644
--- a/exec/votequorum.c
+++ b/exec/votequorum.c
@@ -1009,7 +1009,7 @@ static void are_we_quorate(unsigned int total_votes)
"Waiting for all cluster members. "
"Current votes: %d expected_votes: %d",
total_votes, us->expected_votes);
- cluster_is_quorate = 0;
+ assert(!cluster_is_quorate);
return;
}
update_wait_for_all_status(0);
@@ -1547,7 +1547,9 @@ static char *votequorum_readconfig(int runtime)
update_ev_barrier(us->expected_votes);
update_two_node();
if (wait_for_all) {
- update_wait_for_all_status(1);
+ if (!runtime) {
+ update_wait_for_all_status(1);
+ }
} else if (wait_for_all_autoset && wait_for_all_status) {
/*
* Reset wait for all status for consistency when wfa is auto-unset by 2node.
--
1.8.3.1

View File

@ -23,11 +23,21 @@
Name: corosync
Summary: The Corosync Cluster Engine and Application Programming Interfaces
Version: 3.0.3
Release: 2%{?gitver}%{?dist}
Release: 4%{?gitver}%{?dist}
License: BSD
URL: http://corosync.github.io/corosync/
Source0: http://build.clusterlabs.org/corosync/releases/%{name}-%{version}%{?gittarver}.tar.gz
Patch0: bz1780137-1-votequorum-Ignore-the-icmap_get_-return-value.patch
Patch1: bz1791792-1-man-move-cmap_keys-man-page-from-section-8-to-7.patch
Patch2: bz1780137-2-votequorum-Reflect-runtime-change-of-2Node-to-WFA.patch
Patch3: bz1791792-2-stats-Add-stats-for-scheduler-misses.patch
Patch4: bz1791792-3-stats-Use-nanoseconds-from-epoch-for-schedmiss.patch
Patch5: bz1791792-4-main-Add-schedmiss-timestamp-into-message.patch
Patch6: bz1809864-1-votequorum-Change-check-of-expected_votes.patch
Patch7: bz1809864-2-quorumtool-exit-on-invalid-expected-votes.patch
Patch8: bz1816653-1-votequorum-set-wfa-status-only-on-startup.patch
%if %{with spausedd}
Source1: https://github.com/jfriesse/spausedd/releases/download/%{spausedd_version}/spausedd-%{spausedd_version}.tar.gz
# VMGuestLib exists only for x86_64 architecture
@ -89,6 +99,16 @@ BuildRequires: pkgconfig(vmguestlib)
%setup -q -n %{name}-%{version}%{?gittarver}
%endif
%patch0 -p1 -b .bz1780137-1
%patch1 -p1 -b .bz1791792-1
%patch2 -p1 -b .bz1780137-2
%patch3 -p1 -b .bz1791792-2
%patch4 -p1 -b .bz1791792-3
%patch5 -p1 -b .bz1791792-4
%patch6 -p1 -b .bz1809864-1
%patch7 -p1 -b .bz1809864-2
%patch8 -p1 -b .bz1816653-1
%build
%if %{with runautogen}
./autogen.sh
@ -254,7 +274,7 @@ fi
%{_mandir}/man8/corosync-quorumtool.8*
%{_mandir}/man5/corosync.conf.5*
%{_mandir}/man5/votequorum.5*
%{_mandir}/man8/cmap_keys.8*
%{_mandir}/man7/cmap_keys.7*
# library
#
@ -387,6 +407,36 @@ fi
%endif
%changelog
* Tue May 26 2020 Jan Friesse <jfriesse@redhat.com> 3.0.3-4
- Resolves: rhbz#1780137
- Resolves: rhbz#1791792
- Resolves: rhbz#1809864
- Resolves: rhbz#1816653
- votequorum: Ignore the icmap_get_* return value (rhbz#1780137)
- merge upstream commit cddd62f972bca276c934e58f08da84071cec1ddb (rhbz#1780137)
- man: move cmap_keys man page from section 8 to 7 (rhbz#1791792)
- merge upstream commit f1d36307e524f9440733f0b01a9fc627a0e1cac7 (rhbz#1791792)
- votequorum: Reflect runtime change of 2Node to WFA (rhbz#1780137)
- merge upstream commit 8ce65bf951bc1e5b2d64b60ea027fbdc551d4fc8 (rhbz#1780137)
- stats: Add stats for scheduler misses (rhbz#1791792)
- merge upstream commit 48b6894ef41e9a06ccbb696d062d86ef60dc2c4b (rhbz#1791792)
- stats: Use nanoseconds from epoch for schedmiss (rhbz#1791792)
- merge upstream commit ebd05fa00826c366922e619b012a0684c6856539 (rhbz#1791792)
- main: Add schedmiss timestamp into message (rhbz#1791792)
- merge upstream commit 35662dd0ec53f456445c30c0ef92892f47b25aa2 (rhbz#1791792)
- votequorum: Change check of expected_votes (rhbz#1809864)
- merge upstream commit 0c16442f2d93f32a229b87d2672e2dc8025ec704 (rhbz#1809864)
- quorumtool: exit on invalid expected votes (rhbz#1809864)
- merge upstream commit 5f543465bb3506b7f4929a426f1c22a9c854cecd (rhbz#1809864)
- votequorum: set wfa status only on startup (rhbz#1816653)
- merge upstream commit ca320beac25f82c0c555799e647a47975a333c28 (rhbz#1816653)
* Tue Apr 28 2020 Jan Friesse <jfriesse@redhat.com> - 3.0.3-3
- Resolves: rhbz#1828295
- Add explicit spausedd dependency for revdeps CI test
* Mon Nov 25 2019 Jan Friesse <jfriesse@redhat.com> - 3.0.3-2
- Related: rhbz#1745623