dlm/0001-dlm_controld-use-new-quorum-api-to-detect-missed-fai.patch

200 lines
6.0 KiB
Diff
Raw Normal View History

From 2e893b981b19430aeca983dd63fb3ac0979d0b35 Mon Sep 17 00:00:00 2001
From: David Teigland <teigland@redhat.com>
Date: Wed, 9 Sep 2020 17:16:59 -0500
Subject: [PATCH] dlm_controld: use new quorum api to detect missed failures
If a node failed and then quickly rejoined before corosync
detected the original failure, the quorum callback from
corosync would not include an indication that the node had
ever failed. This meant there would be no removal and
re-addition of the node's configfs settings in the kernel,
and the failed network connections may never be cleared.
The new quorum nodelist callback does include an indication
of a node that has left and rejoined. This patch keeps track
of nodeids that have left and rejoined through that callback
between sequential occurrences of the standard quorum callback.
When the standard quorum callback is removing and adding
configfs settings for nodes that have been removed or added,
it will also now look at the list of nodes that have left
and rejoined, and do configfs removal and addition for those.
---
dlm_controld/member.c | 106 +++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 91 insertions(+), 15 deletions(-)
diff --git a/dlm_controld/member.c b/dlm_controld/member.c
index 1d5bfa3d9166..d567c114b259 100644
--- a/dlm_controld/member.c
+++ b/dlm_controld/member.c
@@ -20,6 +20,8 @@ static int old_node_count;
static uint32_t quorum_nodes[MAX_NODES];
static int quorum_node_count;
static struct list_head cluster_nodes;
+static uint32_t leavejoin_nodes[MAX_NODES];
+static int leavejoin_count;
struct node_cluster {
struct list_head list;
@@ -103,15 +105,51 @@ int is_cluster_member(uint32_t nodeid)
return is_member(quorum_nodes, quorum_node_count, nodeid);
}
-static void quorum_callback(quorum_handle_t h, uint32_t quorate,
- uint64_t ring_seq, uint32_t node_list_entries,
- uint32_t *node_list)
+static int is_leavejoin_node(uint32_t nodeid)
+{
+ return is_member(leavejoin_nodes, leavejoin_count, nodeid);
+}
+
+static void quorum_nodelist_callback(quorum_handle_t cbhandle, struct quorum_ring_id ring_id,
+ uint32_t member_list_entries, const uint32_t *member_list,
+ uint32_t joined_list_entries, const uint32_t *joined_list,
+ uint32_t left_list_entries, const uint32_t *left_list)
+{
+ uint64_t ring_seq = ring_id.seq;
+ int i, j;
+
+ for (i = 0; i < left_list_entries; i++) {
+ log_debug("cluster left_list %u seq %llu",
+ left_list[i], (unsigned long long)ring_seq);
+ }
+
+ for (j = 0; j < joined_list_entries; j++) {
+ log_debug("cluster joined_list %u seq %llu",
+ joined_list[j], (unsigned long long)ring_seq);
+ }
+
+ for (i = 0; i < left_list_entries; i++) {
+ for (j = 0; j < joined_list_entries; j++) {
+ if (joined_list[j] == left_list[i]) {
+ log_debug("cluster node %d left and joined", joined_list[j]);
+ if (!is_leavejoin_node(joined_list[j]))
+ leavejoin_nodes[leavejoin_count++] = joined_list[j];
+ }
+ }
+ }
+}
+
+static void quorum_callback(quorum_handle_t cbhandle, uint32_t quorate,
+ struct quorum_ring_id ring_id, uint32_t node_list_entries,
+ const uint32_t *node_list)
{
corosync_cfg_node_address_t addrs[MAX_NODE_ADDRESSES];
corosync_cfg_node_address_t *addrptr = addrs;
const struct node_config *nc;
cs_error_t err;
int i, j, num_addrs;
+ uint32_t nodeid;
+ uint64_t ring_seq = ring_id.seq;
uint64_t now = monotime();
if (!cluster_joined_monotime) {
@@ -142,15 +180,55 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
if (!is_cluster_member(old_nodes[i])) {
log_debug("cluster node %u removed seq %llu",
old_nodes[i], (unsigned long long)cluster_ringid_seq);
+
rem_cluster_node(old_nodes[i], now);
del_configfs_node(old_nodes[i]);
}
}
+ for (i = 0; i < leavejoin_count; i++) {
+ nodeid = leavejoin_nodes[i];
+
+ log_debug("cluster node %u leavejoin seq %llu",
+ nodeid, (unsigned long long)cluster_ringid_seq);
+
+ /* remove */
+
+ rem_cluster_node(nodeid, now);
+ del_configfs_node(nodeid);
+
+ /* add */
+
+ add_cluster_node(nodeid, now);
+
+ fence_delay_begin = now;
+
+ err = corosync_cfg_get_node_addrs(ch, nodeid,
+ MAX_NODE_ADDRESSES,
+ &num_addrs, addrs);
+ if (err != CS_OK) {
+ log_error("corosync_cfg_get_node_addrs failed nodeid %u", nodeid);
+ continue;
+ }
+
+ nc = node_config_get(nodeid);
+
+ for (j = 0; j < num_addrs; j++) {
+ add_configfs_node(nodeid,
+ addrptr[j].address,
+ addrptr[j].address_length,
+ (nodeid == our_nodeid),
+ nc->mark);
+ }
+ }
+
for (i = 0; i < quorum_node_count; i++) {
+ if (is_leavejoin_node(quorum_nodes[i]))
+ continue;
if (!is_old_member(quorum_nodes[i])) {
log_debug("cluster node %u added seq %llu",
quorum_nodes[i], (unsigned long long)cluster_ringid_seq);
+
add_cluster_node(quorum_nodes[i], now);
fence_delay_begin = now;
@@ -176,12 +254,10 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
}
}
}
-}
-static quorum_callbacks_t quorum_callbacks =
-{
- .quorum_notify_fn = quorum_callback,
-};
+ memset(leavejoin_nodes, 0, sizeof(leavejoin_nodes));
+ leavejoin_count = 0;
+}
void process_cluster(int ci)
{
@@ -208,23 +284,23 @@ void update_cluster(void)
int setup_cluster(void)
{
+ quorum_model_v1_data_t model_data;
cs_error_t err;
int fd;
- uint32_t quorum_type;
+ uint32_t quorum_type = 0;
INIT_LIST_HEAD(&cluster_nodes);
- err = quorum_initialize(&qh, &quorum_callbacks, &quorum_type);
+ memset(&model_data, 0, sizeof(model_data));
+ model_data.quorum_notify_fn = quorum_callback;
+ model_data.nodelist_notify_fn = quorum_nodelist_callback;
+
+ err = quorum_model_initialize(&qh, QUORUM_MODEL_V1, (quorum_model_data_t *)&model_data, &quorum_type, NULL);
if (err != CS_OK) {
log_error("quorum init error %d", err);
return -1;
}
- if (quorum_type == QUORUM_FREE) {
- log_error("no quorum provider configured in corosync, unable to operate");
- goto fail;
- }
-
err = quorum_fd_get(qh, &fd);
if (err != CS_OK) {
log_error("quorum fd_get error %d", err);
--
2.7.5