200 lines
6.0 KiB
Diff
200 lines
6.0 KiB
Diff
|
From 2e893b981b19430aeca983dd63fb3ac0979d0b35 Mon Sep 17 00:00:00 2001
|
||
|
From: David Teigland <teigland@redhat.com>
|
||
|
Date: Wed, 9 Sep 2020 17:16:59 -0500
|
||
|
Subject: [PATCH] dlm_controld: use new quorum api to detect missed failures
|
||
|
|
||
|
If a node failed and then quickly rejoined before corosync
|
||
|
detected the original failure, the quorum callback from
|
||
|
corosync would not include an indication that the node had
|
||
|
ever failed. This meant there would be no removal and
|
||
|
re-addition of the node's configfs settings in the kernel,
|
||
|
and the failed network connections may never be cleared.
|
||
|
|
||
|
The new quorum nodelist callback does include an indication
|
||
|
of a node that has left and rejoined. This patch keeps track
|
||
|
of nodeids that have left and rejoined through that callback
|
||
|
between sequential occurrences of the standard quorum callback.
|
||
|
|
||
|
When the standard quorum callback is removing and adding
|
||
|
configfs settings for nodes that have been removed or added,
|
||
|
it will also now look at the list of nodes that have left
|
||
|
and rejoined, and do configfs removal and addition for those.
|
||
|
---
|
||
|
dlm_controld/member.c | 106 +++++++++++++++++++++++++++++++++++++++++++-------
|
||
|
1 file changed, 91 insertions(+), 15 deletions(-)
|
||
|
|
||
|
diff --git a/dlm_controld/member.c b/dlm_controld/member.c
|
||
|
index 1d5bfa3d9166..d567c114b259 100644
|
||
|
--- a/dlm_controld/member.c
|
||
|
+++ b/dlm_controld/member.c
|
||
|
@@ -20,6 +20,8 @@ static int old_node_count;
|
||
|
static uint32_t quorum_nodes[MAX_NODES];
|
||
|
static int quorum_node_count;
|
||
|
static struct list_head cluster_nodes;
|
||
|
+static uint32_t leavejoin_nodes[MAX_NODES];
|
||
|
+static int leavejoin_count;
|
||
|
|
||
|
struct node_cluster {
|
||
|
struct list_head list;
|
||
|
@@ -103,15 +105,51 @@ int is_cluster_member(uint32_t nodeid)
|
||
|
return is_member(quorum_nodes, quorum_node_count, nodeid);
|
||
|
}
|
||
|
|
||
|
-static void quorum_callback(quorum_handle_t h, uint32_t quorate,
|
||
|
- uint64_t ring_seq, uint32_t node_list_entries,
|
||
|
- uint32_t *node_list)
|
||
|
+static int is_leavejoin_node(uint32_t nodeid)
|
||
|
+{
|
||
|
+ return is_member(leavejoin_nodes, leavejoin_count, nodeid);
|
||
|
+}
|
||
|
+
|
||
|
+static void quorum_nodelist_callback(quorum_handle_t cbhandle, struct quorum_ring_id ring_id,
|
||
|
+ uint32_t member_list_entries, const uint32_t *member_list,
|
||
|
+ uint32_t joined_list_entries, const uint32_t *joined_list,
|
||
|
+ uint32_t left_list_entries, const uint32_t *left_list)
|
||
|
+{
|
||
|
+ uint64_t ring_seq = ring_id.seq;
|
||
|
+ int i, j;
|
||
|
+
|
||
|
+ for (i = 0; i < left_list_entries; i++) {
|
||
|
+ log_debug("cluster left_list %u seq %llu",
|
||
|
+ left_list[i], (unsigned long long)ring_seq);
|
||
|
+ }
|
||
|
+
|
||
|
+ for (j = 0; j < joined_list_entries; j++) {
|
||
|
+ log_debug("cluster joined_list %u seq %llu",
|
||
|
+ joined_list[j], (unsigned long long)ring_seq);
|
||
|
+ }
|
||
|
+
|
||
|
+ for (i = 0; i < left_list_entries; i++) {
|
||
|
+ for (j = 0; j < joined_list_entries; j++) {
|
||
|
+ if (joined_list[j] == left_list[i]) {
|
||
|
+ log_debug("cluster node %d left and joined", joined_list[j]);
|
||
|
+ if (!is_leavejoin_node(joined_list[j]))
|
||
|
+ leavejoin_nodes[leavejoin_count++] = joined_list[j];
|
||
|
+ }
|
||
|
+ }
|
||
|
+ }
|
||
|
+}
|
||
|
+
|
||
|
+static void quorum_callback(quorum_handle_t cbhandle, uint32_t quorate,
|
||
|
+ struct quorum_ring_id ring_id, uint32_t node_list_entries,
|
||
|
+ const uint32_t *node_list)
|
||
|
{
|
||
|
corosync_cfg_node_address_t addrs[MAX_NODE_ADDRESSES];
|
||
|
corosync_cfg_node_address_t *addrptr = addrs;
|
||
|
const struct node_config *nc;
|
||
|
cs_error_t err;
|
||
|
int i, j, num_addrs;
|
||
|
+ uint32_t nodeid;
|
||
|
+ uint64_t ring_seq = ring_id.seq;
|
||
|
uint64_t now = monotime();
|
||
|
|
||
|
if (!cluster_joined_monotime) {
|
||
|
@@ -142,15 +180,55 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
|
||
|
if (!is_cluster_member(old_nodes[i])) {
|
||
|
log_debug("cluster node %u removed seq %llu",
|
||
|
old_nodes[i], (unsigned long long)cluster_ringid_seq);
|
||
|
+
|
||
|
rem_cluster_node(old_nodes[i], now);
|
||
|
del_configfs_node(old_nodes[i]);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+ for (i = 0; i < leavejoin_count; i++) {
|
||
|
+ nodeid = leavejoin_nodes[i];
|
||
|
+
|
||
|
+ log_debug("cluster node %u leavejoin seq %llu",
|
||
|
+ nodeid, (unsigned long long)cluster_ringid_seq);
|
||
|
+
|
||
|
+ /* remove */
|
||
|
+
|
||
|
+ rem_cluster_node(nodeid, now);
|
||
|
+ del_configfs_node(nodeid);
|
||
|
+
|
||
|
+ /* add */
|
||
|
+
|
||
|
+ add_cluster_node(nodeid, now);
|
||
|
+
|
||
|
+ fence_delay_begin = now;
|
||
|
+
|
||
|
+ err = corosync_cfg_get_node_addrs(ch, nodeid,
|
||
|
+ MAX_NODE_ADDRESSES,
|
||
|
+ &num_addrs, addrs);
|
||
|
+ if (err != CS_OK) {
|
||
|
+ log_error("corosync_cfg_get_node_addrs failed nodeid %u", nodeid);
|
||
|
+ continue;
|
||
|
+ }
|
||
|
+
|
||
|
+ nc = node_config_get(nodeid);
|
||
|
+
|
||
|
+ for (j = 0; j < num_addrs; j++) {
|
||
|
+ add_configfs_node(nodeid,
|
||
|
+ addrptr[j].address,
|
||
|
+ addrptr[j].address_length,
|
||
|
+ (nodeid == our_nodeid),
|
||
|
+ nc->mark);
|
||
|
+ }
|
||
|
+ }
|
||
|
+
|
||
|
for (i = 0; i < quorum_node_count; i++) {
|
||
|
+ if (is_leavejoin_node(quorum_nodes[i]))
|
||
|
+ continue;
|
||
|
if (!is_old_member(quorum_nodes[i])) {
|
||
|
log_debug("cluster node %u added seq %llu",
|
||
|
quorum_nodes[i], (unsigned long long)cluster_ringid_seq);
|
||
|
+
|
||
|
add_cluster_node(quorum_nodes[i], now);
|
||
|
|
||
|
fence_delay_begin = now;
|
||
|
@@ -176,12 +254,10 @@ static void quorum_callback(quorum_handle_t h, uint32_t quorate,
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
-}
|
||
|
|
||
|
-static quorum_callbacks_t quorum_callbacks =
|
||
|
-{
|
||
|
- .quorum_notify_fn = quorum_callback,
|
||
|
-};
|
||
|
+ memset(leavejoin_nodes, 0, sizeof(leavejoin_nodes));
|
||
|
+ leavejoin_count = 0;
|
||
|
+}
|
||
|
|
||
|
void process_cluster(int ci)
|
||
|
{
|
||
|
@@ -208,23 +284,23 @@ void update_cluster(void)
|
||
|
|
||
|
int setup_cluster(void)
|
||
|
{
|
||
|
+ quorum_model_v1_data_t model_data;
|
||
|
cs_error_t err;
|
||
|
int fd;
|
||
|
- uint32_t quorum_type;
|
||
|
+ uint32_t quorum_type = 0;
|
||
|
|
||
|
INIT_LIST_HEAD(&cluster_nodes);
|
||
|
|
||
|
- err = quorum_initialize(&qh, &quorum_callbacks, &quorum_type);
|
||
|
+ memset(&model_data, 0, sizeof(model_data));
|
||
|
+ model_data.quorum_notify_fn = quorum_callback;
|
||
|
+ model_data.nodelist_notify_fn = quorum_nodelist_callback;
|
||
|
+
|
||
|
+ err = quorum_model_initialize(&qh, QUORUM_MODEL_V1, (quorum_model_data_t *)&model_data, &quorum_type, NULL);
|
||
|
if (err != CS_OK) {
|
||
|
log_error("quorum init error %d", err);
|
||
|
return -1;
|
||
|
}
|
||
|
|
||
|
- if (quorum_type == QUORUM_FREE) {
|
||
|
- log_error("no quorum provider configured in corosync, unable to operate");
|
||
|
- goto fail;
|
||
|
- }
|
||
|
-
|
||
|
err = quorum_fd_get(qh, &fd);
|
||
|
if (err != CS_OK) {
|
||
|
log_error("quorum fd_get error %d", err);
|
||
|
--
|
||
|
2.7.5
|
||
|
|