import pacemaker-2.0.5-9.el8_4.3
parent 185e259ad8
commit 2930d403d4

SOURCES/042-unfencing-loop.patch (new file, 733 lines)
@@ -0,0 +1,733 @@
From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 4 Jun 2021 16:30:55 -0500
Subject: [PATCH 01/11] Low: controller: check whether unfenced node was remote
 node

... so the controller can indicate the node is remote (if known at that point,
which is not guaranteed) when setting unfencing-related node attributes.
---
 daemons/controld/controld_fencing.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 23dff28..0fba661 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
         if (pcmk__str_eq("on", op, pcmk__str_casei)) {
             const char *value = NULL;
             char *now = crm_ttoa(time(NULL));
+            gboolean is_remote_node = FALSE;
+
+            /* This check is not 100% reliable, since this node is not
+             * guaranteed to have the remote node cached. However, it
+             * doesn't have to be reliable, since the attribute manager can
+             * learn a node's "remoteness" by other means sooner or later.
+             * This allows it to learn more quickly if this node does have
+             * the information.
+             */
+            if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
+                is_remote_node = TRUE;
+            }

-            update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
+            update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
+                         is_remote_node);
             free(now);

             value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
-            update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
+            update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
+                         is_remote_node);

             value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
-            update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
+            update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
+                         is_remote_node);

         } else if (action->sent_update == FALSE) {
             send_stonith_update(action, target, uuid);
--
1.8.3.1

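The hunk boils down to an opportunistic lookup: if the unfenced node's UUID is in the remote peer cache, pass that hint along; if it is not, nothing breaks, because the attribute manager can still learn the node's remoteness later. A minimal standalone sketch of that pattern (a plain GLib table stands in for crm_remote_peer_cache, and update_attr() for the controller's real update_attrd(); the attribute name and both stand-ins are illustrative, not the real API):

#include <glib.h>
#include <stdio.h>

/* Stand-in for the controller's remote peer cache, keyed by node UUID
 * (assume it is populated at startup); a miss only means "not known to
 * be remote yet". */
static GHashTable *remote_peer_cache = NULL;

/* Stand-in for update_attrd(): forwards the remoteness hint along with
 * the unfencing-related attribute update */
static void
update_attr(const char *node, const char *name, const char *value,
            gboolean is_remote_node)
{
    printf("%s[%s]=%s (remote=%d)\n", name, node, value, is_remote_node);
}

static void
record_unfencing(const char *target, const char *uuid, const char *now)
{
    /* Best-effort check, exactly as the comment in the hunk says:
     * a miss is fine because remoteness can be learned by other means */
    gboolean is_remote_node =
        (g_hash_table_lookup(remote_peer_cache, uuid) != NULL);

    // "node-unfenced" is a placeholder for CRM_ATTR_UNFENCED
    update_attr(target, "node-unfenced", now, is_remote_node);
}
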
From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 7 Jun 2021 10:50:36 -0500
Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote
 node to cache

... for future reuse
---
 daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 731c243..93a165b 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -102,6 +102,28 @@ free_attribute(gpointer data)
     }
 }

+/*!
+ * \internal
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
+ *
+ * \param[in] node_name  Name of the remote node
+ */
+static void
+cache_remote_node(const char *node_name)
+{
+    /* If we previously assumed this node was an unseen cluster node,
+     * remove its entry from the cluster peer cache.
+     */
+    crm_node_t *dup = crm_find_peer(0, node_name);
+
+    if (dup && (dup->uuid == NULL)) {
+        reap_crm_member(0, node_name);
+    }
+
+    // Ensure node is in the remote peer cache
+    CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
+}
+
 static xmlNode *
 build_attribute_xml(
     xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)

     crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
     if (is_remote) {
-        /* If we previously assumed this node was an unseen cluster node,
-         * remove its entry from the cluster peer cache.
-         */
-        crm_node_t *dup = crm_find_peer(0, host);
-
-        if (dup && (dup->uuid == NULL)) {
-            reap_crm_member(0, host);
-        }
-
-        /* Ensure this host is in the remote peer cache */
-        CRM_ASSERT(crm_remote_peer_get(host) != NULL);
+        cache_remote_node(host);
    }

     if (v == NULL) {
--
1.8.3.1

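cache_remote_node() encodes an ordering invariant: a name that was only guessed to be a cluster node (cached with no UUID) must be reaped from the cluster cache before the same name is ensured in the remote cache, so the two caches never both claim it. A self-contained sketch of that invariant using plain GLib tables (the struct and both tables are simplified stand-ins for pacemaker's crm_node_t and peer caches):

#include <glib.h>

typedef struct {
    char *uname;
    char *uuid;    // NULL = never actually seen, only assumed
} node_entry_t;

static GHashTable *cluster_cache;  // uname -> node_entry_t *
static GHashTable *remote_cache;   // uname -> node_entry_t *

static void
cache_remote_node(const char *node_name)
{
    // Drop a UUID-less cluster-cache entry: it was only an assumption
    node_entry_t *dup = g_hash_table_lookup(cluster_cache, node_name);

    if ((dup != NULL) && (dup->uuid == NULL)) {
        g_hash_table_remove(cluster_cache, node_name);
    }

    // Then make sure the name is present in the remote cache
    if (!g_hash_table_contains(remote_cache, node_name)) {
        node_entry_t *node = g_new0(node_entry_t, 1);

        node->uname = g_strdup(node_name);
        g_hash_table_insert(remote_cache, node->uname, node);
    }
}
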
From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 7 Jun 2021 10:39:34 -0500
Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes
 for remote nodes

Remote nodes never vote.

This has no effect in practice since the removal would simply do nothing,
but we might as well not waste time trying.
---
 daemons/attrd/attrd_commands.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 93a165b..dbe777e 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data)
 void
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
 {
-    bool remove_voter = FALSE;
+    bool gone = false;
+    bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);

     switch (kind) {
         case crm_status_uname:
@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)

         case crm_status_processes:
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
-                remove_voter = TRUE;
+                gone = true;
             }
             break;

@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
             } else {
                 // Remove all attribute values associated with lost nodes
                 attrd_peer_remove(peer->uname, FALSE, "loss");
-                remove_voter = TRUE;
+                gone = true;
             }
             break;
     }

-    // In case an election is in progress, remove any vote by the node
-    if (remove_voter) {
+    // Remove votes from cluster nodes that leave, in case election in progress
+    if (gone && !is_remote) {
         attrd_remove_voter(peer);
     }
 }
--
1.8.3.1

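Together with patch 04 below, the callback's epilogue becomes a two-flag decision, which is easiest to read as a table. A sketch of the combined effect (the flag names follow the patches; the stub functions are placeholders for attrd_remove_voter() and cache_remote_node()):

#include <stdbool.h>
#include <stdio.h>

static void remove_voter(const char *uname) { printf("unvote %s\n", uname); }
static void cache_remote(const char *uname) { printf("cache %s\n", uname); }

/* gone  is_remote  action
 * ----  ---------  ------------------------------------------------
 * yes   no         remove any election vote (cluster node left)
 * yes   yes        nothing (remote nodes never vote)
 * no    yes        ensure remote-cache entry (added by patch 04 below)
 * no    no         nothing
 */
static void
peer_change_epilogue(const char *uname, bool gone, bool is_remote)
{
    if (gone && !is_remote) {
        remove_voter(uname);
    } else if (!gone && is_remote) {
        cache_remote(uname);
    }
}
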
From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 7 Jun 2021 10:40:06 -0500
Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer
 update callback

If a remote node was started before the local cluster node joined the cluster,
the cluster node will assume its node attributes are for a cluster node until
it learns otherwise. Check for remoteness in the peer update callback, to have
another way we can learn it.
---
 daemons/attrd/attrd_commands.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index dbe777e..5f6a754 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
     // Remove votes from cluster nodes that leave, in case election in progress
     if (gone && !is_remote) {
         attrd_remove_voter(peer);
+
+    // Ensure remote nodes that come up are in the remote node cache
+    } else if (!gone && is_remote) {
+        cache_remote_node(peer->uname);
     }
 }

--
1.8.3.1

From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 7 Jun 2021 11:29:12 -0500
Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes

---
 daemons/attrd/attrd_commands.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 5f6a754..d6d179b 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data)
     return FALSE;
 }

+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")

 void
 attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)

     switch (kind) {
         case crm_status_uname:
+            crm_debug("%s node %s is now %s",
+                      (is_remote? "Remote" : "Cluster"),
+                      peer->uname, state_text(peer->state));
             break;

         case crm_status_processes:
             if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
                 gone = true;
             }
+            crm_debug("Node %s is %s a peer",
+                      peer->uname, (gone? "no longer" : "now"));
             break;

         case crm_status_nstate:
+            crm_debug("%s node %s is now %s (was %s)",
+                      (is_remote? "Remote" : "Cluster"),
+                      peer->uname, state_text(peer->state), state_text(data));
             if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
                 /* If we're the writer, send new peers a list of all attributes
                  * (unless it's a remote node, which doesn't run its own attrd)
--
1.8.3.1

From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 7 Jun 2021 11:13:53 -0500
Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for
 attributes when known

In most cases, attribute updates contained the node ID, and the node ID was
used by other code, only if known (i.e. positive). However a couple places did
not check this, so add that.

I am unsure whether the missing check caused problems in practice, but there
appears to be the possibility that a remote node would wrongly be added to the
cluster node cache.
---
 daemons/attrd/attrd_commands.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index d6d179b..b3f441c 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -136,7 +136,9 @@ build_attribute_xml(
     crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
     crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
     crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
-    crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
+    if (peerid > 0) {
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
+    }
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
     /* If this is a cluster node whose node ID we are learning, remember it */
     if ((v->nodeid == 0) && (v->is_remote == FALSE)
         && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
-                                  (int*)&v->nodeid) == 0)) {
+                                  (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {

         crm_node_t *known_peer = crm_get_peer(v->nodeid, host);

--
1.8.3.1

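Both hunks enforce the rule from the commit message: node IDs are meaningful only when positive. A tiny standalone sketch of the two sides (printf stands in for the XML and cache calls; only the "> 0" guards mirror the patch):

#include <stdio.h>
#include <stdint.h>

// Sending side: serialize the node ID only when it is actually known
static void
write_node_id(uint32_t nodeid)
{
    if (nodeid > 0) {
        printf("<attr node_id=\"%u\"/>\n", nodeid);
    }
}

// Receiving side: even a successfully parsed ID is trusted only when
// positive, so a 0 placeholder can never seed the cluster-node cache
// with a bogus entry for what is really a remote node
static void
learn_node_id(int parse_rc, int nodeid)
{
    if ((parse_rc == 0) && (nodeid > 0)) {
        printf("caching node ID %d\n", nodeid);
    }
}
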
From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 10 Jun 2021 17:25:57 -0500
Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a
 sync response

... for code isolation, and because we need to add more to it
---
 daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 20 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index b3f441c..d02d3e6 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
 }

 /*!
+ * \internal
+ * \brief Load attributes from a peer sync response
+ *
+ * \param[in] peer      Peer that sent sync response
+ * \param[in] peer_won  Whether peer is the attribute writer
+ * \param[in] xml       Request XML
+ */
+static void
+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
+{
+    crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
+             peer->uname);
+
+    if (peer_won) {
+        /* Initialize the "seen" flag for all attributes to cleared, so we can
+         * detect attributes that local node has but the writer doesn't.
+         */
+        clear_attribute_value_seen();
+    }
+
+    // Process each attribute update in the sync response
+    for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
+         child = pcmk__xml_next(child)) {
+        attrd_peer_update(peer, child,
+                          crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
+                          TRUE);
+    }
+
+    if (peer_won) {
+        /* If any attributes are still not marked as seen, the writer doesn't
+         * know about them, so send all peers an update with them.
+         */
+        attrd_current_only_attribute_update(peer, xml);
+    }
+}
+
+/*!
  * \internal
  * \brief Broadcast private attribute for local node with protocol version
  */
@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
     const char *op = crm_element_value(xml, PCMK__XA_TASK);
     const char *election_op = crm_element_value(xml, F_CRM_TASK);
     const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
-    bool peer_won = FALSE;
+    bool peer_won = false;

     if (election_op) {
         attrd_handle_election_op(peer, xml);
@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)

     } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei)
                && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
-        xmlNode *child = NULL;
-
-        crm_info("Processing %s from %s", op, peer->uname);
-
-        /* Clear the seen flag for attribute processing held only in the own node. */
-        if (peer_won) {
-            clear_attribute_value_seen();
-        }
-
-        for (child = pcmk__xml_first_child(xml); child != NULL;
-             child = pcmk__xml_next(child)) {
-            host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME);
-            attrd_peer_update(peer, child, host, TRUE);
-        }
-
-        if (peer_won) {
-            /* Synchronize if there is an attribute held only by own node that Writer does not have. */
-            attrd_current_only_attribute_update(peer, xml);
-        }
+        process_peer_sync_response(peer, peer_won, xml);
     }
 }

--
1.8.3.1

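When the sender won the writer election, the new helper is a three-phase pass: clear every local "seen" flag, apply the writer's updates (which re-marks the values the writer knows about), then collect what is still unseen for rebroadcast. A compressed, self-contained model of that flow (GLib containers; attrd's real per-attribute structures are reduced to one flag, so this is the shape, not the implementation):

#include <glib.h>
#include <stdbool.h>

typedef struct { bool seen; } value_t;

/* values: local attribute values keyed by name; updates: names carried
 * in the writer's sync response. Returns the names the writer lacked. */
static GList *
sync_response_pass(GHashTable *values, GList *updates, bool peer_won)
{
    GHashTableIter iter;
    gpointer name, val;
    GList *local_only = NULL;

    if (peer_won) {                        // phase 1: clear "seen"
        g_hash_table_iter_init(&iter, values);
        while (g_hash_table_iter_next(&iter, NULL, &val)) {
            ((value_t *) val)->seen = false;
        }
    }

    for (GList *u = updates; u != NULL; u = u->next) {  // phase 2: apply
        value_t *v = g_hash_table_lookup(values, u->data);

        if (v != NULL) {
            v->seen = true;                // writer already knows this one
        }
    }

    if (peer_won) {                        // phase 3: find local-only values
        g_hash_table_iter_init(&iter, values);
        while (g_hash_table_iter_next(&iter, &name, &val)) {
            if (!((value_t *) val)->seen) {
                local_only = g_list_prepend(local_only, name);
            }
        }
    }
    return local_only;    // caller broadcasts these to all peers
}
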
From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 11 Jun 2021 14:40:39 -0500
Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting
 local override

... for code isolation
---
 daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index d02d3e6..4783427 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
     free_xml(sync);
 }

+/*!
+ * \internal
+ * \brief Override an attribute sync with a local value
+ *
+ * Broadcast the local node's value for an attribute that's different from the
+ * value provided in a peer's attribute synchronization response. This ensures a
+ * node's values for itself take precedence and all peers are kept in sync.
+ *
+ * \param[in] a  Attribute entry to override
+ *
+ * \return Local instance of attribute value
+ */
+static attribute_value_t *
+broadcast_local_value(attribute_t *a)
+{
+    attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
+    xmlNode *sync = create_xml_node(NULL, __func__);
+
+    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
+                        a->user, a->is_private, v->nodename, v->nodeid,
+                        v->current, FALSE);
+    attrd_xml_add_writer(sync);
+    send_attrd_message(NULL, sync);
+    free_xml(sync);
+    return v;
+}
+
 void
 attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
 {
@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
     if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
         && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {

-        xmlNode *sync = create_xml_node(NULL, __func__);
-
         crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
                    attr, host, v->current, value, peer->uname);
-
-        crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
-        v = g_hash_table_lookup(a->values, host);
-        build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user,
-                            a->is_private, v->nodename, v->nodeid, v->current, FALSE);
-
-        attrd_xml_add_writer(sync);
-
-        /* Broadcast in case any other nodes had the inconsistent value */
-        send_attrd_message(NULL, sync);
-        free_xml(sync);
+        v = broadcast_local_value(a);

     } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
         crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s",
--
1.8.3.1

From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 11 Jun 2021 14:50:29 -0500
Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when
 broadcasting local-only values

The traces aren't necessary since build_attribute_xml() already logs the same
info at debug. Also, rename function for clarity, and make static.
---
 daemons/attrd/attrd_commands.c | 35 ++++++++++++++++-------------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 4783427..356defb 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -51,11 +51,12 @@ GHashTable *attributes = NULL;

 void write_attribute(attribute_t *a, bool ignore_delay);
 void write_or_elect_attribute(attribute_t *a);
-void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml);
 void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
 void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
 void attrd_peer_remove(const char *host, gboolean uncache, const char *source);

+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml);
+
 static gboolean
 send_attrd_message(crm_node_t * node, xmlNode * data)
 {
@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
         /* If any attributes are still not marked as seen, the writer doesn't
          * know about them, so send all peers an update with them.
         */
-        attrd_current_only_attribute_update(peer, xml);
+        broadcast_unseen_local_values(peer, xml);
     }
 }

@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
     return(v);
 }

-void
-attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
+void
+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
 {
     GHashTableIter aIter;
     GHashTableIter vIter;
-    attribute_t *a;
+    attribute_t *a = NULL;
     attribute_value_t *v = NULL;
-    xmlNode *sync = create_xml_node(NULL, __func__);
-    gboolean build = FALSE;
-
-    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+    xmlNode *sync = NULL;

     g_hash_table_iter_init(&aIter, attributes);
     while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
         g_hash_table_iter_init(&vIter, a->values);
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
-            if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) {
-                crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current);
-
-                build = TRUE;
+            if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
+                                           pcmk__str_casei)) {
+                if (sync == NULL) {
+                    sync = create_xml_node(NULL, __func__);
+                    crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
+                }
                 build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
                                     v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? TRUE : FALSE));
-            } else {
-                crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname);
-                continue;
             }
         }
     }

-    if (build) {
-        crm_debug("Syncing values to everyone.(from local only attributes)");
+    if (sync != NULL) {
+        crm_debug("Broadcasting local-only values");
         send_attrd_message(NULL, sync);
+        free_xml(sync);
     }
-    free_xml(sync);
 }

 /*!
--
1.8.3.1

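Besides the rename, the rewrite switches to lazy allocation: the sync message is created only once the first qualifying value turns up, so the common empty case allocates nothing and the send/free happen exactly when a message exists. The idiom in isolation (a GString standing in for the XML message, a caller-supplied predicate for the "unseen local value" test; this is the shape of the change, not the real attrd types):

#include <glib.h>

static void
broadcast_unseen(GList *values, gboolean (*is_unseen_local)(gconstpointer))
{
    GString *sync = NULL;       // built only if something qualifies

    for (GList *v = values; v != NULL; v = v->next) {
        if (!is_unseen_local(v->data)) {
            continue;
        }
        if (sync == NULL) {     // first match: create the envelope
            sync = g_string_new("sync-response:");
        }
        g_string_append_printf(sync, " %s", (const char *) v->data);
    }

    if (sync != NULL) {         // send and free only when non-empty
        g_print("%s\n", sync->str);
        g_string_free(sync, TRUE);
    }
}
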
From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 11 Jun 2021 15:04:20 -0500
Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML
 creation function

... and rename for clarity
---
 daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 356defb..5b32a77 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name)
     CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
 }

+/*!
+ * \internal
+ * \brief Create an XML representation of an attribute for use in peer messages
+ *
+ * \param[in] parent       Create attribute XML as child element of this element
+ * \param[in] a            Attribute to represent
+ * \param[in] v            Attribute value to represent
+ * \param[in] force_write  If true, value should be written even if unchanged
+ *
+ * \return XML representation of attribute
+ */
 static xmlNode *
-build_attribute_xml(
-    xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
-    gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write)
+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
+                        bool force_write)
 {
     xmlNode *xml = create_xml_node(parent, __func__);

-    crm_xml_add(xml, PCMK__XA_ATTR_NAME, name);
-    crm_xml_add(xml, PCMK__XA_ATTR_SET, set);
-    crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
-    crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
-    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
-    if (peerid > 0) {
-        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
+    crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
+    crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set);
+    crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
+    crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
+    crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename);
+    if (v->nodeid > 0) {
+        crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
     }
-    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
-    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
-    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
-    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write);
+    crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
+    crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
+    crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
+    crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);

     return xml;
 }
@@ -695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
         g_hash_table_iter_init(&vIter, a->values);
         while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
             crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
-            build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
-                                v->nodename, v->nodeid, v->current, FALSE);
+            add_attribute_value_xml(sync, a, v, false);
         }
     }

@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
                 sync = create_xml_node(NULL, __func__);
                 crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
             }
-            build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
-                                v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? TRUE : FALSE));
+            add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer);
         }
     }
 }
@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a)
     xmlNode *sync = create_xml_node(NULL, __func__);

     crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
-    build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
-                        a->user, a->is_private, v->nodename, v->nodeid,
-                        v->current, FALSE);
+    add_attribute_value_xml(sync, a, v, false);
     attrd_xml_add_writer(sync);
     send_attrd_message(NULL, sync);
     free_xml(sync);
--
1.8.3.1

From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 4 Jun 2021 16:34:26 -0500
Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of
 remote nodes

The attribute manager can't record a remote node's attributes to the CIB until
it knows the node is remote. Normally, this is learned when the remote node
starts, because the controller clears the CRM_OP_PROBED attribute and indicates
that it is for a remote node.

However, if a cluster node is down when a remote node starts, and later comes
up, it learns the remote node's existing attributes as part of the attribute
sync. Previously, this did not include whether each value is for a cluster or
remote node, so the newly joined attribute manager couldn't write out remote
nodes' attributes until it learned that via some other event -- which might not
happen before the node becomes DC, in which case its scheduler will not see any
unfencing-related node attributes and may wrongly schedule unfencing.

The sync response handling already calls attrd_lookup_or_create_value(), which
checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync
response.
---
 daemons/attrd/attrd_commands.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
index 5b32a77..0142383 100644
--- a/daemons/attrd/attrd_commands.c
+++ b/daemons/attrd/attrd_commands.c
@@ -43,8 +43,9 @@
  *     1       1.1.15  PCMK__ATTRD_CMD_UPDATE_BOTH,
  *                     PCMK__ATTRD_CMD_UPDATE_DELAY
  *     2       1.1.17  PCMK__ATTRD_CMD_CLEAR_FAILURE
+ *     3       2.1.1   PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
  */
-#define ATTRD_PROTOCOL_VERSION "2"
+#define ATTRD_PROTOCOL_VERSION "3"

 int last_cib_op_done = 0;
 GHashTable *attributes = NULL;
@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
     if (v->nodeid > 0) {
         crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
     }
+    if (v->is_remote != 0) {
+        crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
+    }
     crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
     crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
     crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
--
1.8.3.1

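With the flag on the wire, a peer that receives a sync response can classify a never-before-seen node as remote immediately instead of waiting for some later event. A sketch of the receiving side once the value XML has been parsed (cache_remote_node() is the helper from patch 02, stubbed here; the parsed-out parameters stand in for the real PCMK__XA_ATTR_IS_REMOTE lookup):

#include <stdbool.h>
#include <stdio.h>

// Stand-in for the patch-02 helper that fixes up the peer caches
static void
cache_remote_node(const char *node_name)
{
    printf("caching %s as a remote node\n", node_name);
}

/* One value from a sync response: senders at protocol >= 3 set
 * is_remote for Pacemaker Remote nodes' values; older senders never
 * do, and remoteness is then learned by other means, as before. */
static void
classify_from_sync(const char *host, bool is_remote)
{
    if ((host != NULL) && is_remote) {
        cache_remote_node(host);
    }
}
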
SOURCES/043-retry-metadata.patch (new file, 176 lines)
@@ -0,0 +1,176 @@
From 5c2d8665773254ff8b9676ac359a1210e34640e3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 1 Mar 2021 14:02:52 +0100
Subject: [PATCH] API: add pcmk__mainloop_timer_get_period() to internal API

---
 include/crm/common/internal.h |  1 +
 lib/common/mainloop.c         | 34 +++++++++++++++++++++++++---------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index f69abe8..63bfd2c 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -96,6 +96,7 @@ pcmk__open_devnull(int flags)
 int pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata,
                            struct ipc_client_callbacks *callbacks,
                            mainloop_io_t **source);
+guint pcmk__mainloop_timer_get_period(mainloop_timer_t *timer);


 /* internal messaging utilities (from messages.c) */
diff --git a/lib/common/mainloop.c b/lib/common/mainloop.c
index 2f00e31..75f24e2 100644
--- a/lib/common/mainloop.c
+++ b/lib/common/mainloop.c
@@ -49,6 +49,15 @@ struct trigger_s {

 };

+struct mainloop_timer_s {
+    guint id;
+    guint period_ms;
+    bool repeat;
+    char *name;
+    GSourceFunc cb;
+    void *userdata;
+};
+
 static gboolean
 crm_trigger_prepare(GSource * source, gint * timeout)
 {
@@ -875,6 +884,22 @@ pcmk__add_mainloop_ipc(crm_ipc_t *ipc, int priority, void *userdata,
     return pcmk_rc_ok;
 }

+/*!
+ * \brief Get period for mainloop timer
+ *
+ * \param[in]  timer  Timer
+ *
+ * \return Period in ms
+ */
+guint
+pcmk__mainloop_timer_get_period(mainloop_timer_t *timer)
+{
+    if (timer) {
+        return timer->period_ms;
+    }
+    return 0;
+}
+
 mainloop_io_t *
 mainloop_add_ipc_client(const char *name, int priority, size_t max_size,
                         void *userdata, struct ipc_client_callbacks *callbacks)
@@ -1252,15 +1277,6 @@ mainloop_child_add(pid_t pid, int timeout, const char *desc, void *privatedata,
     mainloop_child_add_with_flags(pid, timeout, desc, privatedata, 0, callback);
 }

-struct mainloop_timer_s {
-    guint id;
-    guint period_ms;
-    bool repeat;
-    char *name;
-    GSourceFunc cb;
-    void *userdata;
-};
-
 static gboolean
 mainloop_timer_cb(gpointer user_data)
 {
--
1.8.3.1

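The getter (and the struct's move to the top of mainloop.c) exists so code outside the file can build period-based policies without reaching into the opaque timer; the fenced patch below uses it to double its retry period. A sketch of that doubling helper against the declarations above (the getter's prototype is copied from the patch; mainloop_timer_set_period()'s signature is assumed from pacemaker's public mainloop header):

#include <stdbool.h>
#include <glib.h>

// As declared in the patch and in pacemaker's mainloop API
typedef struct mainloop_timer_s mainloop_timer_t;
guint pcmk__mainloop_timer_get_period(mainloop_timer_t *timer);
bool mainloop_timer_set_period(mainloop_timer_t *t, guint period_ms);

// Double a timer's period until it reaches a ceiling
static void
backoff_timer(mainloop_timer_t *timer, guint max_ms)
{
    guint period_ms = pcmk__mainloop_timer_get_period(timer);

    if (period_ms < max_ms) {
        mainloop_timer_set_period(timer, 2 * period_ms);
    }
}
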
From 1d33712201e42f0e8ee108999cd4cb8fa0eeca95 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 19 Feb 2021 12:34:04 +0100
Subject: [PATCH] Feature: fenced: retry getting metadata until we get it

---
 daemons/fenced/fenced_commands.c  | 35 +++++++++++++++++++++++++++++++++++
 daemons/fenced/pacemaker-fenced.h |  1 +
 2 files changed, 36 insertions(+)

diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 41901e5..65b41c5 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -69,6 +69,9 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re
 static void search_devices_record_result(struct device_search_s *search, const char *device,
                                          gboolean can_fence);

+static xmlNode * get_agent_metadata(const char *agent);
+static void read_action_metadata(stonith_device_t *device);
+
 typedef struct async_command_s {

     int id;
@@ -323,6 +326,25 @@ fork_cb(GPid pid, gpointer user_data)
     cmd->activating_on = NULL;
 }

+static int
+get_agent_metadata_cb(gpointer data) {
+    stonith_device_t *device = data;
+
+    device->agent_metadata = get_agent_metadata(device->agent);
+    if (device->agent_metadata) {
+        read_action_metadata(device);
+        stonith__device_parameter_flags(&(device->flags), device->id,
+                                        device->agent_metadata);
+        return G_SOURCE_REMOVE;
+    } else {
+        guint period_ms = pcmk__mainloop_timer_get_period(device->timer);
+        if (period_ms < 160 * 1000) {
+            mainloop_timer_set_period(device->timer, 2 * period_ms);
+        }
+        return G_SOURCE_CONTINUE;
+    }
+}
+
 static gboolean
 stonith_device_execute(stonith_device_t * device)
 {
@@ -569,6 +591,11 @@ free_device(gpointer data)

     g_list_free_full(device->targets, free);

+    if (device->timer) {
+        mainloop_timer_stop(device->timer);
+        mainloop_timer_del(device->timer);
+    }
+
     mainloop_destroy_trigger(device->work);

     free_xml(device->agent_metadata);
@@ -916,6 +943,14 @@ build_device_from_xml(xmlNode * msg)
         read_action_metadata(device);
         stonith__device_parameter_flags(&(device->flags), device->id,
                                         device->agent_metadata);
+    } else {
+        if (device->timer == NULL) {
+            device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000,
+                                               TRUE, get_agent_metadata_cb, device);
+        }
+        if (!mainloop_timer_running(device->timer)) {
+            mainloop_timer_start(device->timer);
+        }
     }

     value = g_hash_table_lookup(device->params, "nodeid");
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 13cf6dc..e342692 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -41,6 +41,7 @@ typedef struct stonith_device_s {
     GHashTable *params;
     GHashTable *aliases;
    GList *pending_ops;
+    mainloop_timer_t *timer;
     crm_trigger_t *work;
     xmlNode *agent_metadata;

--
1.8.3.1

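The two hunks combine into a capped exponential backoff: the timer starts at 10 seconds (mainloop_timer_add with 10 * 1000) and the callback doubles it after every failed metadata fetch until it would reach 160 seconds, where it stays. A throwaway program enumerating that schedule (just the arithmetic, not part of the patch):

#include <stdio.h>

int
main(void)
{
    unsigned int period_ms = 10 * 1000;     // initial period from the patch

    // Fetches retry after 10s, 20s, 40s, 80s, 160s, then every 160s
    for (int attempt = 1; attempt <= 7; attempt++) {
        printf("attempt %d: next retry in %us\n", attempt, period_ms / 1000);
        if (period_ms < 160 * 1000) {       // cap from get_agent_metadata_cb()
            period_ms *= 2;
        }
    }
    return 0;
}
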
SOURCES/044-sbd.patch (new file, 1633 lines)
File diff suppressed because it is too large
SOURCES/045-controller-attribute.patch (new file, 122 lines)
@@ -0,0 +1,122 @@
From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 9 Aug 2021 14:48:57 -0500
Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are
 cleared without DC

Previously, peer_update_callback() cleared a lost node's transient attributes
if either the local node is DC, or there is no DC.

However, that left the possibility of the DC being lost at the same time as
another node -- the local node would still have fsa_our_dc set while processing
the leave notifications, so no node would clear the attributes for the non-DC
node.

Now, the controller has its own CPG configuration change callback, which sets a
global boolean before calling the usual one, so that peer_update_callback() can
know when the DC has been lost.
---
 daemons/controld/controld_callbacks.c |  4 ++-
 daemons/controld/controld_corosync.c  | 57 ++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index af24856..e564b3d 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node)

 #define state_text(state) ((state)? (const char *)(state) : "in unknown state")

+bool controld_dc_left = false;
+
 void
 peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
 {
@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
                                          cib_scope_local);
             }

-        } else if (AM_I_DC || (fsa_our_dc == NULL)) {
+        } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) {
             /* This only needs to be done once, so normally the DC should do
              * it. However if there is no DC, every node must do it, since
              * there is no other way to ensure some one node does it.
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index db99630..c5ab658 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data)
     }
 }

+extern bool controld_dc_left;
+
+/*!
+ * \brief Handle a Corosync notification of a CPG configuration change
+ *
+ * \param[in] handle               CPG connection
+ * \param[in] cpg_name             CPG group name
+ * \param[in] member_list          List of current CPG members
+ * \param[in] member_list_entries  Number of entries in \p member_list
+ * \param[in] left_list            List of CPG members that left
+ * \param[in] left_list_entries    Number of entries in \p left_list
+ * \param[in] joined_list          List of CPG members that joined
+ * \param[in] joined_list_entries  Number of entries in \p joined_list
+ */
+static void
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
+                        const struct cpg_address *member_list,
+                        size_t member_list_entries,
+                        const struct cpg_address *left_list,
+                        size_t left_list_entries,
+                        const struct cpg_address *joined_list,
+                        size_t joined_list_entries)
+{
+    /* When nodes leave CPG, the DC clears their transient node attributes.
+     *
+     * However if there is no DC, or the DC is among the nodes that left, each
+     * remaining node needs to do the clearing, to ensure it gets done.
+     * Otherwise, the attributes would persist when the nodes rejoin, which
+     * could have serious consequences for unfencing, agents that use attributes
+     * for internal logic, etc.
+     *
+     * Here, we set a global boolean if the DC is among the nodes that left, for
+     * use by the peer callback.
+     */
+    if (fsa_our_dc != NULL) {
+        crm_node_t *peer = crm_find_peer(0, fsa_our_dc);
+
+        if (peer != NULL) {
+            for (int i = 0; i < left_list_entries; ++i) {
+                if (left_list[i].nodeid == peer->id) {
+                    controld_dc_left = true;
+                    break;
+                }
+            }
+        }
+    }
+
+    // Process the change normally, which will call the peer callback as needed
+    pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
+                        left_list, left_list_entries,
+                        joined_list, joined_list_entries);
+
+    controld_dc_left = false;
+}
+
 extern gboolean crm_connect_corosync(crm_cluster_t * cluster);

 gboolean
@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster)
     if (is_corosync_cluster()) {
         crm_set_status_callback(&peer_update_callback);
         cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
-        cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+        cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
         cluster->destroy = crmd_cs_destroy;

         if (crm_cluster_connect(cluster)) {
--
1.8.3.1

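The fix is a latch-around-delegate wrapper: scan left_list for the current DC's node ID, latch the global flag, let pcmk_cpg_membership() run the per-peer callbacks (which is where controld_dc_left is consulted), then clear the flag so it is scoped to this one configuration change. A standalone model of that shape (plain integers replace struct cpg_address, and the stub stands in for pcmk_cpg_membership(); only the ordering mirrors the patch):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool dc_left = false;    // read by the peer callbacks
static uint32_t dc_nodeid = 5;  // current DC's node ID; 0 = no DC known

// Stand-in for pcmk_cpg_membership(), which invokes the peer callback
// for every node that joined or left
static void
process_membership(const uint32_t *left, size_t n_left)
{
    (void) left;
    (void) n_left;
}

static void
membership_wrapper(const uint32_t *left, size_t n_left)
{
    // Latch: did the DC leave in this configuration change?
    for (size_t i = 0; (dc_nodeid != 0) && (i < n_left); i++) {
        if (left[i] == dc_nodeid) {
            dc_left = true;
            break;
        }
    }

    process_membership(left, n_left);   // peer callbacks may read dc_left

    dc_left = false;    // flag only covers this one change
}
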
SPECS/pacemaker.spec
@@ -226,7 +226,7 @@
 Name: pacemaker
 Summary: Scalable High-Availability cluster resource manager
 Version: %{pcmkversion}
-Release: %{pcmk_release}%{?dist}.1
+Release: %{pcmk_release}%{?dist}.3
 %if %{defined _unitdir}
 License: GPLv2+ and LGPLv2+
 %else
@@ -289,6 +289,10 @@ Patch38: 038-feature-set.patch
 Patch39: 039-crm_mon.patch
 Patch40: 040-crm_mon-shutdown.patch
 Patch41: 041-crm_mon-shutdown.patch
+Patch42: 042-unfencing-loop.patch
+Patch43: 043-retry-metadata.patch
+Patch44: 044-sbd.patch
+Patch45: 045-controller-attribute.patch

 # downstream-only commits
 Patch100: 100-default-to-syncing-with-sbd.patch
@@ -811,6 +815,7 @@ exit 0

 %{_sbindir}/crm_attribute
 %{_sbindir}/crm_master
+%{_sbindir}/fence_watchdog

 %doc %{_mandir}/man7/pacemaker-controld.*
 %doc %{_mandir}/man7/pacemaker-schedulerd.*
@@ -819,6 +824,7 @@ exit 0
 %doc %{_mandir}/man7/ocf_pacemaker_remote.*
 %doc %{_mandir}/man8/crm_attribute.*
 %doc %{_mandir}/man8/crm_master.*
+%doc %{_mandir}/man8/fence_watchdog.*
 %doc %{_mandir}/man8/pacemakerd.*

 %doc %{_datadir}/pacemaker/alerts
@@ -893,6 +899,7 @@ exit 0
 %doc %{_mandir}/man8/*
 %exclude %{_mandir}/man8/crm_attribute.*
 %exclude %{_mandir}/man8/crm_master.*
+%exclude %{_mandir}/man8/fence_watchdog.*
 %exclude %{_mandir}/man8/pacemakerd.*
 %exclude %{_mandir}/man8/pacemaker-remoted.*

@@ -986,6 +993,18 @@ exit 0
 %license %{nagios_name}-%{nagios_hash}/COPYING

 %changelog
+* Mon Aug 9 2021 Klaus Wenninger <kwenning@redhat.com> - 2.0.5-9.3
+- retry fence-agent metadata
+- assure transient attributes of lost node are cleared
+- added configurable watchdog-fencing feature
+- Resolves: rhbz1992014
+- Resolves: rhbz1989622
+- Resolves: rhbz1993891
+
+* Thu Jun 24 2021 Ken Gaillot <kgaillot@redhat.com> - 2.0.5-9.2
+- Avoid remote node unfencing loop
+- Resolves: rhbz1972273
+
 * Mon Apr 19 2021 Ken Gaillot <kgaillot@redhat.com> - 2.0.5-9.1
 - Fix regression in crm_mon during cluster shutdown that affects ocf:heartbeat:pgsql agent
 - Resolves: rhbz1951098