24b716c802
# Conflicts: # SOURCES/003-pacemakerd-output.patch # SPECS/pacemaker.spec
734 lines
28 KiB
Diff
734 lines
28 KiB
Diff
From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 4 Jun 2021 16:30:55 -0500
|
|
Subject: [PATCH 01/11] Low: controller: check whether unfenced node was remote
|
|
node
|
|
|
|
... so the controller can indicate the node is remote (if known at that point,
|
|
which is not guaranteed) when setting unfencing-related node attributes.
|
|
---
|
|
daemons/controld/controld_fencing.c | 21 ++++++++++++++++++---
|
|
1 file changed, 18 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
|
index 23dff28..0fba661 100644
|
|
--- a/daemons/controld/controld_fencing.c
|
|
+++ b/daemons/controld/controld_fencing.c
|
|
@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
|
|
if (pcmk__str_eq("on", op, pcmk__str_casei)) {
|
|
const char *value = NULL;
|
|
char *now = pcmk__ttoa(time(NULL));
|
|
+ gboolean is_remote_node = FALSE;
|
|
+
|
|
+ /* This check is not 100% reliable, since this node is not
|
|
+ * guaranteed to have the remote node cached. However, it
|
|
+ * doesn't have to be reliable, since the attribute manager can
|
|
+ * learn a node's "remoteness" by other means sooner or later.
|
|
+ * This allows it to learn more quickly if this node does have
|
|
+ * the information.
|
|
+ */
|
|
+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) {
|
|
+ is_remote_node = TRUE;
|
|
+ }
|
|
|
|
- update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE);
|
|
+ update_attrd(target, CRM_ATTR_UNFENCED, now, NULL,
|
|
+ is_remote_node);
|
|
free(now);
|
|
|
|
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL);
|
|
- update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE);
|
|
+ update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL,
|
|
+ is_remote_node);
|
|
|
|
value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE);
|
|
- update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE);
|
|
+ update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL,
|
|
+ is_remote_node);
|
|
|
|
} else if (action->sent_update == FALSE) {
|
|
send_stonith_update(action, target, uuid);
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 7 Jun 2021 10:50:36 -0500
|
|
Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote
|
|
node to cache
|
|
|
|
... for future reuse
|
|
---
|
|
daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++-----------
|
|
1 file changed, 23 insertions(+), 11 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 731c243..93a165b 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -102,6 +102,28 @@ free_attribute(gpointer data)
|
|
}
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache
|
|
+ *
|
|
+ * \param[in] node_name Name of Pacemaker Remote node to cache
|
|
+ */
|
|
+static void
|
|
+cache_remote_node(const char *node_name)
|
|
+{
|
|
+ /* If we previously assumed this node was an unseen cluster node,
|
|
+ * remove its entry from the cluster peer cache.
|
|
+ */
|
|
+ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name);
|
|
+
|
|
+ if (dup && (dup->uuid == NULL)) {
|
|
+ reap_crm_member(0, node_name);
|
|
+ }
|
|
+
|
|
+ // Ensure node is in the remote peer cache
|
|
+ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
|
|
+}
|
|
+
|
|
static xmlNode *
|
|
build_attribute_xml(
|
|
xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
|
|
@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
|
|
|
|
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
|
|
if (is_remote) {
|
|
- /* If we previously assumed this node was an unseen cluster node,
|
|
- * remove its entry from the cluster peer cache.
|
|
- */
|
|
- crm_node_t *dup = pcmk__search_cluster_node_cache(0, host);
|
|
-
|
|
- if (dup && (dup->uuid == NULL)) {
|
|
- reap_crm_member(0, host);
|
|
- }
|
|
-
|
|
- /* Ensure this host is in the remote peer cache */
|
|
- CRM_ASSERT(crm_remote_peer_get(host) != NULL);
|
|
+ cache_remote_node(host);
|
|
}
|
|
|
|
if (v == NULL) {
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 7 Jun 2021 10:39:34 -0500
|
|
Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes
|
|
for remote nodes
|
|
|
|
Remote nodes never vote.
|
|
|
|
This has no effect in practice since the removal would simply do nothing,
|
|
but we might as well not waste time trying.
|
|
---
|
|
daemons/attrd/attrd_commands.c | 11 ++++++-----
|
|
1 file changed, 6 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 93a165b..dbe777e 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data)
|
|
void
|
|
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
|
|
{
|
|
- bool remove_voter = FALSE;
|
|
+ bool gone = false;
|
|
+ bool is_remote = pcmk_is_set(peer->flags, crm_remote_node);
|
|
|
|
switch (kind) {
|
|
case crm_status_uname:
|
|
@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
|
|
case crm_status_processes:
|
|
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
|
|
- remove_voter = TRUE;
|
|
+ gone = true;
|
|
}
|
|
break;
|
|
|
|
@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
} else {
|
|
// Remove all attribute values associated with lost nodes
|
|
attrd_peer_remove(peer->uname, FALSE, "loss");
|
|
- remove_voter = TRUE;
|
|
+ gone = true;
|
|
}
|
|
break;
|
|
}
|
|
|
|
- // In case an election is in progress, remove any vote by the node
|
|
- if (remove_voter) {
|
|
+ // Remove votes from cluster nodes that leave, in case election in progress
|
|
+ if (gone && !is_remote) {
|
|
attrd_remove_voter(peer);
|
|
}
|
|
}
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 7 Jun 2021 10:40:06 -0500
|
|
Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer
|
|
update callback
|
|
|
|
If a remote node was started before the local cluster node joined the cluster,
|
|
the cluster node will assume its node attributes are for a cluster node until
|
|
it learns otherwise. Check for remoteness in the peer update callback, to have
|
|
another way we can learn it.
|
|
---
|
|
daemons/attrd/attrd_commands.c | 4 ++++
|
|
1 file changed, 4 insertions(+)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index dbe777e..5f6a754 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
// Remove votes from cluster nodes that leave, in case election in progress
|
|
if (gone && !is_remote) {
|
|
attrd_remove_voter(peer);
|
|
+
|
|
+ // Ensure remote nodes that come up are in the remote node cache
|
|
+ } else if (!gone && is_remote) {
|
|
+ cache_remote_node(peer->uname);
|
|
}
|
|
}
|
|
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 7 Jun 2021 11:29:12 -0500
|
|
Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes
|
|
|
|
---
|
|
daemons/attrd/attrd_commands.c | 9 +++++++++
|
|
1 file changed, 9 insertions(+)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 5f6a754..d6d179b 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data)
|
|
return FALSE;
|
|
}
|
|
|
|
+#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
|
|
|
|
void
|
|
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
|
|
@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
|
|
switch (kind) {
|
|
case crm_status_uname:
|
|
+ crm_debug("%s node %s is now %s",
|
|
+ (is_remote? "Remote" : "Cluster"),
|
|
+ peer->uname, state_text(peer->state));
|
|
break;
|
|
|
|
case crm_status_processes:
|
|
if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) {
|
|
gone = true;
|
|
}
|
|
+ crm_debug("Node %s is %s a peer",
|
|
+ peer->uname, (gone? "no longer" : "now"));
|
|
break;
|
|
|
|
case crm_status_nstate:
|
|
+ crm_debug("%s node %s is now %s (was %s)",
|
|
+ (is_remote? "Remote" : "Cluster"),
|
|
+ peer->uname, state_text(peer->state), state_text(data));
|
|
if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) {
|
|
/* If we're the writer, send new peers a list of all attributes
|
|
* (unless it's a remote node, which doesn't run its own attrd)
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Mon, 7 Jun 2021 11:13:53 -0500
|
|
Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for
|
|
attributes when known
|
|
|
|
In most cases, attribute updates contained the node ID, and the node ID was
|
|
used by other code, only if known (i.e. positive). However a couple places did
|
|
not check this, so add that.
|
|
|
|
I am unsure whether the missing check caused problems in practice, but there
|
|
appears to be the possibility that a remote node would wrongly be added to the
|
|
cluster node cache.
|
|
---
|
|
daemons/attrd/attrd_commands.c | 6 ++++--
|
|
1 file changed, 4 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index d6d179b..b3f441c 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -136,7 +136,9 @@ build_attribute_xml(
|
|
crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
|
|
crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
|
|
crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
|
|
- crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
|
|
+ if (peerid > 0) {
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
|
|
+ }
|
|
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
|
|
@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
|
|
/* If this is a cluster node whose node ID we are learning, remember it */
|
|
if ((v->nodeid == 0) && (v->is_remote == FALSE)
|
|
&& (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
|
|
- (int*)&v->nodeid) == 0)) {
|
|
+ (int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
|
|
|
|
crm_node_t *known_peer = crm_get_peer(v->nodeid, host);
|
|
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 10 Jun 2021 17:25:57 -0500
|
|
Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a
|
|
sync response
|
|
|
|
... for code isolation, and because we need to add more to it
|
|
---
|
|
daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++--------------
|
|
1 file changed, 39 insertions(+), 20 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index b3f441c..d02d3e6 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml)
|
|
}
|
|
|
|
/*!
|
|
+ * \internal
|
|
+ * \brief Load attributes from a peer sync response
|
|
+ *
|
|
+ * \param[in] peer Peer that sent sync response
|
|
+ * \param[in] peer_won Whether peer is the attribute writer
|
|
+ * \param[in] xml Sync response XML
|
|
+ */
|
|
+static void
|
|
+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
|
|
+{
|
|
+ crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s",
|
|
+ peer->uname);
|
|
+
|
|
+ if (peer_won) {
|
|
+ /* Initialize the "seen" flag for all attributes to cleared, so we can
|
|
+ * detect attributes that local node has but the writer doesn't.
|
|
+ */
|
|
+ clear_attribute_value_seen();
|
|
+ }
|
|
+
|
|
+ // Process each attribute update in the sync response
|
|
+ for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL;
|
|
+ child = pcmk__xml_next(child)) {
|
|
+ attrd_peer_update(peer, child,
|
|
+ crm_element_value(child, PCMK__XA_ATTR_NODE_NAME),
|
|
+ TRUE);
|
|
+ }
|
|
+
|
|
+ if (peer_won) {
|
|
+ /* If any attributes are still not marked as seen, the writer doesn't
|
|
+ * know about them, so send all peers an update with them.
|
|
+ */
|
|
+ attrd_current_only_attribute_update(peer, xml);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*!
|
|
\internal
|
|
\brief Broadcast private attribute for local node with protocol version
|
|
*/
|
|
@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
|
|
const char *op = crm_element_value(xml, PCMK__XA_TASK);
|
|
const char *election_op = crm_element_value(xml, F_CRM_TASK);
|
|
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
|
|
- bool peer_won = FALSE;
|
|
+ bool peer_won = false;
|
|
|
|
if (election_op) {
|
|
attrd_handle_election_op(peer, xml);
|
|
@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml)
|
|
|
|
} else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei)
|
|
&& !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) {
|
|
- xmlNode *child = NULL;
|
|
-
|
|
- crm_info("Processing %s from %s", op, peer->uname);
|
|
-
|
|
- /* Clear the seen flag for attribute processing held only in the own node. */
|
|
- if (peer_won) {
|
|
- clear_attribute_value_seen();
|
|
- }
|
|
-
|
|
- for (child = pcmk__xml_first_child(xml); child != NULL;
|
|
- child = pcmk__xml_next(child)) {
|
|
- host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME);
|
|
- attrd_peer_update(peer, child, host, TRUE);
|
|
- }
|
|
-
|
|
- if (peer_won) {
|
|
- /* Synchronize if there is an attribute held only by own node that Writer does not have. */
|
|
- attrd_current_only_attribute_update(peer, xml);
|
|
- }
|
|
+ process_peer_sync_response(peer, peer_won, xml);
|
|
|
|
} else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_FLUSH, pcmk__str_casei)) {
|
|
/* Ignore. The flush command was removed in 2.0.0 but may be
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 11 Jun 2021 14:40:39 -0500
|
|
Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting
|
|
local override
|
|
|
|
... for code isolation
|
|
---
|
|
daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++-------------
|
|
1 file changed, 29 insertions(+), 13 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index d02d3e6..4783427 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
|
|
free_xml(sync);
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Override an attribute sync with a local value
|
|
+ *
|
|
+ * Broadcast the local node's value for an attribute that's different from the
|
|
+ * value provided in a peer's attribute synchronization response. This ensures a
|
|
+ * node's values for itself take precedence and all peers are kept in sync.
|
|
+ *
|
|
+ * \param[in] a Attribute entry to override
|
|
+ *
|
|
+ * \return Local instance of attribute value
|
|
+ */
|
|
+static attribute_value_t *
|
|
+broadcast_local_value(attribute_t *a)
|
|
+{
|
|
+ attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname);
|
|
+ xmlNode *sync = create_xml_node(NULL, __func__);
|
|
+
|
|
+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
+ build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
|
|
+ a->user, a->is_private, v->nodename, v->nodeid,
|
|
+ v->current, FALSE);
|
|
+ attrd_xml_add_writer(sync);
|
|
+ send_attrd_message(NULL, sync);
|
|
+ free_xml(sync);
|
|
+ return v;
|
|
+}
|
|
+
|
|
void
|
|
attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
|
|
{
|
|
@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter)
|
|
if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
|
|
&& pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
|
|
|
|
- xmlNode *sync = create_xml_node(NULL, __func__);
|
|
-
|
|
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
|
|
attr, host, v->current, value, peer->uname);
|
|
-
|
|
- crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
- v = g_hash_table_lookup(a->values, host);
|
|
- build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user,
|
|
- a->is_private, v->nodename, v->nodeid, v->current, FALSE);
|
|
-
|
|
- attrd_xml_add_writer(sync);
|
|
-
|
|
- /* Broadcast in case any other nodes had the inconsistent value */
|
|
- send_attrd_message(NULL, sync);
|
|
- free_xml(sync);
|
|
+ v = broadcast_local_value(a);
|
|
|
|
} else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
|
|
crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s",
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 11 Jun 2021 14:50:29 -0500
|
|
Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when
|
|
broadcasting local-only values
|
|
|
|
The traces aren't necessary since build_attribute_xml() already logs the same
|
|
info at debug. Also, rename function for clarity, and make static.
|
|
---
|
|
daemons/attrd/attrd_commands.c | 35 ++++++++++++++++-------------------
|
|
1 file changed, 16 insertions(+), 19 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 4783427..356defb 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -51,11 +51,12 @@ GHashTable *attributes = NULL;
|
|
|
|
void write_attribute(attribute_t *a, bool ignore_delay);
|
|
void write_or_elect_attribute(attribute_t *a);
|
|
-void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml);
|
|
void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter);
|
|
void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
|
|
void attrd_peer_remove(const char *host, gboolean uncache, const char *source);
|
|
|
|
+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml);
|
|
+
|
|
static gboolean
|
|
send_attrd_message(crm_node_t * node, xmlNode * data)
|
|
{
|
|
@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml)
|
|
/* If any attributes are still not marked as seen, the writer doesn't
|
|
* know about them, so send all peers an update with them.
|
|
*/
|
|
- attrd_current_only_attribute_update(peer, xml);
|
|
+ broadcast_unseen_local_values(peer, xml);
|
|
}
|
|
}
|
|
|
|
@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml)
|
|
return(v);
|
|
}
|
|
|
|
-void
|
|
-attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml)
|
|
+void
|
|
+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
|
|
{
|
|
GHashTableIter aIter;
|
|
GHashTableIter vIter;
|
|
- attribute_t *a;
|
|
+ attribute_t *a = NULL;
|
|
attribute_value_t *v = NULL;
|
|
- xmlNode *sync = create_xml_node(NULL, __func__);
|
|
- gboolean build = FALSE;
|
|
-
|
|
- crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
+ xmlNode *sync = NULL;
|
|
|
|
g_hash_table_iter_init(&aIter, attributes);
|
|
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
|
|
g_hash_table_iter_init(&vIter, a->values);
|
|
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
|
|
- if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) {
|
|
- crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current);
|
|
-
|
|
- build = TRUE;
|
|
+ if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
|
|
+ pcmk__str_casei)) {
|
|
+ if (sync == NULL) {
|
|
+ sync = create_xml_node(NULL, __func__);
|
|
+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
+ }
|
|
build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
|
|
v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? TRUE : FALSE));
|
|
- } else {
|
|
- crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname);
|
|
- continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
- if (build) {
|
|
- crm_debug("Syncing values to everyone.(from local only attributes)");
|
|
+ if (sync != NULL) {
|
|
+ crm_debug("Broadcasting local-only values");
|
|
send_attrd_message(NULL, sync);
|
|
+ free_xml(sync);
|
|
}
|
|
- free_xml(sync);
|
|
}
|
|
|
|
/*!
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 11 Jun 2021 15:04:20 -0500
|
|
Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML
|
|
creation function
|
|
|
|
... and rename for clarity
|
|
---
|
|
daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------
|
|
1 file changed, 27 insertions(+), 21 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 356defb..5b32a77 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name)
|
|
CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Create an XML representation of an attribute for use in peer messages
|
|
+ *
|
|
+ * \param[in] parent Create attribute XML as child element of this element
|
|
+ * \param[in] a Attribute to represent
|
|
+ * \param[in] v Attribute value to represent
|
|
+ * \param[in] force_write If true, value should be written even if unchanged
|
|
+ *
|
|
+ * \return XML representation of attribute
|
|
+ */
|
|
static xmlNode *
|
|
-build_attribute_xml(
|
|
- xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user,
|
|
- gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write)
|
|
+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
|
|
+ bool force_write)
|
|
{
|
|
xmlNode *xml = create_xml_node(parent, __func__);
|
|
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_NAME, name);
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_SET, set);
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid);
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_USER, user);
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer);
|
|
- if (peerid > 0) {
|
|
- crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename);
|
|
+ if (v->nodeid > 0) {
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
|
|
}
|
|
- crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value);
|
|
- crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000);
|
|
- crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private);
|
|
- crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write);
|
|
+ crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write);
|
|
|
|
return xml;
|
|
}
|
|
@@ -695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
|
|
g_hash_table_iter_init(&vIter, a->values);
|
|
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
|
|
crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
|
|
- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
|
|
- v->nodename, v->nodeid, v->current, FALSE);
|
|
+ add_attribute_value_xml(sync, a, v, false);
|
|
}
|
|
}
|
|
|
|
@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml)
|
|
sync = create_xml_node(NULL, __func__);
|
|
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
}
|
|
- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private,
|
|
- v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? TRUE : FALSE));
|
|
+ add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer);
|
|
}
|
|
}
|
|
}
|
|
@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a)
|
|
xmlNode *sync = create_xml_node(NULL, __func__);
|
|
|
|
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms,
|
|
- a->user, a->is_private, v->nodename, v->nodeid,
|
|
- v->current, FALSE);
|
|
+ add_attribute_value_xml(sync, a, v, false);
|
|
attrd_xml_add_writer(sync);
|
|
send_attrd_message(NULL, sync);
|
|
free_xml(sync);
|
|
--
|
|
1.8.3.1
|
|
|
|
|
|
From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Fri, 4 Jun 2021 16:34:26 -0500
|
|
Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of
|
|
remote nodes
|
|
|
|
The attribute manager can't record a remote node's attributes to the CIB until
|
|
it knows the node is remote. Normally, this is learned when the remote node
|
|
starts, because the controller clears the CRM_OP_PROBED attribute and indicates
|
|
that it is for a remote node.
|
|
|
|
However, if a cluster node is down when a remote node starts, and later comes
|
|
up, it learns the remote node's existing attributes as part of the attribute
|
|
sync. Previously, this did not include whether each value is for a cluster or
|
|
remote node, so the newly joined attribute manager couldn't write out remote
|
|
nodes' attributes until it learned that via some other event -- which might not
|
|
happen before the node becomes DC, in which case its scheduler will not see any
|
|
unfencing-related node attributes and may wrongly schedule unfencing.
|
|
|
|
The sync response handling already calls attrd_lookup_or_create_value(), which
|
|
checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync
|
|
response.
|
|
---
|
|
daemons/attrd/attrd_commands.c | 6 +++++-
|
|
1 file changed, 5 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c
|
|
index 5b32a77..0142383 100644
|
|
--- a/daemons/attrd/attrd_commands.c
|
|
+++ b/daemons/attrd/attrd_commands.c
|
|
@@ -43,8 +43,9 @@
|
|
* 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH,
|
|
* PCMK__ATTRD_CMD_UPDATE_DELAY
|
|
* 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE
|
|
+ * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes
|
|
*/
|
|
-#define ATTRD_PROTOCOL_VERSION "2"
|
|
+#define ATTRD_PROTOCOL_VERSION "3"
|
|
|
|
int last_cib_op_done = 0;
|
|
GHashTable *attributes = NULL;
|
|
@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v,
|
|
if (v->nodeid > 0) {
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid);
|
|
}
|
|
+ if (v->is_remote != 0) {
|
|
+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
|
|
+ }
|
|
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000);
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private);
|
|
--
|
|
1.8.3.1
|
|
|