pacemaker/007-transient_attrs.patch
2025-12-08 16:24:15 -05:00

1263 lines
50 KiB
Diff

From 26c022d2a3b6061ff9a60f86e50834a08e8360d4 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 13 Nov 2025 02:14:45 -0800
Subject: [PATCH 01/10] Fix: pacemaker-attrd: Wipe CIB along with memory
Previously, when the attribute manager purged a node, it would purge the
node's transient attributes only from memory, assuming the controller
would purge them from the CIB. Now, the writer will purge them from the
CIB as well.
This fixes a variety of timing issues when multiple nodes including the
attribute writer are shutting down. If the writer leaves before some
other node, the DC wipes that other node's attributes from the CIB when
that other node leaves the controller process group (or all other nodes
do if the DC is the leaving node). If a new writer (possibly even the
node itself) is elected before the node's attribute manager leaves the
cluster layer, it will write the attributes back to the CIB. Once the
other node leaves the cluster layer, all attribute managers remove its
attributes from memory, but they are now "stuck" in the CIB.
As of this commit, the controller still erases the attributes from the
CIB when the node leaves the controller process group, which is
redundant but doesn't cause any new problems. This will be corrected in
an upcoming commit.
Note: This will cause an insignificant regression if backported to
Pacemaker 2. The Pacemaker 2 controller purges attributes from the CIB
for leaving DCs only if they are at version 1.1.13 or later, because
earlier DCs will otherwise get fenced after a clean shutdown. Since the
attribute manager doesn't know the DC or its version, the attributes
would now always be wiped, so old leaving DCs will get fenced. The
fencing would occur only in the highly unlikely situation of a rolling
upgrade from Pacemaker 2-supported versions 1.1.11 or 1.1.12, and the
upgrade would still succeed without any negative impact on resources.
Fixes T138
Co-Authored-By: Ken Gaillot <kgaillot@redhat.com>
Co-Authored-By: Chris Lumens <clumens@redhat.com>
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/attrd/attrd_corosync.c | 93 +++++++++++++++++++++++++++++++++
daemons/attrd/attrd_elections.c | 2 +
daemons/attrd/pacemaker-attrd.c | 1 +
daemons/attrd/pacemaker-attrd.h | 3 ++
4 files changed, 99 insertions(+)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 94fc85f..8497f34 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -23,6 +23,43 @@
#include "pacemaker-attrd.h"
+/*!
+ * \internal
+ * \brief Nodes removed by \c attrd_peer_remove()
+ *
+ * This table is to be used as a set. It contains nodes that have been removed
+ * by \c attrd_peer_remove() and whose transient attributes should be erased
+ * from the CIB.
+ *
+ * Setting an attribute value for a node via \c update_attr_on_host() removes
+ * the node from the table. At that point, we have transient attributes in
+ * memory for the node, so they should no longer be erased from the CIB.
+ *
+ * If another node erases a removed node's transient attributes from the CIB,
+ * the removed node remains in this table until an attribute value is set for
+ * it. This is for convenience: it avoids the need to monitor for CIB updates
+ * that erase a node's \c node_state or \c transient_attributes element, just to
+ * remove the node from the table.
+ *
+ * Leaving a removed node in the table after erasure should be harmless. If a
+ * node is in this table, then we have no transient attributes for it in memory.
+ * If for some reason we erase its transient attributes from the CIB twice, its
+ * state in the CIB will still be correct.
+ */
+static GHashTable *removed_peers = NULL;
+
+/*!
+ * \internal
+ * \brief Free the removed nodes table
+ */
+void
+attrd_free_removed_peers(void)
+{
+ if (removed_peers != NULL) {
+ g_hash_table_destroy(removed_peers);
+ }
+}
+
static xmlNode *
attrd_confirmation(int callid)
{
@@ -236,6 +273,10 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
const char *prev_xml_id = NULL;
const char *node_xml_id = crm_element_value(xml, PCMK__XA_ATTR_HOST_ID);
+ if (removed_peers != NULL) {
+ g_hash_table_remove(removed_peers, host);
+ }
+
// Create entry for value if not already existing
v = g_hash_table_lookup(a->values, host);
if (v == NULL) {
@@ -530,6 +571,29 @@ attrd_peer_sync_response(const pcmk__node_status_t *peer, bool peer_won,
}
}
+/*!
+ * \internal
+ * \brief Erase all removed nodes' transient attributes from the CIB
+ *
+ * This should be called by a newly elected writer upon winning the election.
+ */
+void
+attrd_erase_removed_peer_attributes(void)
+{
+ const char *host = NULL;
+ GHashTableIter iter;
+
+ if (!attrd_election_won() || (removed_peers == NULL)) {
+ return;
+ }
+
+ g_hash_table_iter_init(&iter, removed_peers);
+ while (g_hash_table_iter_next(&iter, (gpointer *) &host, NULL)) {
+ attrd_cib_erase_transient_attrs(host);
+ g_hash_table_iter_remove(&iter);
+ }
+}
+
/*!
* \internal
* \brief Remove all attributes and optionally peer cache entries for a node
@@ -556,6 +620,35 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
}
}
+ if (attrd_election_won()) {
+ // We are the writer. Wipe node's transient attributes from CIB now.
+ attrd_cib_erase_transient_attrs(host);
+
+ } else {
+ /* Make sure the attributes get erased from the CIB eventually.
+ * - If there's already a writer, it will call this function and enter
+ * the "if" block above, requesting the erasure (unless it leaves
+ * before sending the request -- see below).
+ * attrd_start_election_if_needed() will do nothing here.
+ * - Otherwise, we ensure an election is happening (unless we're
+ * shutting down). The winner will erase transient attributes from the
+ * CIB for all removed nodes in attrd_election_cb().
+ *
+ * We add the node to the removed_peers table in case we win an election
+ * and need to request CIB erasures based on the table contents. This
+ * could happen for either of two reasons:
+ * - There is no current writer and we're not shutting down. An election
+ * either is already in progress or will be triggered here.
+ * - The current writer leaves before sending the CIB update request. A
+ * new election will be triggered.
+ */
+ if (removed_peers == NULL) {
+ removed_peers = pcmk__strikey_table(free, NULL);
+ }
+ g_hash_table_add(removed_peers, pcmk__str_copy(host));
+ attrd_start_election_if_needed();
+ }
+
if (uncache) {
pcmk__purge_node_from_cache(host, 0);
attrd_forget_node_xml_id(host);
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 281ec12..e75a1d3 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -24,6 +24,8 @@ attrd_election_cb(pcmk_cluster_t *cluster)
/* Update the peers after an election */
attrd_peer_sync(NULL);
+ attrd_erase_removed_peer_attributes();
+
/* After winning an election, update the CIB with the values of all
* attributes as the winner knows them.
*/
diff --git a/daemons/attrd/pacemaker-attrd.c b/daemons/attrd/pacemaker-attrd.c
index 7711fd2..3fa099b 100644
--- a/daemons/attrd/pacemaker-attrd.c
+++ b/daemons/attrd/pacemaker-attrd.c
@@ -201,6 +201,7 @@ main(int argc, char **argv)
attrd_cib_disconnect();
}
+ attrd_free_removed_peers();
attrd_free_waitlist();
pcmk_cluster_disconnect(attrd_cluster);
pcmk_cluster_free(attrd_cluster);
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index d9423c8..80ae0d9 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -184,6 +184,9 @@ extern GHashTable *peer_protocol_vers;
#define CIB_OP_TIMEOUT_S 120
+void attrd_free_removed_peers(void);
+void attrd_erase_removed_peer_attributes(void);
+
int attrd_cluster_connect(void);
void attrd_broadcast_value(const attribute_t *a, const attribute_value_t *v);
void attrd_peer_update(const pcmk__node_status_t *peer, xmlNode *xml,
--
2.47.1
From 9db7cad74c9c051761c9d8a099a235cc2320f35d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 14 Dec 2023 14:56:11 -0600
Subject: [PATCH 02/10] Low: pacemaker-attrd: Drop "requesting shutdown" code
The requesting_shutdown variable was checked only by
attrd_shutting_down(), when the if_requested argument was set to true.
In that case, it returned true if either the shutting_down variable was
true or both the if_requested argument and the requesting_shutdown
variable were true.
The only caller that passed if_requested=true was
attrd_cib_updated_cb(). It did this if:
a. the alerts section was changed, or
b. the status section or nodes section was changed by an untrusted
client.
Details:
a. Prior to f42e170, we didn't pass if_requested=true for an alerts
section change. We started doing so as of that commit mostly for
convenience. We decided that it seemed reasonable to ignore alert
changes when there was a shutdown pending.
This commit reverts to NOT ignoring alert changes due to pending
shutdown. That seems like it might be better. I'm not sure if it's
possible for us to land in attrd_send_attribute_alert() while a
shutdown is requested but has not begun. If so, it would be good to
send the correct alerts.
b. The other call with true is to avoid writing out all attributes when
the status or nodes section changes. It's probably okay to drop the
true there too. It was added by a1a9c54, to resolve a race condition
where:
* node2 left.
* node1's controller deleted node2's transient attributes from the
CIB.
* node1 took over as DC and replaced the CIB.
* node2's attribute manager was not yet actually shutting down, and
it responded to the CIB replacement by writing out all of the
attributes that were in its memory, including its own "shutdown"
attribute.
Now (as of the previous commit), node1's attribute manager would
delete this "shutdown" attribute as part of node2's shutdown process. (Or
more accurately, I think the attribute writer node will do that.)
So if we understand correctly, the attrd_shutting_down(true)
workaround is no longer needed.
With no more callers needing to pass true, the supporting code can go
away.
Co-Authored-By: Reid Wahl <nrwahl@protonmail.com>
---
daemons/attrd/attrd_cib.c | 6 +++---
daemons/attrd/attrd_corosync.c | 15 ++-----------
daemons/attrd/attrd_elections.c | 4 ++--
daemons/attrd/attrd_ipc.c | 2 +-
daemons/attrd/attrd_utils.c | 37 ++++-----------------------------
daemons/attrd/pacemaker-attrd.h | 4 +---
6 files changed, 13 insertions(+), 55 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 4231e4a..acd4621 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -34,7 +34,7 @@ attrd_cib_destroy_cb(gpointer user_data)
cib->cmds->signoff(cib);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from the CIB manager");
} else {
@@ -57,7 +57,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
}
if (pcmk__cib_element_in_patchset(patchset, PCMK_XE_ALERTS)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring alerts change in CIB during shutdown");
} else {
mainloop_set_trigger(attrd_config_read);
@@ -82,7 +82,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
if (status_changed
|| pcmk__cib_element_in_patchset(patchset, PCMK_XE_NODES)) {
- if (attrd_shutting_down(true)) {
+ if (attrd_shutting_down()) {
crm_debug("Ignoring node change in CIB during shutdown");
return;
}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 8497f34..02ddec6 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -83,7 +83,7 @@ attrd_peer_message(pcmk__node_status_t *peer, xmlNode *xml)
return;
}
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
/* If we're shutting down, we want to continue responding to election
* ops as long as we're a cluster member (because our vote may be
* needed). Ignore all other messages.
@@ -166,7 +166,7 @@ attrd_cpg_dispatch(cpg_handle_t handle,
static void
attrd_cpg_destroy(gpointer unused)
{
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Disconnected from Corosync process group");
} else {
@@ -328,17 +328,6 @@ update_attr_on_host(attribute_t *a, const pcmk__node_status_t *peer,
pcmk__str_update(&v->current, value);
attrd_set_attr_flags(a, attrd_attr_changed);
- if (pcmk__str_eq(host, attrd_cluster->priv->node_name, pcmk__str_casei)
- && pcmk__str_eq(attr, PCMK__NODE_ATTR_SHUTDOWN, pcmk__str_none)) {
-
- if (!pcmk__str_eq(value, "0", pcmk__str_null_matches)) {
- attrd_set_requesting_shutdown();
-
- } else {
- attrd_clear_requesting_shutdown();
- }
- }
-
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
crm_trace("Delaying write of %s %s for dampening",
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index e75a1d3..eb9ef8c 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -43,7 +43,7 @@ attrd_start_election_if_needed(void)
{
if ((peer_writer == NULL)
&& (election_state(attrd_cluster) != election_in_progress)
- && !attrd_shutting_down(false)) {
+ && !attrd_shutting_down()) {
crm_info("Starting an election to determine the writer");
election_vote(attrd_cluster);
@@ -65,7 +65,7 @@ attrd_handle_election_op(const pcmk__node_status_t *peer, xmlNode *xml)
crm_xml_add(xml, PCMK__XA_SRC, peer->name);
// Don't become writer if we're shutting down
- rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down(false));
+ rc = election_count_vote(attrd_cluster, xml, !attrd_shutting_down());
switch(rc) {
case election_start:
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 43e0f41..8a3bb36 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -492,7 +492,7 @@ static int32_t
attrd_ipc_accept(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
{
crm_trace("New client connection %p", c);
- if (attrd_shutting_down(false)) {
+ if (attrd_shutting_down()) {
crm_info("Ignoring new connection from pid %d during shutdown",
pcmk__client_pid(c));
return -ECONNREFUSED;
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index f219b88..e3e814d 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -25,7 +25,6 @@
cib_t *the_cib = NULL;
-static bool requesting_shutdown = false;
static bool shutting_down = false;
static GMainLoop *mloop = NULL;
@@ -34,45 +33,17 @@ static GMainLoop *mloop = NULL;
*/
GHashTable *peer_protocol_vers = NULL;
-/*!
- * \internal
- * \brief Set requesting_shutdown state
- */
-void
-attrd_set_requesting_shutdown(void)
-{
- requesting_shutdown = true;
-}
-
-/*!
- * \internal
- * \brief Clear requesting_shutdown state
- */
-void
-attrd_clear_requesting_shutdown(void)
-{
- requesting_shutdown = false;
-}
-
/*!
* \internal
* \brief Check whether local attribute manager is shutting down
*
- * \param[in] if_requested If \c true, also consider presence of
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute
- *
- * \return \c true if local attribute manager has begun shutdown sequence
- * or (if \p if_requested is \c true) whether local node has a nonzero
- * \c PCMK__NODE_ATTR_SHUTDOWN attribute set, otherwise \c false
- * \note Most callers should pass \c false for \p if_requested, because the
- * attribute manager needs to continue performing while the controller is
- * shutting down, and even needs to be eligible for election in case all
- * nodes are shutting down.
+ * \return \c true if local attribute manager has begun shutdown sequence,
+ * otherwise \c false
*/
bool
-attrd_shutting_down(bool if_requested)
+attrd_shutting_down(void)
{
- return shutting_down || (if_requested && requesting_shutdown);
+ return shutting_down;
}
/*!
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 80ae0d9..d3e5765 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -56,10 +56,8 @@
void attrd_init_mainloop(void);
void attrd_run_mainloop(void);
-void attrd_set_requesting_shutdown(void);
-void attrd_clear_requesting_shutdown(void);
void attrd_free_waitlist(void);
-bool attrd_shutting_down(bool if_requested);
+bool attrd_shutting_down(void);
void attrd_shutdown(int nsig);
void attrd_init_ipc(void);
void attrd_ipc_fini(void);
--
2.47.1
From 19a157cb90466aaa5d929573edeabded3ba047ef Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 18 Dec 2023 11:38:00 -0600
Subject: [PATCH 03/10] Low: controller: don't need to erase node attributes
for remote nodes
Now that the attribute manager will erase transient attributes from the
CIB when purging a node, we don't need to do that separately in the
controller.
Co-Authored-By: Chris Lumens <clumens@redhat.com>
---
daemons/controld/controld_remote_ra.c | 41 +++++++--------------------
1 file changed, 11 insertions(+), 30 deletions(-)
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 1cc4ae0..c9adf97 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -237,35 +237,19 @@ should_purge_attributes(pcmk__node_status_t *node)
return true;
}
-static enum controld_section_e
-section_to_delete(bool purge)
-{
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- if (purge) {
- return controld_section_all_unlocked;
- } else {
- return controld_section_lrm_unlocked;
- }
- } else {
- if (purge) {
- return controld_section_all;
- } else {
- return controld_section_lrm;
- }
- }
-}
-
static void
purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
{
- bool purge = should_purge_attributes(node);
- enum controld_section_e section = section_to_delete(purge);
+ enum controld_section_e section = controld_section_lrm;
- /* Purge node from attrd's memory */
- if (purge) {
+ // Purge node's transient attributes (from attribute manager and CIB)
+ if (should_purge_attributes(node)) {
update_attrd_remote_node_removed(node->name, NULL);
}
+ if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
+ section = controld_section_lrm_unlocked;
+ }
controld_delete_node_state(node->name, section, call_opt);
}
@@ -367,18 +351,15 @@ remote_node_down(const char *node_name, const enum down_opts opts)
int call_opt = crmd_cib_smart_opt();
pcmk__node_status_t *node = NULL;
- /* Purge node from attrd's memory */
+ // Purge node's transient attributes (from attribute manager and CIB)
update_attrd_remote_node_removed(node_name, NULL);
- /* Normally, only node attributes should be erased, and the resource history
- * should be kept until the node comes back up. However, after a successful
- * fence, we want to clear the history as well, so we don't think resources
- * are still running on the node.
+ /* Normally, the resource history should be kept until the node comes back
+ * up. However, after a successful fence, clear the history so we don't
+ * think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
- controld_delete_node_state(node_name, controld_section_all, call_opt);
- } else {
- controld_delete_node_state(node_name, controld_section_attrs, call_opt);
+ controld_delete_node_state(node_name, controld_section_lrm, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
--
2.47.1
From d49965412a5433a9a92463178d69074da9b3c349 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 14 Dec 2023 15:42:39 -0600
Subject: [PATCH 04/10] Refactor: controller: Allow purging node attrs without
cache removal
Nothing uses the new capability yet.
---
daemons/controld/controld_attrd.c | 22 +++++++++++++++-------
daemons/controld/controld_remote_ra.c | 4 ++--
daemons/controld/controld_utils.h | 2 +-
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
index eff8070..c8591ef 100644
--- a/daemons/controld/controld_attrd.c
+++ b/daemons/controld/controld_attrd.c
@@ -106,8 +106,15 @@ update_attrd_list(GList *attrs, uint32_t opts)
}
}
+/*!
+ * \internal
+ * \brief Ask attribute manager to purge a node and its transient attributes
+ *
+ * \param[in] node_name Node to purge
+ * \param[in] from_cache If true, purge from node caches as well
+ */
void
-update_attrd_remote_node_removed(const char *host, const char *user_name)
+controld_purge_node_attrs(const char *node_name, bool from_cache)
{
int rc = pcmk_rc_ok;
@@ -115,14 +122,15 @@ update_attrd_remote_node_removed(const char *host, const char *user_name)
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
}
if (rc == pcmk_rc_ok) {
- crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
- host);
- rc = pcmk__attrd_api_purge(attrd_api, host, true);
+ crm_debug("Asking %s to purge transient attributes%s for %s",
+ pcmk_ipc_name(attrd_api, true),
+ (from_cache? " and node cache" : ""), node_name);
+ rc = pcmk__attrd_api_purge(attrd_api, node_name, from_cache);
}
if (rc != pcmk_rc_ok) {
- crm_err("Could not purge Pacemaker Remote node %s "
- "in attribute manager%s: %s " QB_XS " rc=%d",
- host, when(), pcmk_rc_str(rc), rc);
+ crm_err("Could not purge node %s from %s%s: %s "
+ QB_XS " rc=%d", node_name, pcmk_ipc_name(attrd_api, true),
+ when(), pcmk_rc_str(rc), rc);
}
}
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index c9adf97..3136180 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -244,7 +244,7 @@ purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
// Purge node's transient attributes (from attribute manager and CIB)
if (should_purge_attributes(node)) {
- update_attrd_remote_node_removed(node->name, NULL);
+ controld_purge_node_attrs(node->name, true);
}
if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
@@ -352,7 +352,7 @@ remote_node_down(const char *node_name, const enum down_opts opts)
pcmk__node_status_t *node = NULL;
// Purge node's transient attributes (from attribute manager and CIB)
- update_attrd_remote_node_removed(node_name, NULL);
+ controld_purge_node_attrs(node_name, true);
/* Normally, the resource history should be kept until the node comes back
* up. However, after a successful fence, clear the history so we don't
diff --git a/daemons/controld/controld_utils.h b/daemons/controld/controld_utils.h
index e633888..262e0d1 100644
--- a/daemons/controld/controld_utils.h
+++ b/daemons/controld/controld_utils.h
@@ -69,7 +69,7 @@ void crm_update_quorum(gboolean quorum, gboolean force_update);
void controld_close_attrd_ipc(void);
void update_attrd(const char *host, const char *name, const char *value, const char *user_name, gboolean is_remote_node);
void update_attrd_list(GList *attrs, uint32_t opts);
-void update_attrd_remote_node_removed(const char *host, const char *user_name);
+void controld_purge_node_attrs(const char *node_name, bool from_cache);
void update_attrd_clear_failures(const char *host, const char *rsc,
const char *op, const char *interval_spec,
gboolean is_remote_node);
--
2.47.1
From 5fb8fdc72f457c7e9a691c10a99d54d0e03bd77d Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 14 Dec 2023 16:09:40 -0600
Subject: [PATCH 05/10] Fix: controller: Don't purge transient attributes on
node loss
With recent changes, the attribute manager now handles it when the node
leaves the cluster, so the controller purge is redundant.
This does alter the timing somewhat, since the controller's purge
occurred when the node left the controller process group, while the
attribute manager's purge occurs when it leaves the cluster, but that
shouldn't make a significant difference.
This fixes a problem when a node's controller crashes and is respawned
while fencing is disabled. Previously, another node's controller would
remove that node's transient attributes from the CIB, but they would
remain in the attribute managers' memory. Now, the attributes are
correctly retained in the CIB in this situation.
Fixes T137
Fixes T139
Co-Authored-By: Chris Lumens <clumens@redhat.com>
---
daemons/controld/controld_callbacks.c | 14 +-------------
1 file changed, 1 insertion(+), 13 deletions(-)
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index 48c255e..57e5183 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -233,19 +233,11 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
pcmk__str_casei)
&& !pcmk__cluster_is_node_active(node)) {
- /* The DC has left, so delete its transient attributes and
- * trigger a new election.
- *
- * A DC sends its shutdown request to all peers, who update the
- * DC's expected state to down. This avoids fencing upon
- * deletion of its transient attributes.
- */
+ // The DC has left, so trigger a new election
crm_notice("Our peer on the DC (%s) is dead",
controld_globals.dc_name);
register_fsa_input(C_CRMD_STATUS_CALLBACK, I_ELECTION, NULL);
- controld_delete_node_state(node->name, controld_section_attrs,
- cib_none);
} else if (AM_I_DC
|| pcmk_is_set(controld_globals.flags, controld_dc_left)
@@ -256,10 +248,6 @@ peer_update_callback(enum pcmk__node_update type, pcmk__node_status_t *node,
*/
if (appeared) {
te_trigger_stonith_history_sync(FALSE);
- } else {
- controld_delete_node_state(node->name,
- controld_section_attrs,
- cib_none);
}
}
break;
--
2.47.1
From c40026fb77a6f7ee804979293e3019943a34e06b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 18 Dec 2023 13:05:35 -0600
Subject: [PATCH 06/10] Low: controller: Ask attribute manager to purge fenced
nodes' attributes
...instead of wiping from the CIB directly.
Co-Authored-By: Chris Lumens <clumens@redhat.com>
---
daemons/controld/controld_fencing.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 51367ca..de074aa 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -267,7 +267,13 @@ update_node_state_after_fencing(const char *target, const char *target_xml_id)
crm_debug("Updating node state for %s after fencing (call %d)", target, rc);
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
- controld_delete_node_state(peer->name, controld_section_all, cib_none);
+ // Delete node's resource history from CIB
+ controld_delete_node_state(peer->name, controld_section_lrm, cib_none);
+
+ // Ask attribute manager to delete node's transient attributes
+ // @TODO: This is the only call to controld_purge_node_attrs that doesn't
+ // want to also purge the node from the caches. Why?
+ controld_purge_node_attrs(peer->name, false);
}
/*!
--
2.47.1
From d9d19827d93f2394a831a9651aae064ea5a04fa4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 18 Dec 2023 13:14:53 -0600
Subject: [PATCH 07/10] Refactor: controller: Drop no-longer-used section enum
values
---
daemons/controld/controld_cib.c | 24 ------------------------
daemons/controld/controld_cib.h | 5 +----
2 files changed, 1 insertion(+), 28 deletions(-)
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
index e2a0d50..39c2b06 100644
--- a/daemons/controld/controld_cib.c
+++ b/daemons/controld/controld_cib.c
@@ -279,17 +279,6 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
"[not(@" PCMK_OPT_SHUTDOWN_LOCK ") " \
"or " PCMK_OPT_SHUTDOWN_LOCK "<%lld]"
-// Node's PCMK__XE_TRANSIENT_ATTRIBUTES section (name 1x)
-#define XPATH_NODE_ATTRS XPATH_NODE_STATE "/" PCMK__XE_TRANSIENT_ATTRIBUTES
-
-// Everything under PCMK__XE_NODE_STATE (name 1x)
-#define XPATH_NODE_ALL XPATH_NODE_STATE "/*"
-
-/* Unlocked history + transient attributes
- * (name 2x, (seconds_since_epoch - PCMK_OPT_SHUTDOWN_LOCK_LIMIT) 1x, name 1x)
- */
-#define XPATH_NODE_ALL_UNLOCKED XPATH_NODE_LRM_UNLOCKED "|" XPATH_NODE_ATTRS
-
/*!
* \internal
* \brief Get the XPath and description of a node state section to be deleted
@@ -320,19 +309,6 @@ controld_node_state_deletion_strings(const char *uname,
uname, uname, expire);
desc_pre = "resource history (other than shutdown locks)";
break;
- case controld_section_attrs:
- *xpath = crm_strdup_printf(XPATH_NODE_ATTRS, uname);
- desc_pre = "transient attributes";
- break;
- case controld_section_all:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL, uname);
- desc_pre = "all state";
- break;
- case controld_section_all_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_ALL_UNLOCKED,
- uname, uname, expire, uname);
- desc_pre = "all state (other than shutdown locks)";
- break;
default:
// We called this function incorrectly
pcmk__assert(false);
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
index b8622d5..25277e7 100644
--- a/daemons/controld/controld_cib.h
+++ b/daemons/controld/controld_cib.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2024 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -50,9 +50,6 @@ unsigned int cib_op_timeout(void);
enum controld_section_e {
controld_section_lrm,
controld_section_lrm_unlocked,
- controld_section_attrs,
- controld_section_all,
- controld_section_all_unlocked
};
void controld_node_state_deletion_strings(const char *uname,
--
2.47.1
From 1056a0e3f6b618c23eb5a73d7e4a600619713a0c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 18 Dec 2023 13:39:49 -0600
Subject: [PATCH 08/10] Refactor: controller: Drop node state section enum
It now boils down to a bool for whether we want only unlocked resources.
---
daemons/controld/controld_cib.c | 48 +++++++++++----------------
daemons/controld/controld_cib.h | 13 ++------
daemons/controld/controld_execd.c | 3 +-
daemons/controld/controld_fencing.c | 2 +-
daemons/controld/controld_join_dc.c | 9 +++--
daemons/controld/controld_remote_ra.c | 10 +++---
6 files changed, 32 insertions(+), 53 deletions(-)
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
index 39c2b06..298c321 100644
--- a/daemons/controld/controld_cib.c
+++ b/daemons/controld/controld_cib.c
@@ -281,16 +281,15 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
/*!
* \internal
- * \brief Get the XPath and description of a node state section to be deleted
+ * \brief Get the XPath and description of resource history to be deleted
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to be deleted
- * \param[out] xpath Where to store XPath of \p section
- * \param[out] desc If not \c NULL, where to store description of \p section
+ * \param[in] uname Name of node to delete resource history for
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[out] xpath Where to store XPath for history deletion
+ * \param[out] desc If not NULL, where to store loggable description
*/
void
-controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
+controld_node_state_deletion_strings(const char *uname, bool unlocked_only,
char **xpath, char **desc)
{
const char *desc_pre = NULL;
@@ -299,20 +298,13 @@ controld_node_state_deletion_strings(const char *uname,
long long expire = (long long) time(NULL)
- controld_globals.shutdown_lock_limit;
- switch (section) {
- case controld_section_lrm:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
- desc_pre = "resource history";
- break;
- case controld_section_lrm_unlocked:
- *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
- uname, uname, expire);
- desc_pre = "resource history (other than shutdown locks)";
- break;
- default:
- // We called this function incorrectly
- pcmk__assert(false);
- break;
+ if (unlocked_only) {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM_UNLOCKED,
+ uname, uname, expire);
+ desc_pre = "resource history (other than shutdown locks)";
+ } else {
+ *xpath = crm_strdup_printf(XPATH_NODE_LRM, uname);
+ desc_pre = "resource history";
}
if (desc != NULL) {
@@ -322,15 +314,14 @@ controld_node_state_deletion_strings(const char *uname,
/*!
* \internal
- * \brief Delete subsection of a node's CIB \c PCMK__XE_NODE_STATE
+ * \brief Delete a node's resource history from the CIB
*
- * \param[in] uname Desired node
- * \param[in] section Subsection of \c PCMK__XE_NODE_STATE to delete
- * \param[in] options CIB call options to use
+ * \param[in] uname Name of node to delete resource history for
+ * \param[in] unlocked_only If true, delete history of only unlocked resources
+ * \param[in] options CIB call options to use
*/
void
-controld_delete_node_state(const char *uname, enum controld_section_e section,
- int options)
+controld_delete_node_state(const char *uname, bool unlocked_only, int options)
{
cib_t *cib = controld_globals.cib_conn;
char *xpath = NULL;
@@ -339,8 +330,7 @@ controld_delete_node_state(const char *uname, enum controld_section_e section,
pcmk__assert((uname != NULL) && (cib != NULL));
- controld_node_state_deletion_strings(uname, section, &xpath, &desc);
-
+ controld_node_state_deletion_strings(uname, unlocked_only, &xpath, &desc);
cib__set_call_options(options, "node state deletion",
cib_xpath|cib_multiple);
cib_rc = cib->cmds->remove(cib, xpath, NULL, options);
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
index 25277e7..f423f93 100644
--- a/daemons/controld/controld_cib.h
+++ b/daemons/controld/controld_cib.h
@@ -46,17 +46,10 @@ int controld_update_cib(const char *section, xmlNode *data, int options,
void *));
unsigned int cib_op_timeout(void);
-// Subsections of PCMK__XE_NODE_STATE
-enum controld_section_e {
- controld_section_lrm,
- controld_section_lrm_unlocked,
-};
-
-void controld_node_state_deletion_strings(const char *uname,
- enum controld_section_e section,
+void controld_node_state_deletion_strings(const char *uname, bool unlocked_only,
char **xpath, char **desc);
-void controld_delete_node_state(const char *uname,
- enum controld_section_e section, int options);
+void controld_delete_node_state(const char *uname, bool unlocked_only,
+ int options);
int controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options);
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 2ec6893..801a5db 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1074,8 +1074,7 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
}
/* Now delete the copy in the CIB */
- controld_delete_node_state(lrm_state->node_name, controld_section_lrm,
- cib_none);
+ controld_delete_node_state(lrm_state->node_name, false, cib_none);
}
/*!
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index de074aa..6270dcd 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -268,7 +268,7 @@ update_node_state_after_fencing(const char *target, const char *target_xml_id)
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
// Delete node's resource history from CIB
- controld_delete_node_state(peer->name, controld_section_lrm, cib_none);
+ controld_delete_node_state(peer->name, false, cib_none);
// Ask attribute manager to delete node's transient attributes
// @TODO: This is the only call to controld_purge_node_attrs that doesn't
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index a91fbfa..f88cc47 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -771,7 +771,8 @@ do_dc_join_ack(long long action,
pcmk__node_status_t *peer = NULL;
enum controld_join_phase phase = controld_join_none;
- enum controld_section_e section = controld_section_lrm;
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
char *xpath = NULL;
xmlNode *state = join_ack->xml;
xmlNode *execd_state = NULL;
@@ -832,10 +833,8 @@ do_dc_join_ack(long long action,
}
// Delete relevant parts of node's current executor state from CIB
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- section = controld_section_lrm_unlocked;
- }
- controld_node_state_deletion_strings(join_from, section, &xpath, NULL);
+ controld_node_state_deletion_strings(join_from, unlocked_only, &xpath,
+ NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 3136180..86a3544 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -240,17 +240,15 @@ should_purge_attributes(pcmk__node_status_t *node)
static void
purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
{
- enum controld_section_e section = controld_section_lrm;
+ const bool unlocked_only = pcmk_is_set(controld_globals.flags,
+ controld_shutdown_lock_enabled);
// Purge node's transient attributes (from attribute manager and CIB)
if (should_purge_attributes(node)) {
controld_purge_node_attrs(node->name, true);
}
- if (pcmk_is_set(controld_globals.flags, controld_shutdown_lock_enabled)) {
- section = controld_section_lrm_unlocked;
- }
- controld_delete_node_state(node->name, section, call_opt);
+ controld_delete_node_state(node->name, unlocked_only, call_opt);
}
/*!
@@ -359,7 +357,7 @@ remote_node_down(const char *node_name, const enum down_opts opts)
* think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
- controld_delete_node_state(node_name, controld_section_lrm, call_opt);
+ controld_delete_node_state(node_name, false, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
--
2.47.1
From 050a3caad4989cc1c958420dff47b04be9a1cd55 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 18 Dec 2023 15:45:00 -0600
Subject: [PATCH 09/10] Refactor: controller: Rename
controld_delete_node_state()
...to controld_delete_node_history(), and
controld_node_state_deletion_strings() to
controld_node_history_deletion_strings(), since they delete only history
now.
---
daemons/controld/controld_cib.c | 8 ++++----
daemons/controld/controld_cib.h | 9 +++++----
daemons/controld/controld_execd.c | 2 +-
daemons/controld/controld_fencing.c | 2 +-
daemons/controld/controld_join_dc.c | 4 ++--
daemons/controld/controld_remote_ra.c | 4 ++--
6 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/daemons/controld/controld_cib.c b/daemons/controld/controld_cib.c
index 298c321..fb06f22 100644
--- a/daemons/controld/controld_cib.c
+++ b/daemons/controld/controld_cib.c
@@ -289,8 +289,8 @@ cib_delete_callback(xmlNode *msg, int call_id, int rc, xmlNode *output,
* \param[out] desc If not NULL, where to store loggable description
*/
void
-controld_node_state_deletion_strings(const char *uname, bool unlocked_only,
- char **xpath, char **desc)
+controld_node_history_deletion_strings(const char *uname, bool unlocked_only,
+ char **xpath, char **desc)
{
const char *desc_pre = NULL;
@@ -321,7 +321,7 @@ controld_node_state_deletion_strings(const char *uname, bool unlocked_only,
* \param[in] options CIB call options to use
*/
void
-controld_delete_node_state(const char *uname, bool unlocked_only, int options)
+controld_delete_node_history(const char *uname, bool unlocked_only, int options)
{
cib_t *cib = controld_globals.cib_conn;
char *xpath = NULL;
@@ -330,7 +330,7 @@ controld_delete_node_state(const char *uname, bool unlocked_only, int options)
pcmk__assert((uname != NULL) && (cib != NULL));
- controld_node_state_deletion_strings(uname, unlocked_only, &xpath, &desc);
+ controld_node_history_deletion_strings(uname, unlocked_only, &xpath, &desc);
cib__set_call_options(options, "node state deletion",
cib_xpath|cib_multiple);
cib_rc = cib->cmds->remove(cib, xpath, NULL, options);
diff --git a/daemons/controld/controld_cib.h b/daemons/controld/controld_cib.h
index f423f93..116db64 100644
--- a/daemons/controld/controld_cib.h
+++ b/daemons/controld/controld_cib.h
@@ -46,10 +46,11 @@ int controld_update_cib(const char *section, xmlNode *data, int options,
void *));
unsigned int cib_op_timeout(void);
-void controld_node_state_deletion_strings(const char *uname, bool unlocked_only,
- char **xpath, char **desc);
-void controld_delete_node_state(const char *uname, bool unlocked_only,
- int options);
+void controld_node_history_deletion_strings(const char *uname,
+ bool unlocked_only,
+ char **xpath, char **desc);
+void controld_delete_node_history(const char *uname, bool unlocked_only,
+ int options);
int controld_delete_resource_history(const char *rsc_id, const char *node,
const char *user_name, int call_options);
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index 801a5db..977acf0 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1074,7 +1074,7 @@ force_reprobe(lrm_state_t *lrm_state, const char *from_sys,
}
/* Now delete the copy in the CIB */
- controld_delete_node_state(lrm_state->node_name, false, cib_none);
+ controld_delete_node_history(lrm_state->node_name, false, cib_none);
}
/*!
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
index 6270dcd..026b240 100644
--- a/daemons/controld/controld_fencing.c
+++ b/daemons/controld/controld_fencing.c
@@ -268,7 +268,7 @@ update_node_state_after_fencing(const char *target, const char *target_xml_id)
fsa_register_cib_callback(rc, pcmk__str_copy(target), cib_fencing_updated);
// Delete node's resource history from CIB
- controld_delete_node_state(peer->name, false, cib_none);
+ controld_delete_node_history(peer->name, false, cib_none);
// Ask attribute manager to delete node's transient attributes
// @TODO: This is the only call to controld_purge_node_attrs that doesn't
diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c
index f88cc47..90d1bc0 100644
--- a/daemons/controld/controld_join_dc.c
+++ b/daemons/controld/controld_join_dc.c
@@ -833,8 +833,8 @@ do_dc_join_ack(long long action,
}
// Delete relevant parts of node's current executor state from CIB
- controld_node_state_deletion_strings(join_from, unlocked_only, &xpath,
- NULL);
+ controld_node_history_deletion_strings(join_from, unlocked_only, &xpath,
+ NULL);
rc = cib->cmds->remove(cib, xpath, NULL,
cib_xpath|cib_multiple|cib_transaction);
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 86a3544..1c52477 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -248,7 +248,7 @@ purge_remote_node_attrs(int call_opt, pcmk__node_status_t *node)
controld_purge_node_attrs(node->name, true);
}
- controld_delete_node_state(node->name, unlocked_only, call_opt);
+ controld_delete_node_history(node->name, unlocked_only, call_opt);
}
/*!
@@ -357,7 +357,7 @@ remote_node_down(const char *node_name, const enum down_opts opts)
* think resources are still running on the node.
*/
if (opts == DOWN_ERASE_LRM) {
- controld_delete_node_state(node_name, false, call_opt);
+ controld_delete_node_history(node_name, false, call_opt);
}
/* Ensure node is in the remote peer cache with lost state */
--
2.47.1
From 97dfc11f6c9d1a90ef744e5de2fe7678f3518bba Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Sep 2025 14:59:38 -0400
Subject: [PATCH 10/10] Refactor: daemons: Remove the down_opts enum
This has only ever had two values, which basically just means it's a
bool.
---
daemons/controld/controld_remote_ra.c | 21 ++++++++-------------
1 file changed, 8 insertions(+), 13 deletions(-)
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
index 1c52477..eb1bc55 100644
--- a/daemons/controld/controld_remote_ra.c
+++ b/daemons/controld/controld_remote_ra.c
@@ -330,20 +330,15 @@ remote_node_up(const char *node_name)
pcmk__xml_free(update);
}
-enum down_opts {
- DOWN_KEEP_LRM,
- DOWN_ERASE_LRM
-};
-
/*!
* \internal
* \brief Handle cluster communication related to pacemaker_remote node leaving
*
* \param[in] node_name Name of lost node
- * \param[in] opts Whether to keep or erase LRM history
+ * \param[in] erase_lrm If \c true, erase the LRM history
*/
static void
-remote_node_down(const char *node_name, const enum down_opts opts)
+remote_node_down(const char *node_name, bool erase_lrm)
{
xmlNode *update;
int call_opt = crmd_cib_smart_opt();
@@ -356,7 +351,7 @@ remote_node_down(const char *node_name, const enum down_opts opts)
* up. However, after a successful fence, clear the history so we don't
* think resources are still running on the node.
*/
- if (opts == DOWN_ERASE_LRM) {
+ if (erase_lrm) {
controld_delete_node_history(node_name, false, call_opt);
}
@@ -416,7 +411,7 @@ check_remote_node_state(const remote_ra_cmd_t *cmd)
if (ra_data) {
if (!pcmk_is_set(ra_data->status, takeover_complete)) {
/* Stop means down if we didn't successfully migrate elsewhere */
- remote_node_down(cmd->rsc_id, DOWN_KEEP_LRM);
+ remote_node_down(cmd->rsc_id, false);
} else if (AM_I_DC == FALSE) {
/* Only the connection host and DC track node state,
* so if the connection migrated elsewhere and we aren't DC,
@@ -692,7 +687,7 @@ remote_lrm_op_callback(lrmd_event_data_t * op)
lrm_state->node_name);
/* Do roughly what a 'stop' on the remote-resource would do */
handle_remote_ra_stop(lrm_state, NULL);
- remote_node_down(lrm_state->node_name, DOWN_KEEP_LRM);
+ remote_node_down(lrm_state->node_name, false);
/* now fake the reply of a successful 'stop' */
synthesize_lrmd_success(NULL, lrm_state->node_name,
PCMK_ACTION_STOP);
@@ -1366,11 +1361,11 @@ remote_ra_process_pseudo(xmlNode *xml)
* peer cache state will be incorrect unless and until the guest is
* recovered.
*/
- if (result) {
+ if (result != NULL) {
const char *remote = pcmk__xe_id(result);
- if (remote) {
- remote_node_down(remote, DOWN_ERASE_LRM);
+ if (remote != NULL) {
+ remote_node_down(remote, true);
}
}
}
--
2.47.1