79752652c5
- Fix documentation for Pacemaker Remote schema transfers
- Do not check CIB feature set version when CIB_file is set
- Consolidate attrd cache handling
- Avoid duplicating option metadata across daemons
- Related: RHEL-7597
- Related: RHEL-14045
1444 lines
52 KiB
Diff
From 543a1e9b6f22f13956a8ef22b20c8fe93dad7ae9 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Tue, 12 Dec 2023 16:08:44 -0600
|
|
Subject: [PATCH 01/12] Refactor: libcrmcommon: support attrd purge requests
|
|
without clearing cache
|
|
|
|
Nothing uses the new capability yet
|
|
---
|
|
daemons/attrd/attrd_corosync.c | 4 +++-
|
|
daemons/attrd/attrd_messages.c | 8 +++++++-
|
|
daemons/attrd/pacemaker-attrd.h | 3 ++-
|
|
daemons/controld/controld_attrd.c | 2 +-
|
|
include/crm/common/ipc_attrd_internal.h | 7 ++++---
|
|
include/crm_internal.h | 1 +
|
|
lib/common/ipc_attrd.c | 3 ++-
|
|
lib/common/ipc_client.c | 1 +
|
|
8 files changed, 21 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
|
index 86dc67b04..e6cd07f65 100644
|
|
--- a/daemons/attrd/attrd_corosync.c
|
|
+++ b/daemons/attrd/attrd_corosync.c
|
|
@@ -540,7 +540,9 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
|
|
GHashTableIter aIter;
|
|
|
|
CRM_CHECK(host != NULL, return);
|
|
- crm_notice("Removing all %s attributes for peer %s", host, source);
|
|
+ crm_notice("Removing all %s attributes for node %s "
|
|
+ CRM_XS " %s reaping node from cache",
|
|
+ host, source, (uncache? "and" : "without"));
|
|
|
|
g_hash_table_iter_init(&aIter, attributes);
|
|
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
|
|
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
|
|
index 89da6d894..ac32e18af 100644
|
|
--- a/daemons/attrd/attrd_messages.c
|
|
+++ b/daemons/attrd/attrd_messages.c
|
|
@@ -148,7 +148,13 @@ handle_remove_request(pcmk__request_t *request)
|
|
{
|
|
if (request->peer != NULL) {
|
|
const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME);
|
|
- attrd_peer_remove(host, true, request->peer);
|
|
+ bool reap = false;
|
|
+
|
|
+ if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP,
|
|
+ &reap) != pcmk_rc_ok) {
|
|
+ reap = true; // Default to true for backward compatibility
|
|
+ }
|
|
+ attrd_peer_remove(host, reap, request->peer);
|
|
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
|
|
return NULL;
|
|
} else {
|
|
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
|
|
index b8929a7f7..70e2cb41b 100644
|
|
--- a/daemons/attrd/pacemaker-attrd.h
|
|
+++ b/daemons/attrd/pacemaker-attrd.h
|
|
@@ -42,8 +42,9 @@
|
|
* 4 2.1.5 Multiple attributes can be updated in a single IPC
|
|
* message
|
|
* 5 2.1.5 Peers can request confirmation of a sent message
|
|
+ * 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP
|
|
*/
|
|
-#define ATTRD_PROTOCOL_VERSION "5"
|
|
+#define ATTRD_PROTOCOL_VERSION "6"
|
|
|
|
#define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4)
|
|
#define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5)
|
|
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
|
|
index 923abb92d..958dc2f14 100644
|
|
--- a/daemons/controld/controld_attrd.c
|
|
+++ b/daemons/controld/controld_attrd.c
|
|
@@ -117,7 +117,7 @@ update_attrd_remote_node_removed(const char *host, const char *user_name)
|
|
if (rc == pcmk_rc_ok) {
|
|
crm_trace("Asking attribute manager to purge Pacemaker Remote node %s",
|
|
host);
|
|
- rc = pcmk__attrd_api_purge(attrd_api, host);
|
|
+ rc = pcmk__attrd_api_purge(attrd_api, host, true);
|
|
}
|
|
if (rc != pcmk_rc_ok) {
|
|
crm_err("Could not purge Pacemaker Remote node %s "
|
|
diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h
|
|
index b1b7584bd..39a55ad1d 100644
|
|
--- a/include/crm/common/ipc_attrd_internal.h
|
|
+++ b/include/crm/common/ipc_attrd_internal.h
|
|
@@ -89,10 +89,11 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na
|
|
|
|
/*!
|
|
* \internal
|
|
- * \brief Purge a node from pacemaker-attrd
|
|
+ * \brief Request removal of a node's transient attributes
|
|
*
|
|
* \param[in,out] api pacemaker-attrd IPC object
|
|
- * \param[in] node Node to remove
|
|
+ * \param[in] node Node whose attributes should be purged
|
|
+ * \param[in] reap If true, also request removal from node caches
|
|
*
|
|
* \note If \p api is NULL, a new temporary connection will be created
|
|
* just for this operation and destroyed afterwards. If \p api is
|
|
@@ -102,7 +103,7 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na
|
|
*
|
|
* \return Standard Pacemaker return code
|
|
*/
|
|
-int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node);
|
|
+int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap);
|
|
|
|
/*!
|
|
* \internal
|
|
diff --git a/include/crm_internal.h b/include/crm_internal.h
|
|
index 3bc8d096a..f800ab0cc 100644
|
|
--- a/include/crm_internal.h
|
|
+++ b/include/crm_internal.h
|
|
@@ -92,6 +92,7 @@
|
|
#define PCMK__XA_MODE "mode"
|
|
#define PCMK__XA_NODE_START_STATE "node_start_state"
|
|
#define PCMK__XA_PATH "path"
|
|
+#define PCMK__XA_REAP "reap"
|
|
#define PCMK__XA_SCHEMA "schema"
|
|
#define PCMK__XA_SCHEMAS "schemas"
|
|
#define PCMK__XA_TASK "task"
|
|
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
|
|
index 9caaabec0..56cdb5aba 100644
|
|
--- a/lib/common/ipc_attrd.c
|
|
+++ b/lib/common/ipc_attrd.c
|
|
@@ -277,7 +277,7 @@ pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
}
|
|
|
|
int
|
|
-pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node)
|
|
+pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
|
|
{
|
|
int rc = pcmk_rc_ok;
|
|
xmlNode *request = NULL;
|
|
@@ -291,6 +291,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node)
|
|
request = create_attrd_op(NULL);
|
|
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
|
|
+ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap);
|
|
pcmk__xe_add_node(request, node, 0);
|
|
|
|
if (api == NULL) {
|
|
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
|
|
index 0d3865095..5e64e2324 100644
|
|
--- a/lib/common/ipc_client.c
|
|
+++ b/lib/common/ipc_client.c
|
|
@@ -759,6 +759,7 @@ create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name,
|
|
crm_xml_add(request, F_TYPE, T_ATTRD);
|
|
crm_xml_add(request, F_ORIG, crm_system_name);
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
|
|
+ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true);
|
|
pcmk__xe_add_node(request, node_name, nodeid);
|
|
break;
|
|
|
|
--
|
|
2.41.0
|
|
|
|
From adc1d8ef587913e5505494e0205bd77a8e0a878e Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 13 Dec 2023 09:24:28 -0600
|
|
Subject: [PATCH 02/12] Log: attrd: improve messages for CIB wipe
|
|
|
|
Also, expose attrd_erase_attrs() as attrd_cib_erase_transient_attrs() and make
|
|
it take the node name as an argument, for future reuse.
|
|
---
|
|
daemons/attrd/attrd_cib.c | 60 ++++++++++++++++++++-------------
|
|
daemons/attrd/pacemaker-attrd.h | 1 +
|
|
2 files changed, 37 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
|
|
index 80e5580d9..ca1c5b9e0 100644
|
|
--- a/daemons/attrd/attrd_cib.c
|
|
+++ b/daemons/attrd/attrd_cib.c
|
|
@@ -153,41 +153,44 @@ static void
|
|
attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output,
|
|
void *user_data)
|
|
{
|
|
- do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG),
|
|
- "Cleared transient attributes: %s "
|
|
- CRM_XS " xpath=%s rc=%d",
|
|
- pcmk_strerror(rc), (char *) user_data, rc);
|
|
+ const char *node = pcmk__s((const char *) user_data, "a node");
|
|
+
|
|
+ if (rc == pcmk_ok) {
|
|
+ crm_info("Cleared transient node attributes for %s from CIB", node);
|
|
+ } else {
|
|
+ crm_err("Unable to clear transient node attributes for %s from CIB: %s",
|
|
+ node, pcmk_strerror(rc));
|
|
+ }
|
|
}
|
|
|
|
#define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS
|
|
|
|
/*!
|
|
* \internal
|
|
- * \brief Wipe all transient attributes for this node from the CIB
|
|
+ * \brief Wipe all transient node attributes for a node from the CIB
|
|
*
|
|
- * Clear any previous transient node attributes from the CIB. This is
|
|
- * normally done by the DC's controller when this node leaves the cluster, but
|
|
- * this handles the case where the node restarted so quickly that the
|
|
- * cluster layer didn't notice.
|
|
- *
|
|
- * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED),
|
|
- * ideally we'd skip this and sync our attributes from the writer.
|
|
- * However, currently we reject any values for us that the writer has, in
|
|
- * attrd_peer_update().
|
|
+ * \param[in] node Node to clear attributes for
|
|
*/
|
|
-static void
|
|
-attrd_erase_attrs(void)
|
|
+void
|
|
+attrd_cib_erase_transient_attrs(const char *node)
|
|
{
|
|
int call_id = 0;
|
|
- char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname);
|
|
+ char *xpath = NULL;
|
|
+
|
|
+ CRM_CHECK(node != NULL, return);
|
|
+
|
|
+ xpath = crm_strdup_printf(XPATH_TRANSIENT, node);
|
|
|
|
- crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s",
|
|
- xpath);
|
|
+ crm_debug("Clearing transient node attributes for %s from CIB using %s",
|
|
+ node, xpath);
|
|
|
|
call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath);
|
|
- the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath,
|
|
- "attrd_erase_cb", attrd_erase_cb,
|
|
- free);
|
|
+ free(xpath);
|
|
+
|
|
+ // strdup() is just for logging here, so ignore failure
|
|
+ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE,
|
|
+ strdup(node), "attrd_erase_cb",
|
|
+ attrd_erase_cb, free);
|
|
}
|
|
|
|
/*!
|
|
@@ -197,8 +200,17 @@ attrd_erase_attrs(void)
|
|
void
|
|
attrd_cib_init(void)
|
|
{
|
|
- // We have no attribute values in memory, wipe the CIB to match
|
|
- attrd_erase_attrs();
|
|
+ /* We have no attribute values in memory, so wipe the CIB to match. This is
|
|
+ * normally done by the DC's controller when this node leaves the cluster, but
|
|
+ * this handles the case where the node restarted so quickly that the
|
|
+ * cluster layer didn't notice.
|
|
+ *
|
|
+ * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED),
|
|
+ * ideally we'd skip this and sync our attributes from the writer.
|
|
+ * However, currently we reject any values for us that the writer has, in
|
|
+ * attrd_peer_update().
|
|
+ */
|
|
+ attrd_cib_erase_transient_attrs(attrd_cluster->uname);
|
|
|
|
// Set a trigger for reading the CIB (for the alerts section)
|
|
attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL);
|
|
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
|
|
index 70e2cb41b..62637d1d7 100644
|
|
--- a/daemons/attrd/pacemaker-attrd.h
|
|
+++ b/daemons/attrd/pacemaker-attrd.h
|
|
@@ -66,6 +66,7 @@ void attrd_ipc_fini(void);
|
|
int attrd_cib_connect(int max_retry);
|
|
void attrd_cib_disconnect(void);
|
|
void attrd_cib_init(void);
|
|
+void attrd_cib_erase_transient_attrs(const char *node);
|
|
|
|
bool attrd_value_needs_expansion(const char *value);
|
|
int attrd_expand_value(const char *value, const char *old_value);
|
|
--
|
|
2.41.0
|
|
|
|
From 9be38897eaa683ad7920503d9c9fd7db7a20a8ec Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 13 Dec 2023 11:20:07 -0600
|
|
Subject: [PATCH 03/12] Refactor: attrd: convert value booleans to flags
|
|
|
|
---
|
|
daemons/attrd/attrd_attributes.c | 7 +++---
|
|
daemons/attrd/attrd_corosync.c | 38 +++++++++++++++++---------------
|
|
daemons/attrd/pacemaker-attrd.h | 21 ++++++++++++++++--
|
|
3 files changed, 42 insertions(+), 24 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
|
|
index 388c181d7..8f32988be 100644
|
|
--- a/daemons/attrd/attrd_attributes.c
|
|
+++ b/daemons/attrd/attrd_attributes.c
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2013-2022 the Pacemaker project contributors
|
|
+ * Copyright 2013-2023 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -143,7 +143,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
|
|
crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
|
|
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
|
|
pcmk__xe_add_node(xml, v->nodename, v->nodeid);
|
|
- if (v->is_remote != 0) {
|
|
+ if (pcmk_is_set(v->flags, attrd_value_remote)) {
|
|
crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1);
|
|
}
|
|
crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current);
|
|
@@ -166,8 +166,7 @@ attrd_clear_value_seen(void)
|
|
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
|
|
g_hash_table_iter_init(&vIter, a->values);
|
|
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
|
|
- v->seen = FALSE;
|
|
- crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current);
|
|
+ attrd_clear_value_flags(v, attrd_value_from_peer);
|
|
}
|
|
}
|
|
}
|
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
|
index e6cd07f65..ca20bdc0f 100644
|
|
--- a/daemons/attrd/attrd_corosync.c
|
|
+++ b/daemons/attrd/attrd_corosync.c
|
|
@@ -192,34 +192,35 @@ cache_remote_node(const char *node_name)
|
|
|
|
/*!
|
|
* \internal
|
|
- * \brief Return host's hash table entry (creating one if needed)
|
|
+ * \brief Return a node's value from hash table (creating one if needed)
|
|
*
|
|
- * \param[in,out] values Hash table of values
|
|
- * \param[in] host Name of peer to look up
|
|
- * \param[in] xml XML describing the attribute
|
|
+ * \param[in,out] values Hash table of values
|
|
+ * \param[in] node_name Name of node to look up
|
|
+ * \param[in] xml XML describing the attribute
|
|
*
|
|
* \return Pointer to new or existing hash table entry
|
|
*/
|
|
static attribute_value_t *
|
|
-attrd_lookup_or_create_value(GHashTable *values, const char *host,
|
|
+attrd_lookup_or_create_value(GHashTable *values, const char *node_name,
|
|
const xmlNode *xml)
|
|
{
|
|
- attribute_value_t *v = g_hash_table_lookup(values, host);
|
|
+ attribute_value_t *v = g_hash_table_lookup(values, node_name);
|
|
int is_remote = 0;
|
|
|
|
- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
|
|
- if (is_remote) {
|
|
- cache_remote_node(host);
|
|
- }
|
|
-
|
|
if (v == NULL) {
|
|
v = calloc(1, sizeof(attribute_value_t));
|
|
CRM_ASSERT(v != NULL);
|
|
|
|
- pcmk__str_update(&v->nodename, host);
|
|
- v->is_remote = is_remote;
|
|
+ pcmk__str_update(&v->nodename, node_name);
|
|
g_hash_table_replace(values, v->nodename, v);
|
|
}
|
|
+
|
|
+ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
|
|
+ if (is_remote) {
|
|
+ attrd_set_value_flags(v, attrd_value_remote);
|
|
+ cache_remote_node(node_name);
|
|
+ }
|
|
+
|
|
return(v);
|
|
}
|
|
|
|
@@ -344,11 +345,11 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
|
|
}
|
|
}
|
|
|
|
- /* Set the seen flag for attribute processing held only in the own node. */
|
|
- v->seen = TRUE;
|
|
+ // This allows us to later detect local values that peer doesn't know about
|
|
+ attrd_set_value_flags(v, attrd_value_from_peer);
|
|
|
|
/* If this is a cluster node whose node ID we are learning, remember it */
|
|
- if ((v->nodeid == 0) && (v->is_remote == FALSE)
|
|
+ if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote)
|
|
&& (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID,
|
|
(int*)&v->nodeid) == 0) && (v->nodeid > 0)) {
|
|
record_peer_nodeid(v, host);
|
|
@@ -414,8 +415,9 @@ broadcast_unseen_local_values(void)
|
|
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
|
|
g_hash_table_iter_init(&vIter, a->values);
|
|
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
|
|
- if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname,
|
|
- pcmk__str_casei)) {
|
|
+ if (!pcmk_is_set(v->flags, attrd_value_from_peer)
|
|
+ && pcmk__str_eq(v->nodename, attrd_cluster->uname,
|
|
+ pcmk__str_casei)) {
|
|
if (sync == NULL) {
|
|
sync = create_xml_node(NULL, __func__);
|
|
crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE);
|
|
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
|
|
index 62637d1d7..738418857 100644
|
|
--- a/daemons/attrd/pacemaker-attrd.h
|
|
+++ b/daemons/attrd/pacemaker-attrd.h
|
|
@@ -140,15 +140,32 @@ typedef struct attribute_s {
|
|
|
|
} attribute_t;
|
|
|
|
+enum attrd_value_flags {
|
|
+ attrd_value_none = 0U,
|
|
+ attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node
|
|
+ attrd_value_from_peer = (1U << 1), // Value is from peer sync response
|
|
+};
|
|
+
|
|
typedef struct attribute_value_s {
|
|
uint32_t nodeid;
|
|
- gboolean is_remote;
|
|
char *nodename;
|
|
char *current;
|
|
char *requested;
|
|
- gboolean seen;
|
|
+ uint32_t flags; // Group of attrd_value_flags
|
|
} attribute_value_t;
|
|
|
|
+#define attrd_set_value_flags(attr_value, flags_to_set) do { \
|
|
+ (attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \
|
|
+ LOG_TRACE, "Value for node", (attr_value)->nodename, \
|
|
+ (attr_value)->flags, (flags_to_set), #flags_to_set); \
|
|
+ } while (0)
|
|
+
|
|
+#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \
|
|
+ (attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \
|
|
+ LOG_TRACE, "Value for node", (attr_value)->nodename, \
|
|
+ (attr_value)->flags, (flags_to_clear), #flags_to_clear); \
|
|
+ } while (0)
|
|
+
|
|
extern crm_cluster_t *attrd_cluster;
|
|
extern GHashTable *attributes;
|
|
extern GHashTable *peer_protocol_vers;
|
|
--
|
|
2.41.0
|
|
|
|
From 922c79f4e39dc9501ff7c0136df8043081b771cb Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 13 Dec 2023 16:51:39 -0600
|
|
Subject: [PATCH 04/12] Log: attrd: improve logging of CIB write result
|
|
|
|
When attrd requests a write-out of a changed attribute value, it saves the new
|
|
value in attribute_value_t:requested so it can be used in a log when the write
|
|
completes (which may occur after the value has already changed again, so we
|
|
can't log the current value at that time).
|
|
|
|
Previously, the log call relied on libqb mapping a NULL pointer to "(null)".
|
|
To be safer, do that explicitly.
|
|
|
|
Also, it previously erased "requested" after the write completed, even if the
|
|
write failed and would be reattempted. Leave the value alone in this case so
|
|
the result of the reattempt can be logged correctly.
|
|
---
|
|
daemons/attrd/attrd_cib.c | 18 ++++++++----------
|
|
1 file changed, 8 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
|
|
index ca1c5b9e0..ae6564856 100644
|
|
--- a/daemons/attrd/attrd_cib.c
|
|
+++ b/daemons/attrd/attrd_cib.c
|
|
@@ -274,11 +274,12 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use
|
|
|
|
g_hash_table_iter_init(&iter, a->values);
|
|
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
|
|
- do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested);
|
|
- free(v->requested);
|
|
- v->requested = NULL;
|
|
- if (rc != pcmk_ok) {
|
|
- a->changed = true; /* Attempt write out again */
|
|
+ do_crm_log(level, "* %s[%s]=%s",
|
|
+ a->id, peer, pcmk__s(v->requested, "(null)"));
|
|
+ if (rc == pcmk_ok) {
|
|
+ pcmk__str_update(&(v->requested), NULL);
|
|
+ } else {
|
|
+ a->changed = true; // Reattempt write below if we are still writer
|
|
}
|
|
}
|
|
|
|
@@ -605,11 +606,8 @@ write_attribute(attribute_t *a, bool ignore_delay)
|
|
/* Preservation of the attribute to transmit alert */
|
|
set_alert_attribute_value(alert_attribute_value, v);
|
|
|
|
- free(v->requested);
|
|
- v->requested = NULL;
|
|
- if (v->current) {
|
|
- v->requested = strdup(v->current);
|
|
- }
|
|
+ // Save this value so we can log it when write completes
|
|
+ pcmk__str_update(&(v->requested), v->current);
|
|
}
|
|
|
|
if (private_updates) {
|
|
--
|
|
2.41.0
|
|
|
|
From fa2830b1c4acf061faa40490620eb63c48a56a2b Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Wed, 13 Dec 2023 17:01:01 -0600
|
|
Subject: [PATCH 05/12] Low: libcrmcluster: avoid use-after-free in trace log
|
|
|
|
---
|
|
lib/cluster/membership.c | 16 ++++++++++++++--
|
|
1 file changed, 14 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
|
|
index f856ccaca..6958e65f2 100644
|
|
--- a/lib/cluster/membership.c
|
|
+++ b/lib/cluster/membership.c
|
|
@@ -143,11 +143,23 @@ crm_remote_peer_get(const char *node_name)
|
|
return node;
|
|
}
|
|
|
|
+/*!
|
|
+ * \brief Remove a node from the Pacemaker Remote node cache
|
|
+ *
|
|
+ * \param[in] node_name Name of node to remove from cache
|
|
+ *
|
|
+ * \note The caller must be careful not to use \p node_name after calling this
|
|
+ * function if it might be a pointer into the cache entry being removed.
|
|
+ */
|
|
void
|
|
crm_remote_peer_cache_remove(const char *node_name)
|
|
{
|
|
- if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
|
|
- crm_trace("removed %s from remote peer cache", node_name);
|
|
+ /* Do a lookup first, because node_name could be a pointer within the entry
|
|
+ * being removed -- we can't log it *after* removing it.
|
|
+ */
|
|
+ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) {
|
|
+ crm_trace("Removing %s from Pacemaker Remote node cache", node_name);
|
|
+ g_hash_table_remove(crm_remote_peer_cache, node_name);
|
|
}
|
|
}
|
|
|
|
--
|
|
2.41.0
|
|
|
|
From 14a7449a413f3f10eb80634c607386007d264475 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 09:24:38 -0600
|
|
Subject: [PATCH 06/12] Refactor: libcrmcluster,attrd: functionize removing
|
|
node from both caches
|
|
|
|
This future-proofs against a potential use-after-free (not possible with
|
|
current code) and isolates cache management better.
|
|
---
|
|
daemons/attrd/attrd_corosync.c | 3 +--
|
|
include/crm/cluster/internal.h | 9 +++----
|
|
lib/cluster/membership.c | 44 ++++++++++++++++++++++++++++++++++
|
|
3 files changed, 50 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
|
index ca20bdc0f..aa94a078e 100644
|
|
--- a/daemons/attrd/attrd_corosync.c
|
|
+++ b/daemons/attrd/attrd_corosync.c
|
|
@@ -554,8 +554,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
|
|
}
|
|
|
|
if (uncache) {
|
|
- crm_remote_peer_cache_remove(host);
|
|
- reap_crm_member(0, host);
|
|
+ pcmk__purge_node_from_cache(host, 0);
|
|
}
|
|
}
|
|
|
|
diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h
|
|
index e20ee4c59..c71069be2 100644
|
|
--- a/include/crm/cluster/internal.h
|
|
+++ b/include/crm/cluster/internal.h
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2004-2021 the Pacemaker project contributors
|
|
+ * Copyright 2004-2023 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -7,8 +7,8 @@
|
|
* version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
|
|
*/
|
|
|
|
-#ifndef CRM_CLUSTER_INTERNAL__H
|
|
-# define CRM_CLUSTER_INTERNAL__H
|
|
+#ifndef PCMK__CRM_CLUSTER_INTERNAL__H
|
|
+# define PCMK__CRM_CLUSTER_INTERNAL__H
|
|
|
|
# include <stdint.h> // uint32_t, uint64_t
|
|
# include <crm/cluster.h>
|
|
@@ -126,6 +126,7 @@ crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname,
|
|
uint32_t flags);
|
|
crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname,
|
|
const char *uuid);
|
|
+void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id);
|
|
|
|
void pcmk__refresh_node_caches_from_cib(xmlNode *cib);
|
|
crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname,
|
|
@@ -136,4 +137,4 @@ crm_node_t *pcmk__get_peer(unsigned int id, const char *uname,
|
|
crm_node_t *pcmk__get_peer_full(unsigned int id, const char *uname,
|
|
const char *uuid, int flags);
|
|
|
|
-#endif
|
|
+#endif // PCMK__CRM_CLUSTER_INTERNAL__H
|
|
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
|
|
index 6958e65f2..173aaaa17 100644
|
|
--- a/lib/cluster/membership.c
|
|
+++ b/lib/cluster/membership.c
|
|
@@ -341,6 +341,9 @@ crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
|
|
* \param[in] name Uname of node to remove (or NULL to ignore)
|
|
*
|
|
* \return Number of cache entries removed
|
|
+ *
|
|
+ * \note The caller must be careful not to use \p name after calling this
|
|
+ * function if it might be a pointer into the cache entry being removed.
|
|
*/
|
|
guint
|
|
reap_crm_member(uint32_t id, const char *name)
|
|
@@ -564,6 +567,47 @@ pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid,
|
|
return node;
|
|
}
|
|
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Purge a node from cache (both cluster and Pacemaker Remote)
|
|
+ *
|
|
+ * \param[in] node_name If not NULL, purge only nodes with this name
|
|
+ * \param[in] node_id If not 0, purge cluster nodes only if they have this ID
|
|
+ *
|
|
+ * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged.
|
|
+ * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote
|
|
+ * nodes that match \p node_name will be purged, and cluster nodes that
|
|
+ * match both \p node_name and \p node_id will be purged.
|
|
+ * \note The caller must be careful not to use \p node_name after calling this
|
|
+ * function if it might be a pointer into a cache entry being removed.
|
|
+ */
|
|
+void
|
|
+pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id)
|
|
+{
|
|
+ char *node_name_copy = NULL;
|
|
+
|
|
+ if ((node_name == NULL) && (node_id == 0U)) {
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ // Purge from Pacemaker Remote node cache
|
|
+ if ((node_name != NULL)
|
|
+ && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) {
|
|
+ /* node_name could be a pointer into the cache entry being purged,
|
|
+ * so reassign it to a copy before the original gets freed
|
|
+ */
|
|
+ node_name_copy = strdup(node_name);
|
|
+ CRM_ASSERT(node_name_copy != NULL);
|
|
+ node_name = node_name_copy;
|
|
+
|
|
+ crm_trace("Purging %s from Pacemaker Remote node cache", node_name);
|
|
+ g_hash_table_remove(crm_remote_peer_cache, node_name);
|
|
+ }
|
|
+
|
|
+ reap_crm_member(node_id, node_name);
|
|
+ free(node_name_copy);
|
|
+}
|
|
+
|
|
/*!
|
|
* \brief Get a node cache entry (cluster or Pacemaker Remote)
|
|
*
|
|
--
|
|
2.41.0
|
|
|
|
From 8d552c1b582a95f9879b15e2dd991a7f995e7eca Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 09:51:37 -0600
|
|
Subject: [PATCH 07/12] Fix: pacemaker-attrd,libcrmcluster: avoid
|
|
use-after-free when remote node in cluster node cache
|
|
|
|
Previously, pacemaker-attrd removed any conflicting entry from the cluster node
|
|
cache before adding a node to the remote node cache. However, if the name used
|
|
was a pointer into the cluster node cache entry being freed, it would be reused
|
|
to create the remote node cache entry.
|
|
|
|
This avoids that and also moves the functionality into libcrmcluster for better
|
|
isolation of cache management. It also corrects mistakenly setting errno to a
|
|
negative value.
|
|
---
|
|
daemons/attrd/attrd_corosync.c | 26 ++------------------------
|
|
lib/cluster/membership.c | 30 ++++++++++++++++++++++++++++--
|
|
2 files changed, 30 insertions(+), 26 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
|
index aa94a078e..1d0f87f04 100644
|
|
--- a/daemons/attrd/attrd_corosync.c
|
|
+++ b/daemons/attrd/attrd_corosync.c
|
|
@@ -166,28 +166,6 @@ broadcast_local_value(const attribute_t *a)
|
|
return v;
|
|
}
|
|
|
|
-/*!
|
|
- * \internal
|
|
- * \brief Ensure a Pacemaker Remote node is in the correct peer cache
|
|
- *
|
|
- * \param[in] node_name Name of Pacemaker Remote node to check
|
|
- */
|
|
-static void
|
|
-cache_remote_node(const char *node_name)
|
|
-{
|
|
- /* If we previously assumed this node was an unseen cluster node,
|
|
- * remove its entry from the cluster peer cache.
|
|
- */
|
|
- crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name, NULL);
|
|
-
|
|
- if (dup && (dup->uuid == NULL)) {
|
|
- reap_crm_member(0, node_name);
|
|
- }
|
|
-
|
|
- // Ensure node is in the remote peer cache
|
|
- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
|
|
-}
|
|
-
|
|
#define state_text(state) pcmk__s((state), "in unknown state")
|
|
|
|
/*!
|
|
@@ -218,7 +196,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *node_name,
|
|
crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
|
|
if (is_remote) {
|
|
attrd_set_value_flags(v, attrd_value_remote);
|
|
- cache_remote_node(node_name);
|
|
+ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
|
|
}
|
|
|
|
return(v);
|
|
@@ -273,7 +251,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
|
|
// Ensure remote nodes that come up are in the remote node cache
|
|
} else if (!gone && is_remote) {
|
|
- cache_remote_node(peer->uname);
|
|
+ CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL);
|
|
}
|
|
}
|
|
|
|
diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c
|
|
index 173aaaa17..a653617fa 100644
|
|
--- a/lib/cluster/membership.c
|
|
+++ b/lib/cluster/membership.c
|
|
@@ -102,26 +102,50 @@ crm_remote_peer_cache_size(void)
|
|
* \note When creating a new entry, this will leave the node state undetermined,
|
|
* so the caller should also call pcmk__update_peer_state() if the state
|
|
* is known.
|
|
+ * \note Because this can add and remove cache entries, callers should not
|
|
+ * assume any previously obtained cache entry pointers remain valid.
|
|
*/
|
|
crm_node_t *
|
|
crm_remote_peer_get(const char *node_name)
|
|
{
|
|
crm_node_t *node;
|
|
+ char *node_name_copy = NULL;
|
|
|
|
if (node_name == NULL) {
|
|
- errno = -EINVAL;
|
|
+ errno = EINVAL;
|
|
return NULL;
|
|
}
|
|
|
|
+ /* It's theoretically possible that the node was added to the cluster peer
|
|
+ * cache before it was known to be a Pacemaker Remote node. Remove that
|
|
+ * entry unless it has a node ID, which means the name actually is
|
|
+ * associated with a cluster node. (@TODO return an error in that case?)
|
|
+ */
|
|
+ node = pcmk__search_cluster_node_cache(0, node_name, NULL);
|
|
+ if ((node != NULL) && (node->uuid == NULL)) {
|
|
+ /* node_name could be a pointer into the cache entry being removed, so
|
|
+ * reassign it to a copy before the original gets freed
|
|
+ */
|
|
+ node_name_copy = strdup(node_name);
|
|
+ if (node_name_copy == NULL) {
|
|
+ errno = ENOMEM;
|
|
+ return NULL;
|
|
+ }
|
|
+ node_name = node_name_copy;
|
|
+ reap_crm_member(0, node_name);
|
|
+ }
|
|
+
|
|
/* Return existing cache entry if one exists */
|
|
node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
|
|
if (node) {
|
|
+ free(node_name_copy);
|
|
return node;
|
|
}
|
|
|
|
/* Allocate a new entry */
|
|
node = calloc(1, sizeof(crm_node_t));
|
|
if (node == NULL) {
|
|
+ free(node_name_copy);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -130,7 +154,8 @@ crm_remote_peer_get(const char *node_name)
|
|
node->uuid = strdup(node_name);
|
|
if (node->uuid == NULL) {
|
|
free(node);
|
|
- errno = -ENOMEM;
|
|
+ errno = ENOMEM;
|
|
+ free(node_name_copy);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -140,6 +165,7 @@ crm_remote_peer_get(const char *node_name)
|
|
|
|
/* Update the entry's uname, ensuring peer status callbacks are called */
|
|
update_peer_uname(node, node_name);
|
|
+ free(node_name_copy);
|
|
return node;
|
|
}
|
|
|
|
--
|
|
2.41.0
|
|
|
|
From 17ac8f0409021cbcd3e03a1b70518ab7abd9b259 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 10:03:05 -0600
|
|
Subject: [PATCH 08/12] Refactor: attrd: remove dead code
|
|
|
|
The peer change callback can't be called for a Pacemaker Remote node unless the
node is already in the remote node cache, so don't bother trying to add it.
Modifying the peer caches is forbidden in peer change callbacks anyway since it
could lead to use-after-free issues in libcrmcluster.
|
|
---
|
|
daemons/attrd/attrd_corosync.c | 4 ----
|
|
1 file changed, 4 deletions(-)
|
|
|
|
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
|
|
index 1d0f87f04..eba734c3a 100644
|
|
--- a/daemons/attrd/attrd_corosync.c
|
|
+++ b/daemons/attrd/attrd_corosync.c
|
|
@@ -248,10 +248,6 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
|
|
attrd_remove_voter(peer);
|
|
attrd_remove_peer_protocol_ver(peer->uname);
|
|
attrd_do_not_expect_from_peer(peer->uname);
|
|
-
|
|
- // Ensure remote nodes that come up are in the remote node cache
|
|
- } else if (!gone && is_remote) {
|
|
- CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL);
|
|
}
|
|
}
|
|
|
|
--
|
|
2.41.0
|
|
|
|
From 221c4d697edc0481817c206ce8fdd878afd98ca1 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 17:17:32 -0600
|
|
Subject: [PATCH 09/12] Low: libcrmcommon: handle disconnected attrd API
|
|
connections consistently
|
|
|
|
Drop send_attrd_request() in favor of using connect_and_send_attrd_request(),
since pcmk__connect_ipc() will return pcmk_rc_ok immediately if the API is
already connected.

All the attribute manager IPC APIs attempted the connection if not already
connected except for pcmk__attrd_api_query(). Now that it uses
connect_and_send_attrd_request(), they are all consistent.
|
|
---
|
|
lib/common/ipc_attrd.c | 28 +++++-----------------------
|
|
1 file changed, 5 insertions(+), 23 deletions(-)
|
|
|
|
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
|
|
index 56cdb5aba..e36b42cbc 100644
|
|
--- a/lib/common/ipc_attrd.c
|
|
+++ b/lib/common/ipc_attrd.c
|
|
@@ -190,12 +190,6 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
|
|
return pcmk_rc_ok;
|
|
}
|
|
|
|
-static int
|
|
-send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
|
|
-{
|
|
- return pcmk__send_ipc_request(api, request);
|
|
-}
|
|
-
|
|
int
|
|
pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
|
|
const char *resource, const char *operation,
|
|
@@ -229,11 +223,8 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
destroy_api(api);
|
|
|
|
- } else if (!pcmk_ipc_is_connected(api)) {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
-
|
|
} else {
|
|
- rc = send_attrd_request(api, request);
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
}
|
|
|
|
free_xml(request);
|
|
@@ -303,11 +294,8 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
destroy_api(api);
|
|
|
|
- } else if (!pcmk_ipc_is_connected(api)) {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
-
|
|
} else {
|
|
- rc = send_attrd_request(api, request);
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
}
|
|
|
|
free_xml(request);
|
|
@@ -346,7 +334,7 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_QUERY);
|
|
pcmk__xe_add_node(request, node, 0);
|
|
|
|
- rc = send_attrd_request(api, request);
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
free_xml(request);
|
|
|
|
if (node) {
|
|
@@ -386,11 +374,8 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
destroy_api(api);
|
|
|
|
- } else if (!pcmk_ipc_is_connected(api)) {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
-
|
|
} else {
|
|
- rc = send_attrd_request(api, request);
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
}
|
|
|
|
free_xml(request);
|
|
@@ -479,11 +464,8 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
destroy_api(api);
|
|
|
|
- } else if (!pcmk_ipc_is_connected(api)) {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
-
|
|
} else {
|
|
- rc = send_attrd_request(api, request);
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
}
|
|
|
|
free_xml(request);
|
|
--
|
|
2.41.0
|
|
|
|
From 85502a405c384fdf0331e43ec161910ee1d14973 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 17:29:11 -0600
|
|
Subject: [PATCH 10/12] Low: libcrmcommon: handle NULL attribute manager IPC
|
|
API connections consistently
|
|
|
|
Previously, all attribute manager IPC APIs except pcmk__attrd_api_query() would
create a temporary connection if passed a NULL argument for one. Now,
connect_and_send_attrd_request() does this itself, reducing code duplication and
making the handling consistent across all APIs.
|
|
---
|
|
lib/common/ipc_attrd.c | 116 +++++++++--------------------------------
|
|
1 file changed, 25 insertions(+), 91 deletions(-)
|
|
|
|
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
|
|
index e36b42cbc..68975c7b6 100644
|
|
--- a/lib/common/ipc_attrd.c
|
|
+++ b/lib/common/ipc_attrd.c
|
|
@@ -148,46 +148,39 @@ create_attrd_op(const char *user_name)
|
|
return attrd_op;
|
|
}
|
|
|
|
-static int
|
|
-create_api(pcmk_ipc_api_t **api)
|
|
-{
|
|
- int rc = pcmk_new_ipc_api(api, pcmk_ipc_attrd);
|
|
-
|
|
- if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not connect to attrd: %s", pcmk_rc_str(rc));
|
|
- }
|
|
-
|
|
- return rc;
|
|
-}
|
|
-
|
|
-static void
|
|
-destroy_api(pcmk_ipc_api_t *api)
|
|
-{
|
|
- pcmk_disconnect_ipc(api);
|
|
- pcmk_free_ipc_api(api);
|
|
- api = NULL;
|
|
-}
|
|
-
|
|
static int
|
|
connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
|
|
{
|
|
int rc = pcmk_rc_ok;
|
|
+ bool created_api = false;
|
|
+
|
|
+ if (api == NULL) {
|
|
+ rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd);
|
|
+ if (rc != pcmk_rc_ok) {
|
|
+ crm_err("Could not connect to attribute manager: %s",
|
|
+ pcmk_rc_str(rc));
|
|
+ return rc;
|
|
+ }
|
|
+ created_api = true;
|
|
+ }
|
|
|
|
rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5);
|
|
if (rc != pcmk_rc_ok) {
|
|
crm_err("Could not connect to %s: %s",
|
|
pcmk_ipc_name(api, true), pcmk_rc_str(rc));
|
|
- return rc;
|
|
- }
|
|
|
|
- rc = pcmk__send_ipc_request(api, request);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not send request to %s: %s",
|
|
- pcmk_ipc_name(api, true), pcmk_rc_str(rc));
|
|
- return rc;
|
|
+ } else {
|
|
+ rc = pcmk__send_ipc_request(api, request);
|
|
+ if (rc != pcmk_rc_ok) {
|
|
+ crm_err("Could not send request to %s: %s",
|
|
+ pcmk_ipc_name(api, true), pcmk_rc_str(rc));
|
|
+ }
|
|
}
|
|
|
|
- return pcmk_rc_ok;
|
|
+ if (created_api) {
|
|
+ pcmk_free_ipc_api(api);
|
|
+ }
|
|
+ return rc;
|
|
}
|
|
|
|
int
|
|
@@ -214,18 +207,7 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
|
|
crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE,
|
|
pcmk_is_set(options, pcmk__node_attr_remote));
|
|
|
|
- if (api == NULL) {
|
|
- rc = create_api(&api);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- return rc;
|
|
- }
|
|
-
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- destroy_api(api);
|
|
-
|
|
- } else {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- }
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
|
|
@@ -285,18 +267,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
|
|
pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap);
|
|
pcmk__xe_add_node(request, node, 0);
|
|
|
|
- if (api == NULL) {
|
|
- rc = create_api(&api);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- return rc;
|
|
- }
|
|
-
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- destroy_api(api);
|
|
-
|
|
- } else {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- }
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
|
|
@@ -365,18 +336,7 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH);
|
|
pcmk__xe_add_node(request, node, 0);
|
|
|
|
- if (api == NULL) {
|
|
- rc = create_api(&api);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- return rc;
|
|
- }
|
|
-
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- destroy_api(api);
|
|
-
|
|
- } else {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- }
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
|
|
@@ -455,18 +415,7 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
request = create_attrd_op(user_name);
|
|
populate_update_op(request, node, name, value, dampen, set, options);
|
|
|
|
- if (api == NULL) {
|
|
- rc = create_api(&api);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- return rc;
|
|
- }
|
|
-
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- destroy_api(api);
|
|
-
|
|
- } else {
|
|
- rc = connect_and_send_attrd_request(api, request);
|
|
- }
|
|
+ rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
|
|
@@ -547,23 +496,8 @@ pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampe
|
|
* request. Do that now, creating and destroying the API object if needed.
|
|
*/
|
|
if (pcmk__is_daemon) {
|
|
- bool created_api = false;
|
|
-
|
|
- if (api == NULL) {
|
|
- rc = create_api(&api);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- return rc;
|
|
- }
|
|
-
|
|
- created_api = true;
|
|
- }
|
|
-
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
free_xml(request);
|
|
-
|
|
- if (created_api) {
|
|
- destroy_api(api);
|
|
- }
|
|
}
|
|
|
|
return rc;
|
|
--
|
|
2.41.0
|
|
|
|
From 4b25e2e2cf52e6c772805309e1f3dd6bb7ce8fab Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 18:11:14 -0600
|
|
Subject: [PATCH 11/12] Log: controld,libcrmcommon: improve attrd IPC API
|
|
messages
|
|
|
|
Previously, connect_and_send_attrd_request() would log error messages for
failures, attrd IPC APIs would log debug messages with the result whether
success or failure, and then callers would log or output failures again.

Now, connect_and_send_attrd_request() does not log, the attrd IPC APIs log a
debug message before sending the request, and the callers log or output
failures.
|
|
---
|
|
daemons/controld/controld_attrd.c | 22 ++++-----
|
|
lib/common/ipc_attrd.c | 76 ++++++++++++-------------------
|
|
2 files changed, 38 insertions(+), 60 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c
|
|
index 958dc2f14..24c1e7068 100644
|
|
--- a/daemons/controld/controld_attrd.c
|
|
+++ b/daemons/controld/controld_attrd.c
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2006-2022 the Pacemaker project contributors
|
|
+ * Copyright 2006-2023 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -136,25 +136,23 @@ update_attrd_clear_failures(const char *host, const char *rsc, const char *op,
|
|
rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd);
|
|
}
|
|
if (rc == pcmk_rc_ok) {
|
|
- const char *op_desc = pcmk__s(op, "operations");
|
|
- const char *interval_desc = "all";
|
|
uint32_t attrd_opts = pcmk__node_attr_none;
|
|
|
|
- if (op != NULL) {
|
|
- interval_desc = pcmk__s(interval_spec, "nonrecurring");
|
|
- }
|
|
if (is_remote_node) {
|
|
pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote);
|
|
}
|
|
- crm_info("Asking attribute manager to clear failure of %s %s for %s "
|
|
- "on %s node %s", interval_desc, op_desc, rsc,
|
|
- node_type(is_remote_node), host);
|
|
rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op,
|
|
interval_spec, NULL, attrd_opts);
|
|
}
|
|
if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not clear failure attributes for %s on %s node %s%s: %s "
|
|
- CRM_XS " rc=%d", pcmk__s(rsc, "all resources"),
|
|
- node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc);
|
|
+ const char *interval_desc = "all";
|
|
+
|
|
+ if (op != NULL) {
|
|
+ interval_desc = pcmk__s(interval_spec, "nonrecurring");
|
|
+ }
|
|
+ crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s "
|
|
+ CRM_XS " rc=%d", interval_desc, pcmk__s(op, "operations"),
|
|
+ pcmk__s(rsc, "all resources"), node_type(is_remote_node), host,
|
|
+ when(), pcmk_rc_str(rc), rc);
|
|
}
|
|
}
|
|
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
|
|
index 68975c7b6..3951bd3df 100644
|
|
--- a/lib/common/ipc_attrd.c
|
|
+++ b/lib/common/ipc_attrd.c
|
|
@@ -157,24 +157,14 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
|
|
if (api == NULL) {
|
|
rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd);
|
|
if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not connect to attribute manager: %s",
|
|
- pcmk_rc_str(rc));
|
|
return rc;
|
|
}
|
|
created_api = true;
|
|
}
|
|
|
|
rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not connect to %s: %s",
|
|
- pcmk_ipc_name(api, true), pcmk_rc_str(rc));
|
|
-
|
|
- } else {
|
|
+ if (rc == pcmk_rc_ok) {
|
|
rc = pcmk__send_ipc_request(api, request);
|
|
- if (rc != pcmk_rc_ok) {
|
|
- crm_err("Could not send request to %s: %s",
|
|
- pcmk_ipc_name(api, true), pcmk_rc_str(rc));
|
|
- }
|
|
}
|
|
|
|
if (created_api) {
|
|
@@ -199,6 +189,17 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
|
|
node = target;
|
|
}
|
|
|
|
+ if (operation) {
|
|
+ interval_desc = pcmk__s(interval_spec, "nonrecurring");
|
|
+ op_desc = operation;
|
|
+ } else {
|
|
+ interval_desc = "all";
|
|
+ op_desc = "operations";
|
|
+ }
|
|
+ crm_debug("Asking %s to clear failure of %s %s for %s on %s",
|
|
+ pcmk_ipc_name(api, true), interval_desc, op_desc,
|
|
+ pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes"));
|
|
+
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE);
|
|
pcmk__xe_add_node(request, node, 0);
|
|
crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource);
|
|
@@ -210,19 +211,6 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node,
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
-
|
|
- if (operation) {
|
|
- interval_desc = interval_spec? interval_spec : "nonrecurring";
|
|
- op_desc = operation;
|
|
- } else {
|
|
- interval_desc = "all";
|
|
- op_desc = "operations";
|
|
- }
|
|
-
|
|
- crm_debug("Asked pacemaker-attrd to clear failure of %s %s for %s on %s: %s (%d)",
|
|
- interval_desc, op_desc, (resource? resource : "all resources"),
|
|
- (node? node : "all nodes"), pcmk_rc_str(rc), rc);
|
|
-
|
|
return rc;
|
|
}
|
|
|
|
@@ -254,13 +242,17 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
|
|
{
|
|
int rc = pcmk_rc_ok;
|
|
xmlNode *request = NULL;
|
|
- const char *display_host = (node ? node : "localhost");
|
|
const char *target = pcmk__node_attr_target(node);
|
|
|
|
if (target != NULL) {
|
|
node = target;
|
|
}
|
|
|
|
+ crm_debug("Asking %s to purge transient attributes%s for %s",
|
|
+ pcmk_ipc_name(api, true),
|
|
+ (reap? " and node cache entries" : ""),
|
|
+ pcmk__s(node, "local node"));
|
|
+
|
|
request = create_attrd_op(NULL);
|
|
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE);
|
|
@@ -270,10 +262,6 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap)
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
-
|
|
- crm_debug("Asked pacemaker-attrd to purge %s: %s (%d)",
|
|
- display_host, pcmk_rc_str(rc), rc);
|
|
-
|
|
return rc;
|
|
}
|
|
|
|
@@ -299,6 +287,10 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
}
|
|
}
|
|
|
|
+ crm_debug("Querying %s for value of '%s'%s%s",
|
|
+ pcmk_ipc_name(api, true), name,
|
|
+ ((node == NULL)? "" : " on "), pcmk__s(node, ""));
|
|
+
|
|
request = create_attrd_op(NULL);
|
|
|
|
crm_xml_add(request, PCMK__XA_ATTR_NAME, name);
|
|
@@ -307,15 +299,6 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
free_xml(request);
|
|
-
|
|
- if (node) {
|
|
- crm_debug("Queried pacemaker-attrd for %s on %s: %s (%d)",
|
|
- name, node, pcmk_rc_str(rc), rc);
|
|
- } else {
|
|
- crm_debug("Queried pacemaker-attrd for %s: %s (%d)",
|
|
- name, pcmk_rc_str(rc), rc);
|
|
- }
|
|
-
|
|
return rc;
|
|
}
|
|
|
|
@@ -324,13 +307,15 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
|
|
{
|
|
int rc = pcmk_rc_ok;
|
|
xmlNode *request = NULL;
|
|
- const char *display_host = (node ? node : "localhost");
|
|
const char *target = pcmk__node_attr_target(node);
|
|
|
|
if (target != NULL) {
|
|
node = target;
|
|
}
|
|
|
|
+ crm_debug("Asking %s to write all transient attributes for %s to CIB",
|
|
+ pcmk_ipc_name(api, true), pcmk__s(node, "local node"));
|
|
+
|
|
request = create_attrd_op(NULL);
|
|
|
|
crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH);
|
|
@@ -339,10 +324,6 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node)
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
-
|
|
- crm_debug("Asked pacemaker-attrd to refresh %s: %s (%d)",
|
|
- display_host, pcmk_rc_str(rc), rc);
|
|
-
|
|
return rc;
|
|
}
|
|
|
|
@@ -399,7 +380,6 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
{
|
|
int rc = pcmk_rc_ok;
|
|
xmlNode *request = NULL;
|
|
- const char *display_host = (node ? node : "localhost");
|
|
const char *target = NULL;
|
|
|
|
if (name == NULL) {
|
|
@@ -412,16 +392,16 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name,
|
|
node = target;
|
|
}
|
|
|
|
+ crm_debug("Asking %s to update '%s' to '%s' for %s",
|
|
+ pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"),
|
|
+ pcmk__s(node, "local node"));
|
|
+
|
|
request = create_attrd_op(user_name);
|
|
populate_update_op(request, node, name, value, dampen, set, options);
|
|
|
|
rc = connect_and_send_attrd_request(api, request);
|
|
|
|
free_xml(request);
|
|
-
|
|
- crm_debug("Asked pacemaker-attrd to update %s on %s: %s (%d)",
|
|
- name, display_host, pcmk_rc_str(rc), rc);
|
|
-
|
|
return rc;
|
|
}
|
|
|
|
--
|
|
2.41.0
|
|
|
|
From e5d22ef2a6b130768bd59ab5b7d8cd1155bb02a5 Mon Sep 17 00:00:00 2001
|
|
From: Ken Gaillot <kgaillot@redhat.com>
|
|
Date: Thu, 14 Dec 2023 17:54:01 -0600
|
|
Subject: [PATCH 12/12] Log: libcrmcommon: use log-friendly name in pacemakerd
|
|
IPC logs
|
|
|
|
---
|
|
lib/common/ipc_pacemakerd.c | 15 ++++++++-------
|
|
1 file changed, 8 insertions(+), 7 deletions(-)
|
|
|
|
diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c
|
|
index 2f0370974..6d6f6d6bf 100644
|
|
--- a/lib/common/ipc_pacemakerd.c
|
|
+++ b/lib/common/ipc_pacemakerd.c
|
|
@@ -210,15 +210,16 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
|
|
value = crm_element_value(reply, F_CRM_MSG_TYPE);
|
|
if (pcmk__str_empty(value)
|
|
|| !pcmk__str_eq(value, XML_ATTR_RESPONSE, pcmk__str_none)) {
|
|
- crm_info("Unrecognizable message from pacemakerd: "
|
|
+ crm_info("Unrecognizable message from %s: "
|
|
"message type '%s' not '" XML_ATTR_RESPONSE "'",
|
|
- pcmk__s(value, ""));
|
|
+ pcmk_ipc_name(api, true), pcmk__s(value, ""));
|
|
status = CRM_EX_PROTOCOL;
|
|
goto done;
|
|
}
|
|
|
|
if (pcmk__str_empty(crm_element_value(reply, XML_ATTR_REFERENCE))) {
|
|
- crm_info("Unrecognizable message from pacemakerd: no reference");
|
|
+ crm_info("Unrecognizable message from %s: no reference",
|
|
+ pcmk_ipc_name(api, true));
|
|
status = CRM_EX_PROTOCOL;
|
|
goto done;
|
|
}
|
|
@@ -244,8 +245,8 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
|
|
reply_data.reply_type = pcmk_pacemakerd_reply_shutdown;
|
|
reply_data.data.shutdown.status = atoi(crm_element_value(msg_data, XML_LRM_ATTR_OPSTATUS));
|
|
} else {
|
|
- crm_info("Unrecognizable message from pacemakerd: "
|
|
- "unknown command '%s'", pcmk__s(value, ""));
|
|
+ crm_info("Unrecognizable message from %s: unknown command '%s'",
|
|
+ pcmk_ipc_name(api, true), pcmk__s(value, ""));
|
|
status = CRM_EX_PROTOCOL;
|
|
goto done;
|
|
}
|
|
@@ -292,8 +293,8 @@ do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *ta
|
|
if (cmd) {
|
|
rc = pcmk__send_ipc_request(api, cmd);
|
|
if (rc != pcmk_rc_ok) {
|
|
- crm_debug("Couldn't send request to pacemakerd: %s rc=%d",
|
|
- pcmk_rc_str(rc), rc);
|
|
+ crm_debug("Couldn't send request to %s: %s rc=%d",
|
|
+ pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc);
|
|
}
|
|
free_xml(cmd);
|
|
} else {
|
|
--
|
|
2.41.0
|
|
|