From 79752652c5703fd52b82217586fad30e9689cc86 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Wed, 17 Jan 2024 17:38:42 -0500 Subject: [PATCH] Rebase on upstream 2.1.7 final release - Fix documentation for Pacemaker Remote schema transfers - Do not check CIB feature set version when CIB_file is set - Consolidate attrd cache handling - Avoid duplicating option metadata across daemons - Related: RHEL-7597 - Related: RHEL-14045 --- .gitignore | 1 + 003-schema-doc.patch | 42 + 004-attrd-cache-1.patch | 1443 +++++++++++++ 005-attrd-cache-2.patch | 2786 ++++++++++++++++++++++++ 006-cib-file-feature-set.patch | 276 +++ 007-option-metadata.patch | 3689 ++++++++++++++++++++++++++++++++ 008-attrd-prep.patch | 373 ++++ pacemaker.spec | 19 +- sources | 2 +- 9 files changed, 8628 insertions(+), 3 deletions(-) create mode 100644 003-schema-doc.patch create mode 100644 004-attrd-cache-1.patch create mode 100644 005-attrd-cache-2.patch create mode 100644 006-cib-file-feature-set.patch create mode 100644 007-option-metadata.patch create mode 100644 008-attrd-prep.patch diff --git a/.gitignore b/.gitignore index 7a6e9ba..8bfd4db 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ SOURCES/pacemaker-a3f4479.tar.gz /pacemaker-6fdc9deea.tar.gz /pacemaker-7534cc50a.tar.gz /pacemaker-c858c13cb.tar.gz +/pacemaker-0f7f88312.tar.gz diff --git a/003-schema-doc.patch b/003-schema-doc.patch new file mode 100644 index 0000000..293e50f --- /dev/null +++ b/003-schema-doc.patch @@ -0,0 +1,42 @@ +From a3bffc7c66bf6f796f977cffd44f223635b008c5 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 20 Dec 2023 13:33:47 -0800 +Subject: [PATCH] Doc: Pacemaker Explained: Add replace for + PCMK__REMOTE_SCHEMA_DIR + +So that the existing use in local-options.rst expands correctly. 
+ +Signed-off-by: Reid Wahl +--- + doc/sphinx/Makefile.am | 1 + + doc/sphinx/conf.py.in | 1 + + 3 files changed, 2 insertions(+) + create mode 100644 doc/sphinx/conf.py.in.rej + +diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am +index e48e19a..d0309ff 100644 +--- a/doc/sphinx/Makefile.am ++++ b/doc/sphinx/Makefile.am +@@ -134,6 +134,7 @@ $(BOOKS:%=%/conf.py): conf.py.in + -e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \ + -e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \ + -e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \ ++ -e 's#%PCMK__REMOTE_SCHEMA_DIR%#@PCMK__REMOTE_SCHEMA_DIR@#g' \ + $(<) > "$@" + + $(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst) +diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in +index 556eb72..511f029 100644 +--- a/doc/sphinx/conf.py.in ++++ b/doc/sphinx/conf.py.in +@@ -40,6 +40,7 @@ rst_prolog=""" + .. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env`` + .. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log + .. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES% ++.. |PCMK__REMOTE_SCHEMA_DIR| replace:: %PCMK__REMOTE_SCHEMA_DIR% + .. |REMOTE_DISTRO| replace:: AlmaLinux + .. 
|REMOTE_DISTRO_VER| replace:: 9 + """ +-- +2.31.1 + diff --git a/004-attrd-cache-1.patch b/004-attrd-cache-1.patch new file mode 100644 index 0000000..dd617c4 --- /dev/null +++ b/004-attrd-cache-1.patch @@ -0,0 +1,1443 @@ +From 543a1e9b6f22f13956a8ef22b20c8fe93dad7ae9 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 12 Dec 2023 16:08:44 -0600 +Subject: [PATCH 01/12] Refactor: libcrmcommon: support attrd purge requests + without clearing cache + +Nothing uses the new capability yet +--- + daemons/attrd/attrd_corosync.c | 4 +++- + daemons/attrd/attrd_messages.c | 8 +++++++- + daemons/attrd/pacemaker-attrd.h | 3 ++- + daemons/controld/controld_attrd.c | 2 +- + include/crm/common/ipc_attrd_internal.h | 7 ++++--- + include/crm_internal.h | 1 + + lib/common/ipc_attrd.c | 3 ++- + lib/common/ipc_client.c | 1 + + 8 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 86dc67b04..e6cd07f65 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -540,7 +540,9 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + GHashTableIter aIter; + + CRM_CHECK(host != NULL, return); +- crm_notice("Removing all %s attributes for peer %s", host, source); ++ crm_notice("Removing all %s attributes for node %s " ++ CRM_XS " %s reaping node from cache", ++ host, source, (uncache? 
"and" : "without")); + + g_hash_table_iter_init(&aIter, attributes); + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 89da6d894..ac32e18af 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -148,7 +148,13 @@ handle_remove_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- attrd_peer_remove(host, true, request->peer); ++ bool reap = false; ++ ++ if (pcmk__xe_get_bool_attr(request->xml, PCMK__XA_REAP, ++ &reap) != pcmk_rc_ok) { ++ reap = true; // Default to true for backward compatibility ++ } ++ attrd_peer_remove(host, reap, request->peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index b8929a7f7..70e2cb41b 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -42,8 +42,9 @@ + * 4 2.1.5 Multiple attributes can be updated in a single IPC + * message + * 5 2.1.5 Peers can request confirmation of a sent message ++ * 6 2.1.7 PCMK__ATTRD_CMD_PEER_REMOVE supports PCMK__XA_REAP + */ +-#define ATTRD_PROTOCOL_VERSION "5" ++#define ATTRD_PROTOCOL_VERSION "6" + + #define ATTRD_SUPPORTS_MULTI_MESSAGE(x) ((x) >= 4) + #define ATTRD_SUPPORTS_CONFIRMATION(x) ((x) >= 5) +diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c +index 923abb92d..958dc2f14 100644 +--- a/daemons/controld/controld_attrd.c ++++ b/daemons/controld/controld_attrd.c +@@ -117,7 +117,7 @@ update_attrd_remote_node_removed(const char *host, const char *user_name) + if (rc == pcmk_rc_ok) { + crm_trace("Asking attribute manager to purge Pacemaker Remote node %s", + host); +- rc = pcmk__attrd_api_purge(attrd_api, host); ++ rc = pcmk__attrd_api_purge(attrd_api, host, true); + } + if (rc 
!= pcmk_rc_ok) { + crm_err("Could not purge Pacemaker Remote node %s " +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index b1b7584bd..39a55ad1d 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -89,10 +89,11 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na + + /*! + * \internal +- * \brief Purge a node from pacemaker-attrd ++ * \brief Request removal of a node's transient attributes + * + * \param[in,out] api pacemaker-attrd IPC object +- * \param[in] node Node to remove ++ * \param[in] node Node whose attributes should be purged ++ * \param[in] reap If true, also request removal from node caches + * + * \note If \p api is NULL, a new temporary connection will be created + * just for this operation and destroyed afterwards. If \p api is +@@ -102,7 +103,7 @@ int pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *na + * + * \return Standard Pacemaker return code + */ +-int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); ++int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap); + + /*! 
+ * \internal +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 3bc8d096a..f800ab0cc 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -92,6 +92,7 @@ + #define PCMK__XA_MODE "mode" + #define PCMK__XA_NODE_START_STATE "node_start_state" + #define PCMK__XA_PATH "path" ++#define PCMK__XA_REAP "reap" + #define PCMK__XA_SCHEMA "schema" + #define PCMK__XA_SCHEMAS "schemas" + #define PCMK__XA_TASK "task" +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 9caaabec0..56cdb5aba 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -277,7 +277,7 @@ pcmk__attrd_api_delete(pcmk_ipc_api_t *api, const char *node, const char *name, + } + + int +-pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node) ++pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +@@ -291,6 +291,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node) + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); ++ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); + pcmk__xe_add_node(request, node, 0); + + if (api == NULL) { +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index 0d3865095..5e64e2324 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -759,6 +759,7 @@ create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name, + crm_xml_add(request, F_TYPE, T_ATTRD); + crm_xml_add(request, F_ORIG, crm_system_name); + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); ++ pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, true); + pcmk__xe_add_node(request, node_name, nodeid); + break; + +-- +2.41.0 + +From adc1d8ef587913e5505494e0205bd77a8e0a878e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 09:24:28 -0600 +Subject: [PATCH 02/12] Log: attrd: improve messages for CIB wipe + +Also, expose attrd_erase_attrs() as 
attrd_cib_erase_transient_attrs() and make +it take the node name as an argument, for future reuse. +--- + daemons/attrd/attrd_cib.c | 60 ++++++++++++++++++++------------- + daemons/attrd/pacemaker-attrd.h | 1 + + 2 files changed, 37 insertions(+), 24 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 80e5580d9..ca1c5b9e0 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -153,41 +153,44 @@ static void + attrd_erase_cb(xmlNode *msg, int call_id, int rc, xmlNode *output, + void *user_data) + { +- do_crm_log_unlikely(((rc != pcmk_ok)? LOG_NOTICE : LOG_DEBUG), +- "Cleared transient attributes: %s " +- CRM_XS " xpath=%s rc=%d", +- pcmk_strerror(rc), (char *) user_data, rc); ++ const char *node = pcmk__s((const char *) user_data, "a node"); ++ ++ if (rc == pcmk_ok) { ++ crm_info("Cleared transient node attributes for %s from CIB", node); ++ } else { ++ crm_err("Unable to clear transient node attributes for %s from CIB: %s", ++ node, pcmk_strerror(rc)); ++ } + } + + #define XPATH_TRANSIENT "//node_state[@uname='%s']/" XML_TAG_TRANSIENT_NODEATTRS + + /*! + * \internal +- * \brief Wipe all transient attributes for this node from the CIB ++ * \brief Wipe all transient node attributes for a node from the CIB + * +- * Clear any previous transient node attributes from the CIB. This is +- * normally done by the DC's controller when this node leaves the cluster, but +- * this handles the case where the node restarted so quickly that the +- * cluster layer didn't notice. +- * +- * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), +- * ideally we'd skip this and sync our attributes from the writer. +- * However, currently we reject any values for us that the writer has, in +- * attrd_peer_update(). 
++ * \param[in] node Node to clear attributes for + */ +-static void +-attrd_erase_attrs(void) ++void ++attrd_cib_erase_transient_attrs(const char *node) + { + int call_id = 0; +- char *xpath = crm_strdup_printf(XPATH_TRANSIENT, attrd_cluster->uname); ++ char *xpath = NULL; ++ ++ CRM_CHECK(node != NULL, return); ++ ++ xpath = crm_strdup_printf(XPATH_TRANSIENT, node); + +- crm_info("Clearing transient attributes from CIB " CRM_XS " xpath=%s", +- xpath); ++ crm_debug("Clearing transient node attributes for %s from CIB using %s", ++ node, xpath); + + call_id = the_cib->cmds->remove(the_cib, xpath, NULL, cib_xpath); +- the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, xpath, +- "attrd_erase_cb", attrd_erase_cb, +- free); ++ free(xpath); ++ ++ // strdup() is just for logging here, so ignore failure ++ the_cib->cmds->register_callback_full(the_cib, call_id, 120, FALSE, ++ strdup(node), "attrd_erase_cb", ++ attrd_erase_cb, free); + } + + /*! +@@ -197,8 +200,17 @@ attrd_erase_attrs(void) + void + attrd_cib_init(void) + { +- // We have no attribute values in memory, wipe the CIB to match +- attrd_erase_attrs(); ++ /* We have no attribute values in memory, so wipe the CIB to match. This is ++ * normally done by the DC's controller when this node leaves the cluster, but ++ * this handles the case where the node restarted so quickly that the ++ * cluster layer didn't notice. ++ * ++ * \todo If pacemaker-attrd respawns after crashing (see PCMK_ENV_RESPAWNED), ++ * ideally we'd skip this and sync our attributes from the writer. ++ * However, currently we reject any values for us that the writer has, in ++ * attrd_peer_update(). 
++ */ ++ attrd_cib_erase_transient_attrs(attrd_cluster->uname); + + // Set a trigger for reading the CIB (for the alerts section) + attrd_config_read = mainloop_add_trigger(G_PRIORITY_HIGH, attrd_read_options, NULL); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 70e2cb41b..62637d1d7 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -66,6 +66,7 @@ void attrd_ipc_fini(void); + int attrd_cib_connect(int max_retry); + void attrd_cib_disconnect(void); + void attrd_cib_init(void); ++void attrd_cib_erase_transient_attrs(const char *node); + + bool attrd_value_needs_expansion(const char *value); + int attrd_expand_value(const char *value, const char *old_value); +-- +2.41.0 + +From 9be38897eaa683ad7920503d9c9fd7db7a20a8ec Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 11:20:07 -0600 +Subject: [PATCH 03/12] Refactor: attrd: convert value booleans to flags + +--- + daemons/attrd/attrd_attributes.c | 7 +++--- + daemons/attrd/attrd_corosync.c | 38 +++++++++++++++++--------------- + daemons/attrd/pacemaker-attrd.h | 21 ++++++++++++++++-- + 3 files changed, 42 insertions(+), 24 deletions(-) + +diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c +index 388c181d7..8f32988be 100644 +--- a/daemons/attrd/attrd_attributes.c ++++ b/daemons/attrd/attrd_attributes.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2013-2022 the Pacemaker project contributors ++ * Copyright 2013-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -143,7 +143,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a, + crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); + crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); + pcmk__xe_add_node(xml, v->nodename, v->nodeid); +- if (v->is_remote != 0) { ++ if (pcmk_is_set(v->flags, attrd_value_remote)) { + crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); + } + crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); +@@ -166,8 +166,7 @@ attrd_clear_value_seen(void) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- v->seen = FALSE; +- crm_trace("Clear seen flag %s[%s] = %s.", a->id, v->nodename, v->current); ++ attrd_clear_value_flags(v, attrd_value_from_peer); + } + } + } +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index e6cd07f65..ca20bdc0f 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -192,34 +192,35 @@ cache_remote_node(const char *node_name) + + /*! 
+ * \internal +- * \brief Return host's hash table entry (creating one if needed) ++ * \brief Return a node's value from hash table (creating one if needed) + * +- * \param[in,out] values Hash table of values +- * \param[in] host Name of peer to look up +- * \param[in] xml XML describing the attribute ++ * \param[in,out] values Hash table of values ++ * \param[in] node_name Name of node to look up ++ * \param[in] xml XML describing the attribute + * + * \return Pointer to new or existing hash table entry + */ + static attribute_value_t * +-attrd_lookup_or_create_value(GHashTable *values, const char *host, ++attrd_lookup_or_create_value(GHashTable *values, const char *node_name, + const xmlNode *xml) + { +- attribute_value_t *v = g_hash_table_lookup(values, host); ++ attribute_value_t *v = g_hash_table_lookup(values, node_name); + int is_remote = 0; + +- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); +- if (is_remote) { +- cache_remote_node(host); +- } +- + if (v == NULL) { + v = calloc(1, sizeof(attribute_value_t)); + CRM_ASSERT(v != NULL); + +- pcmk__str_update(&v->nodename, host); +- v->is_remote = is_remote; ++ pcmk__str_update(&v->nodename, node_name); + g_hash_table_replace(values, v->nodename, v); + } ++ ++ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); ++ if (is_remote) { ++ attrd_set_value_flags(v, attrd_value_remote); ++ cache_remote_node(node_name); ++ } ++ + return(v); + } + +@@ -344,11 +345,11 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + } + } + +- /* Set the seen flag for attribute processing held only in the own node. 
*/ +- v->seen = TRUE; ++ // This allows us to later detect local values that peer doesn't know about ++ attrd_set_value_flags(v, attrd_value_from_peer); + + /* If this is a cluster node whose node ID we are learning, remember it */ +- if ((v->nodeid == 0) && (v->is_remote == FALSE) ++ if ((v->nodeid == 0) && !pcmk_is_set(v->flags, attrd_value_remote) + && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, + (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { + record_peer_nodeid(v, host); +@@ -414,8 +415,9 @@ broadcast_unseen_local_values(void) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, +- pcmk__str_casei)) { ++ if (!pcmk_is_set(v->flags, attrd_value_from_peer) ++ && pcmk__str_eq(v->nodename, attrd_cluster->uname, ++ pcmk__str_casei)) { + if (sync == NULL) { + sync = create_xml_node(NULL, __func__); + crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 62637d1d7..738418857 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -140,15 +140,32 @@ typedef struct attribute_s { + + } attribute_t; + ++enum attrd_value_flags { ++ attrd_value_none = 0U, ++ attrd_value_remote = (1U << 0), // Value is for Pacemaker Remote node ++ attrd_value_from_peer = (1U << 1), // Value is from peer sync response ++}; ++ + typedef struct attribute_value_s { + uint32_t nodeid; +- gboolean is_remote; + char *nodename; + char *current; + char *requested; +- gboolean seen; ++ uint32_t flags; // Group of attrd_value_flags + } attribute_value_t; + ++#define attrd_set_value_flags(attr_value, flags_to_set) do { \ ++ (attr_value)->flags = pcmk__set_flags_as(__func__, __LINE__, \ ++ LOG_TRACE, "Value for node", (attr_value)->nodename, \ ++ (attr_value)->flags, 
(flags_to_set), #flags_to_set); \ ++ } while (0) ++ ++#define attrd_clear_value_flags(attr_value, flags_to_clear) do { \ ++ (attr_value)->flags = pcmk__clear_flags_as(__func__, __LINE__, \ ++ LOG_TRACE, "Value for node", (attr_value)->nodename, \ ++ (attr_value)->flags, (flags_to_clear), #flags_to_clear); \ ++ } while (0) ++ + extern crm_cluster_t *attrd_cluster; + extern GHashTable *attributes; + extern GHashTable *peer_protocol_vers; +-- +2.41.0 + +From 922c79f4e39dc9501ff7c0136df8043081b771cb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 16:51:39 -0600 +Subject: [PATCH 04/12] Log: attrd: improve logging of CIB write result + +When attrd requests a write-out of a changed attribute value, it saves the new +value in attribute_value_t:requested so it can be used in a log when the write +completes (which may occur after the value has already changed again, so we +can't log the current value at that time). + +Previously, the log call relied on libqb mapping a NULL pointer to "(null)". +To be safer, do that explicitly. + +Also, it previously erased "requested" after the write completed, even if the +write failed and would be reattempted. Leave the value alone in this case so +the result of the reattempt can be logged correctly. 
+--- + daemons/attrd/attrd_cib.c | 18 ++++++++---------- + 1 file changed, 8 insertions(+), 10 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index ca1c5b9e0..ae6564856 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -274,11 +274,12 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use + + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) { +- do_crm_log(level, "* %s[%s]=%s", a->id, peer, v->requested); +- free(v->requested); +- v->requested = NULL; +- if (rc != pcmk_ok) { +- a->changed = true; /* Attempt write out again */ ++ do_crm_log(level, "* %s[%s]=%s", ++ a->id, peer, pcmk__s(v->requested, "(null)")); ++ if (rc == pcmk_ok) { ++ pcmk__str_update(&(v->requested), NULL); ++ } else { ++ a->changed = true; // Reattempt write below if we are still writer + } + } + +@@ -605,11 +606,8 @@ write_attribute(attribute_t *a, bool ignore_delay) + /* Preservation of the attribute to transmit alert */ + set_alert_attribute_value(alert_attribute_value, v); + +- free(v->requested); +- v->requested = NULL; +- if (v->current) { +- v->requested = strdup(v->current); +- } ++ // Save this value so we can log it when write completes ++ pcmk__str_update(&(v->requested), v->current); + } + + if (private_updates) { +-- +2.41.0 + +From fa2830b1c4acf061faa40490620eb63c48a56a2b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 13 Dec 2023 17:01:01 -0600 +Subject: [PATCH 05/12] Low: libcrmcluster: avoid use-after-free in trace log + +--- + lib/cluster/membership.c | 16 ++++++++++++++-- + 1 file changed, 14 insertions(+), 2 deletions(-) + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index f856ccaca..6958e65f2 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -143,11 +143,23 @@ crm_remote_peer_get(const char *node_name) + return node; + } + ++/*! 
++ * \brief Remove a node from the Pacemaker Remote node cache ++ * ++ * \param[in] node_name Name of node to remove from cache ++ * ++ * \note The caller must be careful not to use \p node_name after calling this ++ * function if it might be a pointer into the cache entry being removed. ++ */ + void + crm_remote_peer_cache_remove(const char *node_name) + { +- if (g_hash_table_remove(crm_remote_peer_cache, node_name)) { +- crm_trace("removed %s from remote peer cache", node_name); ++ /* Do a lookup first, because node_name could be a pointer within the entry ++ * being removed -- we can't log it *after* removing it. ++ */ ++ if (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL) { ++ crm_trace("Removing %s from Pacemaker Remote node cache", node_name); ++ g_hash_table_remove(crm_remote_peer_cache, node_name); + } + } + +-- +2.41.0 + +From 14a7449a413f3f10eb80634c607386007d264475 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 09:24:38 -0600 +Subject: [PATCH 06/12] Refactor: libcrmcluster,attrd: functionize removing + node from both caches + +This future-proofs against a potential use-after-free (not possible with +current code) and isolates cache management better. 
+--- + daemons/attrd/attrd_corosync.c | 3 +-- + include/crm/cluster/internal.h | 9 +++---- + lib/cluster/membership.c | 44 ++++++++++++++++++++++++++++++++++ + 3 files changed, 50 insertions(+), 6 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index ca20bdc0f..aa94a078e 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -554,8 +554,7 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + } + + if (uncache) { +- crm_remote_peer_cache_remove(host); +- reap_crm_member(0, host); ++ pcmk__purge_node_from_cache(host, 0); + } + } + +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index e20ee4c59..c71069be2 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2021 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -7,8 +7,8 @@ + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +-#ifndef CRM_CLUSTER_INTERNAL__H +-# define CRM_CLUSTER_INTERNAL__H ++#ifndef PCMK__CRM_CLUSTER_INTERNAL__H ++# define PCMK__CRM_CLUSTER_INTERNAL__H + + # include // uint32_t, uint64_t + # include +@@ -126,6 +126,7 @@ crm_node_t *pcmk__search_node_caches(unsigned int id, const char *uname, + uint32_t flags); + crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, + const char *uuid); ++void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); + + void pcmk__refresh_node_caches_from_cib(xmlNode *cib); + crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, +@@ -136,4 +137,4 @@ crm_node_t *pcmk__get_peer(unsigned int id, const char *uname, + crm_node_t *pcmk__get_peer_full(unsigned int id, const char *uname, + const char *uuid, int flags); + +-#endif ++#endif // PCMK__CRM_CLUSTER_INTERNAL__H +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 6958e65f2..173aaaa17 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -341,6 +341,9 @@ crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data) + * \param[in] name Uname of node to remove (or NULL to ignore) + * + * \return Number of cache entries removed ++ * ++ * \note The caller must be careful not to use \p name after calling this ++ * function if it might be a pointer into the cache entry being removed. + */ + guint + reap_crm_member(uint32_t id, const char *name) +@@ -564,6 +567,47 @@ pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, + return node; + } + ++/*! ++ * \internal ++ * \brief Purge a node from cache (both cluster and Pacemaker Remote) ++ * ++ * \param[in] node_name If not NULL, purge only nodes with this name ++ * \param[in] node_id If not 0, purge cluster nodes only if they have this ID ++ * ++ * \note If \p node_name is NULL and \p node_id is 0, no nodes will be purged. 
++ * If \p node_name is not NULL and \p node_id is not 0, Pacemaker Remote ++ * nodes that match \p node_name will be purged, and cluster nodes that ++ * match both \p node_name and \p node_id will be purged. ++ * \note The caller must be careful not to use \p node_name after calling this ++ * function if it might be a pointer into a cache entry being removed. ++ */ ++void ++pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) ++{ ++ char *node_name_copy = NULL; ++ ++ if ((node_name == NULL) && (node_id == 0U)) { ++ return; ++ } ++ ++ // Purge from Pacemaker Remote node cache ++ if ((node_name != NULL) ++ && (g_hash_table_lookup(crm_remote_peer_cache, node_name) != NULL)) { ++ /* node_name could be a pointer into the cache entry being purged, ++ * so reassign it to a copy before the original gets freed ++ */ ++ node_name_copy = strdup(node_name); ++ CRM_ASSERT(node_name_copy != NULL); ++ node_name = node_name_copy; ++ ++ crm_trace("Purging %s from Pacemaker Remote node cache", node_name); ++ g_hash_table_remove(crm_remote_peer_cache, node_name); ++ } ++ ++ reap_crm_member(node_id, node_name); ++ free(node_name_copy); ++} ++ + /*! + * \brief Get a node cache entry (cluster or Pacemaker Remote) + * +-- +2.41.0 + +From 8d552c1b582a95f9879b15e2dd991a7f995e7eca Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 09:51:37 -0600 +Subject: [PATCH 07/12] Fix: pacemaker-attrd,libcrmcluster: avoid + use-after-free when remote node in cluster node cache + +Previously, pacemaker-attrd removed any conflicting entry from the cluster node +cache before adding a node to the remote node cache. However, if the name used +was a pointer into the cluster node cache entry being freed, it would be reused +to create the remote node cache entry. + +This avoids that and also moves the functionality into libcrmcluster for better +isolation of cache management. It also corrects mistakenly setting errno to a +negative value. 
+--- + daemons/attrd/attrd_corosync.c | 26 ++------------------------ + lib/cluster/membership.c | 30 ++++++++++++++++++++++++++++-- + 2 files changed, 30 insertions(+), 26 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index aa94a078e..1d0f87f04 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -166,28 +166,6 @@ broadcast_local_value(const attribute_t *a) + return v; + } + +-/*! +- * \internal +- * \brief Ensure a Pacemaker Remote node is in the correct peer cache +- * +- * \param[in] node_name Name of Pacemaker Remote node to check +- */ +-static void +-cache_remote_node(const char *node_name) +-{ +- /* If we previously assumed this node was an unseen cluster node, +- * remove its entry from the cluster peer cache. +- */ +- crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name, NULL); +- +- if (dup && (dup->uuid == NULL)) { +- reap_crm_member(0, node_name); +- } +- +- // Ensure node is in the remote peer cache +- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +-} +- + #define state_text(state) pcmk__s((state), "in unknown state") + + /*! 
+@@ -218,7 +196,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *node_name, + crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); + if (is_remote) { + attrd_set_value_flags(v, attrd_value_remote); +- cache_remote_node(node_name); ++ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); + } + + return(v); +@@ -273,7 +251,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + + // Ensure remote nodes that come up are in the remote node cache + } else if (!gone && is_remote) { +- cache_remote_node(peer->uname); ++ CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL); + } + } + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 173aaaa17..a653617fa 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -102,26 +102,50 @@ crm_remote_peer_cache_size(void) + * \note When creating a new entry, this will leave the node state undetermined, + * so the caller should also call pcmk__update_peer_state() if the state + * is known. ++ * \note Because this can add and remove cache entries, callers should not ++ * assume any previously obtained cache entry pointers remain valid. + */ + crm_node_t * + crm_remote_peer_get(const char *node_name) + { + crm_node_t *node; ++ char *node_name_copy = NULL; + + if (node_name == NULL) { +- errno = -EINVAL; ++ errno = EINVAL; + return NULL; + } + ++ /* It's theoretically possible that the node was added to the cluster peer ++ * cache before it was known to be a Pacemaker Remote node. Remove that ++ * entry unless it has a node ID, which means the name actually is ++ * associated with a cluster node. (@TODO return an error in that case?) 
++ */ ++ node = pcmk__search_cluster_node_cache(0, node_name, NULL); ++ if ((node != NULL) && (node->uuid == NULL)) { ++ /* node_name could be a pointer into the cache entry being removed, so ++ * reassign it to a copy before the original gets freed ++ */ ++ node_name_copy = strdup(node_name); ++ if (node_name_copy == NULL) { ++ errno = ENOMEM; ++ return NULL; ++ } ++ node_name = node_name_copy; ++ reap_crm_member(0, node_name); ++ } ++ + /* Return existing cache entry if one exists */ + node = g_hash_table_lookup(crm_remote_peer_cache, node_name); + if (node) { ++ free(node_name_copy); + return node; + } + + /* Allocate a new entry */ + node = calloc(1, sizeof(crm_node_t)); + if (node == NULL) { ++ free(node_name_copy); + return NULL; + } + +@@ -130,7 +154,8 @@ crm_remote_peer_get(const char *node_name) + node->uuid = strdup(node_name); + if (node->uuid == NULL) { + free(node); +- errno = -ENOMEM; ++ errno = ENOMEM; ++ free(node_name_copy); + return NULL; + } + +@@ -140,6 +165,7 @@ crm_remote_peer_get(const char *node_name) + + /* Update the entry's uname, ensuring peer status callbacks are called */ + update_peer_uname(node, node_name); ++ free(node_name_copy); + return node; + } + +-- +2.41.0 + +From 17ac8f0409021cbcd3e03a1b70518ab7abd9b259 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 10:03:05 -0600 +Subject: [PATCH 08/12] Refactor: attrd: remove dead code + +The peer change callback can't be called for a Pacemaker Remote node unless the +node is already in the remote node cache, so don't bother trying to add it. +Modifying the peer caches is forbidden in peer change callbacks anyway since it +could lead to use-after-free issues in libcrmcluster. 
+--- + daemons/attrd/attrd_corosync.c | 4 ---- + 1 file changed, 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1d0f87f04..eba734c3a 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -248,10 +248,6 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + attrd_remove_voter(peer); + attrd_remove_peer_protocol_ver(peer->uname); + attrd_do_not_expect_from_peer(peer->uname); +- +- // Ensure remote nodes that come up are in the remote node cache +- } else if (!gone && is_remote) { +- CRM_ASSERT(crm_remote_peer_get(peer->uname) != NULL); + } + } + +-- +2.41.0 + +From 221c4d697edc0481817c206ce8fdd878afd98ca1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:17:32 -0600 +Subject: [PATCH 09/12] Low: libcrmcommon: handle disconnected attrd API + connections consistently + +Drop send_attrd_request() in favor of using connect_and_send_attrd_request(), +since pcmk__connect_ipc() will return pcmk_rc_ok immediately if the API is +already connected. + +All the attribute manager IPC APIs attempted the connection if not already +connected except for pcmk__attrd_api_query(). Now that it uses +connect_and_send_attrd_request(), they are all consistent. 
+--- + lib/common/ipc_attrd.c | 28 +++++----------------------- + 1 file changed, 5 insertions(+), 23 deletions(-) + +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 56cdb5aba..e36b42cbc 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -190,12 +190,6 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + return pcmk_rc_ok; + } + +-static int +-send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) +-{ +- return pcmk__send_ipc_request(api, request); +-} +- + int + pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + const char *resource, const char *operation, +@@ -229,11 +223,8 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -303,11 +294,8 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -346,7 +334,7 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_QUERY); + pcmk__xe_add_node(request, node, 0); + +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + free_xml(request); + + if (node) { +@@ -386,11 +374,8 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if 
(!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +@@ -479,11 +464,8 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + rc = connect_and_send_attrd_request(api, request); + destroy_api(api); + +- } else if (!pcmk_ipc_is_connected(api)) { +- rc = connect_and_send_attrd_request(api, request); +- + } else { +- rc = send_attrd_request(api, request); ++ rc = connect_and_send_attrd_request(api, request); + } + + free_xml(request); +-- +2.41.0 + +From 85502a405c384fdf0331e43ec161910ee1d14973 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:29:11 -0600 +Subject: [PATCH 10/12] Low: libcrmcommon: handle NULL attribute manager IPC + API connections consistently + +Previously, all attribute manager IPC APIs except pcmk__attrd_api_query() would +create a temporary connection if passed a NULL argument for one. Now, +connect_and_send_attrd_request() does this itself, reducing code duplication and +making the handling consistent across all APIs. 
+--- + lib/common/ipc_attrd.c | 116 +++++++++-------------------------------- + 1 file changed, 25 insertions(+), 91 deletions(-) + +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index e36b42cbc..68975c7b6 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -148,46 +148,39 @@ create_attrd_op(const char *user_name) + return attrd_op; + } + +-static int +-create_api(pcmk_ipc_api_t **api) +-{ +- int rc = pcmk_new_ipc_api(api, pcmk_ipc_attrd); +- +- if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to attrd: %s", pcmk_rc_str(rc)); +- } +- +- return rc; +-} +- +-static void +-destroy_api(pcmk_ipc_api_t *api) +-{ +- pcmk_disconnect_ipc(api); +- pcmk_free_ipc_api(api); +- api = NULL; +-} +- + static int + connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + { + int rc = pcmk_rc_ok; ++ bool created_api = false; ++ ++ if (api == NULL) { ++ rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); ++ if (rc != pcmk_rc_ok) { ++ crm_err("Could not connect to attribute manager: %s", ++ pcmk_rc_str(rc)); ++ return rc; ++ } ++ created_api = true; ++ } + + rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); + if (rc != pcmk_rc_ok) { + crm_err("Could not connect to %s: %s", + pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- return rc; +- } + +- rc = pcmk__send_ipc_request(api, request); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not send request to %s: %s", +- pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- return rc; ++ } else { ++ rc = pcmk__send_ipc_request(api, request); ++ if (rc != pcmk_rc_ok) { ++ crm_err("Could not send request to %s: %s", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc)); ++ } + } + +- return pcmk_rc_ok; ++ if (created_api) { ++ pcmk_free_ipc_api(api); ++ } ++ return rc; + } + + int +@@ -214,18 +207,7 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + crm_xml_add_int(request, PCMK__XA_ATTR_IS_REMOTE, + pcmk_is_set(options, pcmk__node_attr_remote)); + +- if (api == NULL) { +- rc = 
create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -285,18 +267,7 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + pcmk__xe_set_bool_attr(request, PCMK__XA_REAP, reap); + pcmk__xe_add_node(request, node, 0); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -365,18 +336,7 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH); + pcmk__xe_add_node(request, node, 0); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -455,18 +415,7 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + request = create_attrd_op(user_name); + populate_update_op(request, node, name, value, dampen, set, options); + +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- rc = connect_and_send_attrd_request(api, request); +- destroy_api(api); +- +- } else { +- rc = connect_and_send_attrd_request(api, request); +- } ++ rc = connect_and_send_attrd_request(api, request); + + free_xml(request); + +@@ -547,23 +496,8 @@ pcmk__attrd_api_update_list(pcmk_ipc_api_t *api, GList *attrs, const char *dampe + * request. 
Do that now, creating and destroying the API object if needed. + */ + if (pcmk__is_daemon) { +- bool created_api = false; +- +- if (api == NULL) { +- rc = create_api(&api); +- if (rc != pcmk_rc_ok) { +- return rc; +- } +- +- created_api = true; +- } +- + rc = connect_and_send_attrd_request(api, request); + free_xml(request); +- +- if (created_api) { +- destroy_api(api); +- } + } + + return rc; +-- +2.41.0 + +From 4b25e2e2cf52e6c772805309e1f3dd6bb7ce8fab Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 18:11:14 -0600 +Subject: [PATCH 11/12] Log: controld,libcrmcommon: improve attrd IPC API + messages + +Previously, connect_and_send_attrd_request() would log error messages for +failures, attrd IPC APIs would log debug messages with the result whether +success or failure, and then callers would log or output failures again. + +Now, connect_and_send_attrd_request() does not log, the attrd IPC APIs log a +debug message before sending the request, and the callers log or output +failures. +--- + daemons/controld/controld_attrd.c | 22 ++++----- + lib/common/ipc_attrd.c | 76 ++++++++++++------------------- + 2 files changed, 38 insertions(+), 60 deletions(-) + +diff --git a/daemons/controld/controld_attrd.c b/daemons/controld/controld_attrd.c +index 958dc2f14..24c1e7068 100644 +--- a/daemons/controld/controld_attrd.c ++++ b/daemons/controld/controld_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2006-2022 the Pacemaker project contributors ++ * Copyright 2006-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -136,25 +136,23 @@ update_attrd_clear_failures(const char *host, const char *rsc, const char *op, + rc = pcmk_new_ipc_api(&attrd_api, pcmk_ipc_attrd); + } + if (rc == pcmk_rc_ok) { +- const char *op_desc = pcmk__s(op, "operations"); +- const char *interval_desc = "all"; + uint32_t attrd_opts = pcmk__node_attr_none; + +- if (op != NULL) { +- interval_desc = pcmk__s(interval_spec, "nonrecurring"); +- } + if (is_remote_node) { + pcmk__set_node_attr_flags(attrd_opts, pcmk__node_attr_remote); + } +- crm_info("Asking attribute manager to clear failure of %s %s for %s " +- "on %s node %s", interval_desc, op_desc, rsc, +- node_type(is_remote_node), host); + rc = pcmk__attrd_api_clear_failures(attrd_api, host, rsc, op, + interval_spec, NULL, attrd_opts); + } + if (rc != pcmk_rc_ok) { +- crm_err("Could not clear failure attributes for %s on %s node %s%s: %s " +- CRM_XS " rc=%d", pcmk__s(rsc, "all resources"), +- node_type(is_remote_node), host, when(), pcmk_rc_str(rc), rc); ++ const char *interval_desc = "all"; ++ ++ if (op != NULL) { ++ interval_desc = pcmk__s(interval_spec, "nonrecurring"); ++ } ++ crm_err("Could not clear failure of %s %s for %s on %s node %s%s: %s " ++ CRM_XS " rc=%d", interval_desc, pcmk__s(op, "operations"), ++ pcmk__s(rsc, "all resources"), node_type(is_remote_node), host, ++ when(), pcmk_rc_str(rc), rc); + } + } +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 68975c7b6..3951bd3df 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -157,24 +157,14 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + if (api == NULL) { + rc = pcmk_new_ipc_api(&api, pcmk_ipc_attrd); + if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to attribute manager: %s", +- pcmk_rc_str(rc)); + return rc; + } + created_api = true; + } + + rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not connect to %s: %s", +- pcmk_ipc_name(api, true), 
pcmk_rc_str(rc)); +- +- } else { ++ if (rc == pcmk_rc_ok) { + rc = pcmk__send_ipc_request(api, request); +- if (rc != pcmk_rc_ok) { +- crm_err("Could not send request to %s: %s", +- pcmk_ipc_name(api, true), pcmk_rc_str(rc)); +- } + } + + if (created_api) { +@@ -199,6 +189,17 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + node = target; + } + ++ if (operation) { ++ interval_desc = pcmk__s(interval_spec, "nonrecurring"); ++ op_desc = operation; ++ } else { ++ interval_desc = "all"; ++ op_desc = "operations"; ++ } ++ crm_debug("Asking %s to clear failure of %s %s for %s on %s", ++ pcmk_ipc_name(api, true), interval_desc, op_desc, ++ pcmk__s(resource, "all resources"), pcmk__s(node, "all nodes")); ++ + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_CLEAR_FAILURE); + pcmk__xe_add_node(request, node, 0); + crm_xml_add(request, PCMK__XA_ATTR_RESOURCE, resource); +@@ -210,19 +211,6 @@ pcmk__attrd_api_clear_failures(pcmk_ipc_api_t *api, const char *node, + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- if (operation) { +- interval_desc = interval_spec? interval_spec : "nonrecurring"; +- op_desc = operation; +- } else { +- interval_desc = "all"; +- op_desc = "operations"; +- } +- +- crm_debug("Asked pacemaker-attrd to clear failure of %s %s for %s on %s: %s (%d)", +- interval_desc, op_desc, (resource? resource : "all resources"), +- (node? node : "all nodes"), pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -254,13 +242,17 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? node : "localhost"); + const char *target = pcmk__node_attr_target(node); + + if (target != NULL) { + node = target; + } + ++ crm_debug("Asking %s to purge transient attributes%s for %s", ++ pcmk_ipc_name(api, true), ++ (reap? 
" and node cache entries" : ""), ++ pcmk__s(node, "local node")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_PEER_REMOVE); +@@ -270,10 +262,6 @@ pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node, bool reap) + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to purge %s: %s (%d)", +- display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -299,6 +287,10 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + } + } + ++ crm_debug("Querying %s for value of '%s'%s%s", ++ pcmk_ipc_name(api, true), name, ++ ((node == NULL)? "" : " on "), pcmk__s(node, "")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_ATTR_NAME, name); +@@ -307,15 +299,6 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + + rc = connect_and_send_attrd_request(api, request); + free_xml(request); +- +- if (node) { +- crm_debug("Queried pacemaker-attrd for %s on %s: %s (%d)", +- name, node, pcmk_rc_str(rc), rc); +- } else { +- crm_debug("Queried pacemaker-attrd for %s: %s (%d)", +- name, pcmk_rc_str(rc), rc); +- } +- + return rc; + } + +@@ -324,13 +307,15 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? 
node : "localhost"); + const char *target = pcmk__node_attr_target(node); + + if (target != NULL) { + node = target; + } + ++ crm_debug("Asking %s to write all transient attributes for %s to CIB", ++ pcmk_ipc_name(api, true), pcmk__s(node, "local node")); ++ + request = create_attrd_op(NULL); + + crm_xml_add(request, PCMK__XA_TASK, PCMK__ATTRD_CMD_REFRESH); +@@ -339,10 +324,6 @@ pcmk__attrd_api_refresh(pcmk_ipc_api_t *api, const char *node) + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to refresh %s: %s (%d)", +- display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +@@ -399,7 +380,6 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + { + int rc = pcmk_rc_ok; + xmlNode *request = NULL; +- const char *display_host = (node ? node : "localhost"); + const char *target = NULL; + + if (name == NULL) { +@@ -412,16 +392,16 @@ pcmk__attrd_api_update(pcmk_ipc_api_t *api, const char *node, const char *name, + node = target; + } + ++ crm_debug("Asking %s to update '%s' to '%s' for %s", ++ pcmk_ipc_name(api, true), name, pcmk__s(value, "(null)"), ++ pcmk__s(node, "local node")); ++ + request = create_attrd_op(user_name); + populate_update_op(request, node, name, value, dampen, set, options); + + rc = connect_and_send_attrd_request(api, request); + + free_xml(request); +- +- crm_debug("Asked pacemaker-attrd to update %s on %s: %s (%d)", +- name, display_host, pcmk_rc_str(rc), rc); +- + return rc; + } + +-- +2.41.0 + +From e5d22ef2a6b130768bd59ab5b7d8cd1155bb02a5 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 14 Dec 2023 17:54:01 -0600 +Subject: [PATCH 12/12] Log: libcrmcommon: use log-friendly name in pacemakerd + IPC logs + +--- + lib/common/ipc_pacemakerd.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c +index 2f0370974..6d6f6d6bf 100644 +--- 
a/lib/common/ipc_pacemakerd.c ++++ b/lib/common/ipc_pacemakerd.c +@@ -210,15 +210,16 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + value = crm_element_value(reply, F_CRM_MSG_TYPE); + if (pcmk__str_empty(value) + || !pcmk__str_eq(value, XML_ATTR_RESPONSE, pcmk__str_none)) { +- crm_info("Unrecognizable message from pacemakerd: " ++ crm_info("Unrecognizable message from %s: " + "message type '%s' not '" XML_ATTR_RESPONSE "'", +- pcmk__s(value, "")); ++ pcmk_ipc_name(api, true), pcmk__s(value, "")); + status = CRM_EX_PROTOCOL; + goto done; + } + + if (pcmk__str_empty(crm_element_value(reply, XML_ATTR_REFERENCE))) { +- crm_info("Unrecognizable message from pacemakerd: no reference"); ++ crm_info("Unrecognizable message from %s: no reference", ++ pcmk_ipc_name(api, true)); + status = CRM_EX_PROTOCOL; + goto done; + } +@@ -244,8 +245,8 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply) + reply_data.reply_type = pcmk_pacemakerd_reply_shutdown; + reply_data.data.shutdown.status = atoi(crm_element_value(msg_data, XML_LRM_ATTR_OPSTATUS)); + } else { +- crm_info("Unrecognizable message from pacemakerd: " +- "unknown command '%s'", pcmk__s(value, "")); ++ crm_info("Unrecognizable message from %s: unknown command '%s'", ++ pcmk_ipc_name(api, true), pcmk__s(value, "")); + status = CRM_EX_PROTOCOL; + goto done; + } +@@ -292,8 +293,8 @@ do_pacemakerd_api_call(pcmk_ipc_api_t *api, const char *ipc_name, const char *ta + if (cmd) { + rc = pcmk__send_ipc_request(api, cmd); + if (rc != pcmk_rc_ok) { +- crm_debug("Couldn't send request to pacemakerd: %s rc=%d", +- pcmk_rc_str(rc), rc); ++ crm_debug("Couldn't send request to %s: %s rc=%d", ++ pcmk_ipc_name(api, true), pcmk_rc_str(rc), rc); + } + free_xml(cmd); + } else { +-- +2.41.0 + diff --git a/005-attrd-cache-2.patch b/005-attrd-cache-2.patch new file mode 100644 index 0000000..c1174e1 --- /dev/null +++ b/005-attrd-cache-2.patch @@ -0,0 +1,2786 @@ +From 843ef27542aac43ed7789b15255dd4f30004f0d1 Mon Sep 17 00:00:00 2001 +From: Ken 
Gaillot +Date: Mon, 18 Dec 2023 10:08:30 -0600 +Subject: [PATCH 1/9] Fix: attrd: write Pacemaker Remote node attributes even + if not in cache + +Previously, we required a node to be in one of the node caches in order to +write out its attributes. However for Pacemaker Remote nodes, we only need the +node name to do the write, and we already have that even if it's not cached. +--- + daemons/attrd/attrd_cib.c | 55 +++++++++++++++++++++------------------ + 1 file changed, 30 insertions(+), 25 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index ae65648..b22137a 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include // pcmk__get_peer_full() + + #include "pacemaker-attrd.h" + +@@ -556,20 +557,26 @@ write_attribute(attribute_t *a, bool ignore_delay) + /* Iterate over each peer value of this attribute */ + g_hash_table_iter_init(&iter, a->values); + while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &v)) { +- crm_node_t *peer = crm_get_peer_full(v->nodeid, v->nodename, +- CRM_GET_PEER_ANY); ++ const char *uuid = NULL; + +- /* If the value's peer info does not correspond to a peer, ignore it */ +- if (peer == NULL) { +- crm_notice("Cannot update %s[%s]=%s because peer not known", +- a->id, v->nodename, v->current); +- continue; +- } ++ if (pcmk_is_set(v->flags, attrd_value_remote)) { ++ /* If this is a Pacemaker Remote node, the node's UUID is the same ++ * as its name, which we already have. 
++ */ ++ uuid = v->nodename; + +- /* If we're just learning the peer's node id, remember it */ +- if (peer->id && (v->nodeid == 0)) { +- crm_trace("Learned ID %u for node %s", peer->id, v->nodename); +- v->nodeid = peer->id; ++ } else { ++ // This will create a cluster node cache entry if none exists ++ crm_node_t *peer = pcmk__get_peer_full(v->nodeid, v->nodename, NULL, ++ CRM_GET_PEER_ANY); ++ ++ uuid = peer->uuid; ++ ++ // Remember peer's node ID if we're just now learning it ++ if ((peer->id != 0) && (v->nodeid == 0)) { ++ crm_trace("Learned ID %u for node %s", peer->id, v->nodename); ++ v->nodeid = peer->id; ++ } + } + + /* If this is a private attribute, no update needs to be sent */ +@@ -578,29 +585,27 @@ write_attribute(attribute_t *a, bool ignore_delay) + continue; + } + +- /* If the peer is found, but its uuid is unknown, defer write */ +- if (peer->uuid == NULL) { ++ // Defer write if this is a cluster node that's never been seen ++ if (uuid == NULL) { + a->unknown_peer_uuids = true; +- crm_notice("Cannot update %s[%s]=%s because peer UUID not known " +- "(will retry if learned)", ++ crm_notice("Cannot update %s[%s]='%s' now because node's UUID is " ++ "unknown (will retry if learned)", + a->id, v->nodename, v->current); + continue; + } + + // Update this value as part of the CIB transaction we're building +- rc = add_attr_update(a, v->current, peer->uuid); ++ rc = add_attr_update(a, v->current, uuid); + if (rc != pcmk_rc_ok) { +- crm_err("Failed to update %s[%s]=%s (peer known as %s, UUID %s, " +- "ID %" PRIu32 "/%" PRIu32 "): %s", +- a->id, v->nodename, v->current, peer->uname, peer->uuid, +- peer->id, v->nodeid, pcmk_rc_str(rc)); ++ crm_err("Failed to update %s[%s]='%s': %s " ++ CRM_XS " node uuid=%s id=%" PRIu32, ++ a->id, v->nodename, v->current, pcmk_rc_str(rc), ++ uuid, v->nodeid); + continue; + } + +- crm_debug("Updating %s[%s]=%s (peer known as %s, UUID %s, ID " +- "%" PRIu32 "/%" PRIu32 ")", +- a->id, v->nodename, v->current, +- peer->uname, 
peer->uuid, peer->id, v->nodeid); ++ crm_debug("Updating %s[%s]=%s (node uuid=%s id=%" PRIu32 ")", ++ a->id, v->nodename, v->current, uuid, v->nodeid); + cib_updates++; + + /* Preservation of the attribute to transmit alert */ +-- +2.31.1 + +From 724e6db9830475e212381430a30014ccda43c901 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 14:59:54 -0600 +Subject: [PATCH 2/9] API: libcrmcluster: deprecate crm_get_peer_full() + +--- + daemons/attrd/attrd_messages.c | 1 + + daemons/controld/controld_execd.c | 2 +- + include/crm/cluster.h | 5 +---- + include/crm/cluster/compat.h | 5 ++++- + lib/cluster/membership.c | 21 ++++++--------------- + 5 files changed, 13 insertions(+), 21 deletions(-) + +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index ac32e18..53c70bd 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -12,6 +12,7 @@ + #include + + #include ++#include // pcmk__get_peer() + #include + + #include "pacemaker-attrd.h" +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 480d37d..381b0be 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = crm_get_peer_full(0, lrm_state->node_name, CRM_GET_PEER_ANY); ++ peer = pcmk__get_peer_full(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index b61fd70..137684d 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -146,9 +146,6 @@ void crm_remote_peer_cache_refresh(xmlNode *cib); + crm_node_t *crm_remote_peer_get(const char *node_name); + void crm_remote_peer_cache_remove(const char *node_name); + +-/* allows filtering of remote and cluster nodes using crm_get_peer_flags */ +-crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); +- + /* only searches cluster nodes */ + crm_node_t *crm_get_peer(unsigned int id, const char *uname); + +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index 89a03fd..fc68f27 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -26,6 +26,9 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use Pacemaker for cluster node cacheing ++crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); ++ + // \deprecated Use stonith_api_kick() from libstonithd instead + int crm_terminate_member(int nodeid, const char *uname, void *unused); + +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index a653617..52db840 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -634,21 +634,6 @@ pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id) + free(node_name_copy); + } + +-/*! +- * \brief Get a node cache entry (cluster or Pacemaker Remote) +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags +- * +- * \return (Possibly newly created) node cache entry +- */ +-crm_node_t * +-crm_get_peer_full(unsigned int id, const char *uname, int flags) +-{ +- return pcmk__get_peer_full(id, uname, NULL, flags); +-} +- + /*! 
+ * \internal + * \brief Search cluster node cache +@@ -1444,5 +1429,11 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + return stonith_api_kick(nodeid, uname, 120, TRUE); + } + ++crm_node_t * ++crm_get_peer_full(unsigned int id, const char *uname, int flags) ++{ ++ return pcmk__get_peer_full(id, uname, NULL, flags); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From 8a263fa254a62b07f3b591844e7eacd5cdd0538f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 15:07:47 -0600 +Subject: [PATCH 3/9] API: libcrmcluster: deprecate crm_get_peer() + +Use pcmk__get_peer() internally +--- + daemons/attrd/attrd_corosync.c | 8 +++----- + daemons/attrd/attrd_messages.c | 6 +++--- + daemons/based/based_callbacks.c | 5 +++-- + daemons/based/based_messages.c | 7 ++++--- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 3 ++- + daemons/controld/controld_execd.c | 2 +- + daemons/controld/controld_fencing.c | 2 +- + daemons/controld/controld_join_client.c | 6 +++--- + daemons/controld/controld_join_dc.c | 10 +++++----- + daemons/controld/controld_messages.c | 2 +- + daemons/controld/controld_remote_ra.c | 2 +- + daemons/controld/controld_te_actions.c | 8 +++++--- + daemons/controld/controld_te_events.c | 3 ++- + daemons/controld/controld_utils.c | 2 +- + daemons/fenced/fenced_commands.c | 8 ++++---- + daemons/fenced/fenced_remote.c | 8 +++++--- + include/crm/cluster.h | 3 --- + include/crm/cluster/compat.h | 3 +++ + lib/cluster/corosync.c | 8 ++++---- + lib/cluster/cpg.c | 8 ++++---- + lib/cluster/election.c | 6 +++--- + lib/cluster/membership.c | 21 ++++++--------------- + 23 files changed, 65 insertions(+), 68 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index eba734c..3b2880b 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -119,9 +119,7 @@ attrd_cpg_dispatch(cpg_handle_t handle, + if 
(xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { +- crm_node_t *peer = crm_get_peer(nodeid, from); +- +- attrd_peer_message(peer, xml); ++ attrd_peer_message(pcmk__get_peer(nodeid, from, NULL), xml); + } + + free_xml(xml); +@@ -254,7 +252,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { +- crm_node_t *known_peer = crm_get_peer(v->nodeid, host); ++ crm_node_t *known_peer = pcmk__get_peer(v->nodeid, host, NULL); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -439,7 +437,7 @@ attrd_peer_clear_failure(pcmk__request_t *request) + GHashTableIter iter; + regex_t regex; + +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 53c70bd..5536207 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -177,7 +177,7 @@ static xmlNode * + handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +@@ -194,7 +194,7 @@ handle_sync_response_request(pcmk__request_t *request) + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname,
attrd_cluster->uname, pcmk__str_casei)) { +@@ -212,7 +212,7 @@ handle_update_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- crm_node_t *peer = crm_get_peer(0, request->peer); ++ crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 4fac222..02f3425 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -928,7 +928,7 @@ forward_request(xmlNode *request) + + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + +- send_cluster_message(((host != NULL)? crm_get_peer(0, host) : NULL), ++ send_cluster_message(((host != NULL)? pcmk__get_peer(0, host, NULL) : NULL), + crm_msg_cib, request, FALSE); + + // Return the request to its original state +@@ -986,7 +986,8 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + /* send reply via HA to originating node */ + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); +- return send_cluster_message(crm_get_peer(0, originator), crm_msg_cib, msg, FALSE); ++ return send_cluster_message(pcmk__get_peer(0, originator, NULL), ++ crm_msg_cib, msg, FALSE); + } + + return FALSE; +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index a87d9ac..08521e4 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -127,7 +127,8 @@ send_sync_request(const char *host) + crm_xml_add(sync_me, F_CIB_DELEGATED, + stand_alone? "localhost" : crm_cluster->uname); + +- send_cluster_message(host ? crm_get_peer(0, host) : NULL, crm_msg_cib, sync_me, FALSE); ++ send_cluster_message((host == NULL)? 
NULL : pcmk__get_peer(0, host, NULL), ++ crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); + } + +@@ -443,8 +444,8 @@ sync_our_cib(xmlNode * request, gboolean all) + + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + +- if (send_cluster_message +- (all ? NULL : crm_get_peer(0, host), crm_msg_cib, replace_request, FALSE) == FALSE) { ++ if (!send_cluster_message(all? NULL : pcmk__get_peer(0, host, NULL), ++ crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; + } + free_xml(replace_request); +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index b69e821..c2953b5 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? */ + +- peer = crm_get_peer(0, from); ++ peer = pcmk__get_peer(0, from, NULL); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 70ffecc..6e22067 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -265,7 +265,8 @@ do_dc_release(long long action, + crm_info("DC role released"); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; +- crm_node_t *node = crm_get_peer(0, controld_globals.our_nodename); ++ crm_node_t *node = pcmk__get_peer(0, controld_globals.our_nodename, ++ NULL); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 381b0be..45b6b58 100644 +--- a/daemons/controld/controld_execd.c ++++ 
b/daemons/controld/controld_execd.c +@@ -1752,7 +1752,7 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + to_sys = CRM_SYSTEM_TENGINE; + } + +- peer = crm_get_peer(0, controld_globals.our_nodename); ++ peer = pcmk__get_peer(0, controld_globals.our_nodename, NULL); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 9557d9e..6c0ee09 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -374,7 +374,7 @@ execute_stonith_cleanup(void) + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; +- crm_node_t *target_node = crm_get_peer(0, target); ++ crm_node_t *target_node = pcmk__get_peer(0, target, NULL); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 805ecbd..2b5267d 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -35,7 +35,7 @@ update_dc_expected(const xmlNode *msg) + { + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { +- crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); ++ crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -177,7 +177,7 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); +- send_cluster_message(crm_get_peer(0, controld_globals.dc_name), ++ send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } +@@ -333,7 +333,7 
@@ do_cl_join_finalize_respond(long long action, + } + } + +- send_cluster_message(crm_get_peer(0, controld_globals.dc_name), ++ send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 2fe6710..45e1eba 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = crm_get_peer(0, join_to); ++ member = pcmk__get_peer(0, join_to, NULL); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -332,7 +332,7 @@ do_dc_join_offer_one(long long action, + * well, to ensure the correct value for max_generation_from. + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { +- member = crm_get_peer(0, controld_globals.our_nodename); ++ member = pcmk__get_peer(0, controld_globals.our_nodename, NULL); + join_make_offer(NULL, member, NULL); + } + +@@ -396,7 +396,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = crm_get_peer(0, join_from); ++ join_node = pcmk__get_peer(0, join_from, NULL); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -732,7 +732,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = crm_get_peer(0, join_from); ++ peer = pcmk__get_peer(0, join_from, NULL); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -866,7 +866,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + return; + } + +- join_node = crm_get_peer(0, join_to); ++ join_node = 
pcmk__get_peer(0, join_to, NULL); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 39f3c7a..8d3cef7 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -458,7 +458,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) + ref, pcmk__s(host_to, "broadcast")); + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { +- node_to = crm_get_peer(0, host_to); ++ node_to = pcmk__get_peer(0, host_to, NULL); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index d692ef6..a9c398d 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,7 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. 
+ */ +- conn_node = crm_get_peer(0, node->conn_host); ++ conn_node = pcmk__get_peer(0, node->conn_host, NULL); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index fe6b744..e76174b 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -158,7 +158,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { +- crm_node_t *peer = crm_get_peer(0, router_node); ++ crm_node_t *peer = pcmk__get_peer(0, router_node, NULL); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -170,7 +170,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + +- rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_crmd, cmd, TRUE); ++ rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); + +@@ -421,7 +422,8 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + I_NULL, &msg); + + } else { +- rc = send_cluster_message(crm_get_peer(0, router_node), crm_msg_lrmd, cmd, TRUE); ++ rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ crm_msg_lrmd, cmd, TRUE); + } + + free(counter); +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index 28977c0..c8cceed 100644 +--- a/daemons/controld/controld_te_events.c ++++ b/daemons/controld/controld_te_events.c +@@ -119,7 +119,8 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { +- crm_node_t *node = crm_get_peer(0, router); ++ 
crm_node_t *node = pcmk__get_peer(0, router, NULL); ++ + if (node) { + router_uuid = node->uuid; + } +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index 9b306ee..55790c0 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -734,7 +734,7 @@ update_dc(xmlNode * msg) + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { +- crm_node_t *dc_node = crm_get_peer(0, controld_globals.dc_name); ++ crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 7a62ed6..28f08dd 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -645,7 +645,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + } + + if (device->include_nodeid && (cmd->target != NULL)) { +- crm_node_t *node = crm_get_peer(0, cmd->target); ++ crm_node_t *node = pcmk__get_peer(0, cmd->target, NULL); + + cmd->target_nodeid = node->id; + } +@@ -2402,8 +2402,8 @@ stonith_send_reply(const xmlNode *reply, int call_options, + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { +- send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, +- reply, FALSE); ++ send_cluster_message(pcmk__get_peer(0, remote_peer, NULL), ++ crm_msg_stonith_ng, reply, FALSE); + } + } + +@@ -3371,7 +3371,7 @@ handle_fence_request(pcmk__request_t *request) + crm_xml_add(request->xml, F_STONITH_CLIENTID, + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); +- send_cluster_message(crm_get_peer(0, alternate_host), ++ send_cluster_message(pcmk__get_peer(0, alternate_host, NULL), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); +diff --git a/daemons/fenced/fenced_remote.c 
b/daemons/fenced/fenced_remote.c +index 843b3d4..3c176c8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1030,7 +1030,7 @@ merge_duplicates(remote_fencing_op_t *op) + op->id, other->id, other->target); + continue; + } +- if (!fencing_peer_active(crm_get_peer(0, other->originator))) { ++ if (!fencing_peer_active(pcmk__get_peer(0, other->originator, NULL))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1663,7 +1663,8 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + +- send_cluster_message(crm_get_peer(0, client_node), crm_msg_stonith_ng, update, FALSE); ++ send_cluster_message(pcmk__get_peer(0, client_node, NULL), ++ crm_msg_stonith_ng, update, FALSE); + + free_xml(update); + +@@ -1916,7 +1917,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); + } + +- send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); ++ send_cluster_message(pcmk__get_peer(0, peer->host, NULL), ++ crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); + return; +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index 137684d..302b807 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -146,9 +146,6 @@ void crm_remote_peer_cache_refresh(xmlNode *cib); + crm_node_t *crm_remote_peer_get(const char *node_name); + void crm_remote_peer_cache_remove(const char *node_name); + +-/* only searches cluster nodes */ +-crm_node_t *crm_get_peer(unsigned int id, const char *uname); +- + guint crm_active_peers(void); + gboolean crm_is_peer_active(const crm_node_t * node); + guint reap_crm_member(uint32_t id, const 
char *name); +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index fc68f27..e853fd8 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -26,6 +26,9 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use Pacemaker for cluster node cacheing ++crm_node_t *crm_get_peer(unsigned int id, const char *uname); ++ + // \deprecated Do not use Pacemaker for cluster node cacheing + crm_node_t *crm_get_peer_full(unsigned int id, const char *uname, int flags); + +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 08280ce..34a31fb 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = crm_get_peer(id, NULL); ++ node = pcmk__get_peer(id, NULL, NULL); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = crm_get_peer(id, name); ++ node = pcmk__get_peer(id, name, NULL); + free(name); + } + +@@ -480,7 +480,7 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + } + + // Ensure local node always exists in peer cache +- peer = crm_get_peer(cluster->nodeid, cluster->uname); ++ peer = pcmk__get_peer(cluster->nodeid, cluster->uname, NULL); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -640,7 +640,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); +- crm_get_peer(nodeid, name); ++ pcmk__get_peer(nodeid, name, NULL); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index d1decc6..778368f 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -465,7 +465,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + 
msg->sender.id = nodeid; + if (msg->sender.size == 0) { +- crm_node_t *peer = crm_get_peer(nodeid, NULL); ++ crm_node_t *peer = pcmk__get_peer(nodeid, NULL, NULL); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u is unknown", nodeid); +@@ -526,7 +526,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + } + + // Is this necessary? +- crm_get_peer(msg->sender.id, msg->sender.uname); ++ pcmk__get_peer(msg->sender.id, msg->sender.uname, NULL); + + crm_trace("Payload: %.200s", data); + return data; +@@ -720,7 +720,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + for (i = 0; i < member_list_entries; i++) { +- crm_node_t *peer = crm_get_peer(member_list[i].nodeid, NULL); ++ crm_node_t *peer = pcmk__get_peer(member_list[i].nodeid, NULL, NULL); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -873,7 +873,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = crm_get_peer(id, NULL); ++ peer = pcmk__get_peer(id, NULL, NULL); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index ebbae72..31867f2 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = crm_get_peer(0, e->uname); ++ our_node = pcmk__get_peer(0, e->uname, NULL); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = crm_get_peer(0, vote.from); +- our_node = crm_get_peer(0, e->uname); ++ your_node = pcmk__get_peer(0, vote.from, NULL); ++ our_node = pcmk__get_peer(0, e->uname, NULL); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + 
pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 52db840..41e0fa3 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -868,21 +868,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + return node; + } + +-/*! +- * \brief Get a cluster node cache entry +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * +- * \return (Possibly newly created) cluster node cache entry +- */ +-/* coverity[-alloc] Memory is referenced in one or both hashtables */ +-crm_node_t * +-crm_get_peer(unsigned int id, const char *uname) +-{ +- return pcmk__get_peer(id, uname, NULL); +-} +- + /*! + * \internal + * \brief Update a node's uname +@@ -1429,6 +1414,12 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + return stonith_api_kick(nodeid, uname, 120, TRUE); + } + ++crm_node_t * ++crm_get_peer(unsigned int id, const char *uname) ++{ ++ return pcmk__get_peer(id, uname, NULL); ++} ++ + crm_node_t * + crm_get_peer_full(unsigned int id, const char *uname, int flags) + { +-- +2.31.1 + +From 39e949a698afb5b0177b05e7d81b403cbb27a57a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 15:23:59 -0600 +Subject: [PATCH 4/9] Refactor: libcrmcluster: consolidate pcmk__get_peer() and + pcmk__get_peer_full() + +... 
into a new function pcmk__get_node() (since it can handle Pacemaker Remote +nodes, which aren't peers) +--- + daemons/attrd/attrd_cib.c | 6 +-- + daemons/attrd/attrd_corosync.c | 10 +++-- + daemons/attrd/attrd_messages.c | 11 ++++-- + daemons/based/based_callbacks.c | 10 +++-- + daemons/based/based_messages.c | 14 +++++-- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 4 +- + daemons/controld/controld_execd.c | 5 ++- + daemons/controld/controld_fencing.c | 5 ++- + daemons/controld/controld_join_client.c | 9 +++-- + daemons/controld/controld_join_dc.c | 11 +++--- + daemons/controld/controld_messages.c | 3 +- + daemons/controld/controld_remote_ra.c | 2 +- + daemons/controld/controld_te_actions.c | 9 +++-- + daemons/controld/controld_te_events.c | 3 +- + daemons/controld/controld_utils.c | 3 +- + daemons/fenced/fenced_commands.c | 9 +++-- + daemons/fenced/fenced_remote.c | 9 +++-- + include/crm/cluster/internal.h | 8 ++-- + lib/cluster/corosync.c | 9 +++-- + lib/cluster/cpg.c | 13 ++++--- + lib/cluster/election.c | 6 +-- + lib/cluster/membership.c | 52 ++++++++----------------- + 23 files changed, 116 insertions(+), 97 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index b22137a..7018a32 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -20,7 +20,7 @@ + #include + #include + #include +-#include // pcmk__get_peer_full() ++#include // pcmk__get_node() + + #include "pacemaker-attrd.h" + +@@ -567,8 +567,8 @@ write_attribute(attribute_t *a, bool ignore_delay) + + } else { + // This will create a cluster node cache entry if none exists +- crm_node_t *peer = pcmk__get_peer_full(v->nodeid, v->nodename, NULL, +- CRM_GET_PEER_ANY); ++ crm_node_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, ++ CRM_GET_PEER_ANY); + + uuid = peer->uuid; + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 3b2880b..c9e11e6 100644 +--- 
a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -119,7 +119,9 @@ attrd_cpg_dispatch(cpg_handle_t handle, + if (xml == NULL) { + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { +- attrd_peer_message(pcmk__get_peer(nodeid, from, NULL), xml); ++ attrd_peer_message(pcmk__get_node(nodeid, from, NULL, ++ CRM_GET_PEER_CLUSTER), ++ xml); + } + + free_xml(xml); +@@ -252,7 +254,8 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { +- crm_node_t *known_peer = pcmk__get_peer(v->nodeid, host, NULL); ++ crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -437,7 +440,8 @@ attrd_peer_clear_failure(pcmk__request_t *request) + GHashTableIter iter; + regex_t regex; + +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 5536207..c6c1b9a 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -12,7 +12,7 @@ + #include + + #include +-#include // pcmk__get_peer() ++#include // pcmk__get_node() + #include + + #include "pacemaker-attrd.h" +@@ -177,7 +177,8 @@ static xmlNode * + handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, 
NULL); +@@ -194,7 +195,8 @@ handle_sync_response_request(pcmk__request_t *request) + return handle_unknown_request(request); + } else { + if (request->peer != NULL) { +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { +@@ -212,7 +214,8 @@ handle_update_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); +- crm_node_t *peer = pcmk__get_peer(0, request->peer, NULL); ++ crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, ++ CRM_GET_PEER_CLUSTER); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 02f3425..b1f3b4b 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -910,6 +910,7 @@ forward_request(xmlNode *request) + const char *originator = crm_element_value(request, F_ORIG); + const char *client_name = crm_element_value(request, F_CIB_CLIENTNAME); + const char *call_id = crm_element_value(request, F_CIB_CALLID); ++ crm_node_t *peer = NULL; + + int log_level = LOG_INFO; + +@@ -928,8 +929,10 @@ forward_request(xmlNode *request) + + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + +- send_cluster_message(((host != NULL)? 
pcmk__get_peer(0, host, NULL) : NULL), +- crm_msg_cib, request, FALSE); ++ if (host != NULL) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ send_cluster_message(peer, crm_msg_cib, request, FALSE); + + // Return the request to its original state + xml_remove_prop(request, F_CIB_DELEGATED); +@@ -986,7 +989,8 @@ send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + /* send reply via HA to originating node */ + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); +- return send_cluster_message(pcmk__get_peer(0, originator, NULL), ++ return send_cluster_message(pcmk__get_node(0, originator, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_cib, msg, FALSE); + } + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 08521e4..ff1a6aa 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -118,6 +118,7 @@ void + send_sync_request(const char *host) + { + xmlNode *sync_me = create_xml_node(NULL, "sync-me"); ++ crm_node_t *peer = NULL; + + crm_info("Requesting re-sync from %s", (host? host : "all peers")); + sync_in_progress = 1; +@@ -127,8 +128,10 @@ send_sync_request(const char *host) + crm_xml_add(sync_me, F_CIB_DELEGATED, + stand_alone? "localhost" : crm_cluster->uname); + +- send_cluster_message((host == NULL)? 
NULL : pcmk__get_peer(0, host, NULL), +- crm_msg_cib, sync_me, FALSE); ++ if (host != NULL) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ send_cluster_message(peer, crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); + } + +@@ -418,6 +421,7 @@ sync_our_cib(xmlNode * request, gboolean all) + const char *host = crm_element_value(request, F_ORIG); + const char *op = crm_element_value(request, F_CIB_OPERATION); + ++ crm_node_t *peer = NULL; + xmlNode *replace_request = NULL; + + CRM_CHECK(the_cib != NULL, return -EINVAL); +@@ -444,8 +448,10 @@ sync_our_cib(xmlNode * request, gboolean all) + + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + +- if (!send_cluster_message(all? NULL : pcmk__get_peer(0, host, NULL), +- crm_msg_cib, replace_request, FALSE)) { ++ if (!all) { ++ peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ } ++ if (!send_cluster_message(peer, crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; + } + free_xml(replace_request); +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index c2953b5..fa1df6f 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? 
*/ + +- peer = pcmk__get_peer(0, from, NULL); ++ peer = pcmk__get_node(0, from, NULL, CRM_GET_PEER_CLUSTER); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 6e22067..734064d 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -265,8 +265,8 @@ do_dc_release(long long action, + crm_info("DC role released"); + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; +- crm_node_t *node = pcmk__get_peer(0, controld_globals.our_nodename, +- NULL); ++ crm_node_t *node = pcmk__get_node(0, controld_globals.our_nodename, ++ NULL, CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index 45b6b58..df715aa 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = pcmk__get_peer_full(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +@@ -1752,7 +1752,8 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + to_sys = CRM_SYSTEM_TENGINE; + } + +- peer = pcmk__get_peer(0, controld_globals.our_nodename, NULL); ++ peer = pcmk__get_node(0, controld_globals.our_nodename, NULL, ++ CRM_GET_PEER_CLUSTER); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 6c0ee09..60a7f9f 100644 +--- 
a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -222,7 +222,7 @@ send_stonith_update(pcmk__graph_action_t *action, const char *target, + * Try getting any existing node cache entry also by node uuid in case it + * doesn't have an uname yet. + */ +- peer = pcmk__get_peer_full(0, target, uuid, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, target, uuid, CRM_GET_PEER_ANY); + + CRM_CHECK(peer != NULL, return); + +@@ -374,7 +374,8 @@ execute_stonith_cleanup(void) + + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; +- crm_node_t *target_node = pcmk__get_peer(0, target, NULL); ++ crm_node_t *target_node = pcmk__get_node(0, target, NULL, ++ CRM_GET_PEER_CLUSTER); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 2b5267d..6f20ef2 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -35,7 +35,8 @@ update_dc_expected(const xmlNode *msg) + { + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { +- crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); ++ crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -177,7 +178,8 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); +- send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), ++ send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } +@@ -333,7 +335,8 @@ 
do_cl_join_finalize_respond(long long action, + } + } + +- send_cluster_message(pcmk__get_peer(0, controld_globals.dc_name, NULL), ++ send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 45e1eba..064649f 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = pcmk__get_peer(0, join_to, NULL); ++ member = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -332,7 +332,8 @@ do_dc_join_offer_one(long long action, + * well, to ensure the correct value for max_generation_from. 
+ */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { +- member = pcmk__get_peer(0, controld_globals.our_nodename, NULL); ++ member = pcmk__get_node(0, controld_globals.our_nodename, NULL, ++ CRM_GET_PEER_CLUSTER); + join_make_offer(NULL, member, NULL); + } + +@@ -396,7 +397,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = pcmk__get_peer(0, join_from, NULL); ++ join_node = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -732,7 +733,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = pcmk__get_peer(0, join_from, NULL); ++ peer = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -866,7 +867,7 @@ finalize_join_for(gpointer key, gpointer value, gpointer user_data) + return; + } + +- join_node = pcmk__get_peer(0, join_to, NULL); ++ join_node = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 8d3cef7..71f5680 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -458,7 +458,8 @@ relay_message(xmlNode * msg, gboolean originated_locally) + ref, pcmk__s(host_to, "broadcast")); + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { +- node_to = pcmk__get_peer(0, host_to, NULL); ++ node_to = pcmk__get_node(0, host_to, NULL, ++ CRM_GET_PEER_CLUSTER); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index a9c398d..9c4bb58 100644 
+--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,7 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. + */ +- conn_node = pcmk__get_peer(0, node->conn_host, NULL); ++ conn_node = pcmk__get_node(0, node->conn_host, NULL, CRM_GET_PEER_CLUSTER); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index e76174b..01ba4a0 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -158,7 +158,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + return pcmk_rc_ok; + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { +- crm_node_t *peer = pcmk__get_peer(0, router_node, NULL); ++ crm_node_t *peer = pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -170,7 +171,8 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + controld_globals.te_uuid); + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + +- rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); +@@ -422,7 +424,8 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + I_NULL, &msg); + + } else { +- rc = send_cluster_message(pcmk__get_peer(0, router_node, NULL), ++ rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_lrmd, cmd, TRUE); + } + +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index c8cceed..84bef5b 100644 +--- a/daemons/controld/controld_te_events.c ++++ 
b/daemons/controld/controld_te_events.c +@@ -119,7 +119,8 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + target_uuid = crm_element_value(action->xml, XML_LRM_ATTR_TARGET_UUID); + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { +- crm_node_t *node = pcmk__get_peer(0, router, NULL); ++ crm_node_t *node = pcmk__get_node(0, router, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (node) { + router_uuid = node->uuid; +diff --git a/daemons/controld/controld_utils.c b/daemons/controld/controld_utils.c +index 55790c0..0e92416 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -734,7 +734,8 @@ update_dc(xmlNode * msg) + /* do nothing */ + + } else if (controld_globals.dc_name != NULL) { +- crm_node_t *dc_node = pcmk__get_peer(0, controld_globals.dc_name, NULL); ++ crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 28f08dd..c519607 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -645,7 +645,8 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + } + + if (device->include_nodeid && (cmd->target != NULL)) { +- crm_node_t *node = pcmk__get_peer(0, cmd->target, NULL); ++ crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, ++ CRM_GET_PEER_CLUSTER); + + cmd->target_nodeid = node->id; + } +@@ -2402,7 +2403,8 @@ stonith_send_reply(const xmlNode *reply, int call_options, + if (remote_peer == NULL) { + do_local_reply(reply, client, call_options); + } else { +- send_cluster_message(pcmk__get_peer(0, remote_peer, NULL), ++ send_cluster_message(pcmk__get_node(0, remote_peer, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, reply, FALSE); + } + } +@@ -3371,7 +3373,8 @@ handle_fence_request(pcmk__request_t *request) + 
crm_xml_add(request->xml, F_STONITH_CLIENTID, + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); +- send_cluster_message(pcmk__get_peer(0, alternate_host, NULL), ++ send_cluster_message(pcmk__get_node(0, alternate_host, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 3c176c8..96b518a 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1030,7 +1030,8 @@ merge_duplicates(remote_fencing_op_t *op) + op->id, other->id, other->target); + continue; + } +- if (!fencing_peer_active(pcmk__get_peer(0, other->originator, NULL))) { ++ if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL, ++ CRM_GET_PEER_CLUSTER))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1663,7 +1664,8 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add(update, F_STONITH_CALLID, call_id); + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + +- send_cluster_message(pcmk__get_peer(0, client_node, NULL), ++ send_cluster_message(pcmk__get_node(0, client_node, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, update, FALSE); + + free_xml(update); +@@ -1917,7 +1919,8 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); + } + +- send_cluster_message(pcmk__get_peer(0, peer->host, NULL), ++ send_cluster_message(pcmk__get_node(0, peer->host, NULL, ++ CRM_GET_PEER_CLUSTER), + crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index c71069b..bea4086 100644 
+--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -132,9 +132,7 @@ void pcmk__refresh_node_caches_from_cib(xmlNode *cib); + crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, + uint32_t flags); + +-crm_node_t *pcmk__get_peer(unsigned int id, const char *uname, +- const char *uuid); +-crm_node_t *pcmk__get_peer_full(unsigned int id, const char *uname, +- const char *uuid, int flags); ++crm_node_t *pcmk__get_node(unsigned int id, const char *uname, ++ const char *uuid, uint32_t flags); + + #endif // PCMK__CRM_CLUSTER_INTERNAL__H +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 34a31fb..47a3321 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = pcmk__get_peer(id, NULL, NULL); ++ node = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = pcmk__get_peer(id, name, NULL); ++ node = pcmk__get_node(id, name, NULL, CRM_GET_PEER_CLUSTER); + free(name); + } + +@@ -480,7 +480,8 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + } + + // Ensure local node always exists in peer cache +- peer = pcmk__get_peer(cluster->nodeid, cluster->uname, NULL); ++ peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, ++ CRM_GET_PEER_CLUSTER); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -640,7 +641,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing 
node[%d] %u = %s", lpc, nodeid, name); +- pcmk__get_peer(nodeid, name, NULL); ++ pcmk__get_node(nodeid, name, NULL, CRM_GET_PEER_CLUSTER); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index 778368f..bc251da 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -465,7 +465,8 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + msg->sender.id = nodeid; + if (msg->sender.size == 0) { +- crm_node_t *peer = pcmk__get_peer(nodeid, NULL, NULL); ++ crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u is unknown", nodeid); +@@ -526,7 +527,8 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + } + + // Is this necessary? 
+- pcmk__get_peer(msg->sender.id, msg->sender.uname, NULL); ++ pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, ++ CRM_GET_PEER_CLUSTER); + + crm_trace("Payload: %.200s", data); + return data; +@@ -720,7 +722,8 @@ pcmk_cpg_membership(cpg_handle_t handle, + } + + for (i = 0; i < member_list_entries; i++) { +- crm_node_t *peer = pcmk__get_peer(member_list[i].nodeid, NULL, NULL); ++ crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, ++ CRM_GET_PEER_CLUSTER); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -873,7 +876,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = pcmk__get_peer(id, NULL, NULL); ++ peer = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index 31867f2..576c0aa 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = pcmk__get_peer(0, e->uname, NULL); ++ our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = pcmk__get_peer(0, vote.from, NULL); +- our_node = pcmk__get_peer(0, e->uname, NULL); ++ your_node = pcmk__get_node(0, vote.from, NULL, CRM_GET_PEER_CLUSTER); ++ our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 41e0fa3..4c89a7c 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -562,37 
+562,6 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + return node; + } + +-/*! +- * \brief Get a node cache entry (cluster or Pacemaker Remote) +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster +- * node ID to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags +- * +- * \return (Possibly newly created) node cache entry +- */ +-crm_node_t * +-pcmk__get_peer_full(unsigned int id, const char *uname, const char *uuid, +- int flags) +-{ +- crm_node_t *node = NULL; +- +- CRM_ASSERT(id > 0 || uname != NULL); +- +- crm_peer_init(); +- +- if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { +- node = g_hash_table_lookup(crm_remote_peer_cache, uname); +- } +- +- if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { +- node = pcmk__get_peer(id, uname, uuid); +- } +- return node; +-} +- + /*! + * \internal + * \brief Purge a node from cache (both cluster and Pacemaker Remote) +@@ -794,12 +763,14 @@ remove_conflicting_peer(crm_node_t *node) + * \param[in] uname If not NULL, node name to search for + * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster + * node ID to search for ++ * \param[in] flags Group of enum crm_get_peer_flags + * + * \return (Possibly newly created) cluster node cache entry + */ + /* coverity[-alloc] Memory is referenced in one or both hashtables */ + crm_node_t * +-pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) ++pcmk__get_node(unsigned int id, const char *uname, const char *uuid, ++ uint32_t flags) + { + crm_node_t *node = NULL; + char *uname_lookup = NULL; +@@ -808,6 +779,18 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + + crm_peer_init(); + ++ // Check the Pacemaker Remote node cache first ++ if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ node = 
g_hash_table_lookup(crm_remote_peer_cache, uname); ++ if (node != NULL) { ++ return node; ++ } ++ } ++ ++ if (!pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ return NULL; ++ } ++ + node = pcmk__search_cluster_node_cache(id, uname, uuid); + + /* if uname wasn't provided, and find_peer did not turn up a uname based on id. +@@ -826,7 +809,6 @@ pcmk__get_peer(unsigned int id, const char *uname, const char *uuid) + } + } + +- + if (node == NULL) { + char *uniqueid = crm_generate_uuid(); + +@@ -1417,13 +1399,13 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + crm_node_t * + crm_get_peer(unsigned int id, const char *uname) + { +- return pcmk__get_peer(id, uname, NULL); ++ return pcmk__get_node(id, uname, NULL, CRM_GET_PEER_CLUSTER); + } + + crm_node_t * + crm_get_peer_full(unsigned int id, const char *uname, int flags) + { +- return pcmk__get_peer_full(id, uname, NULL, flags); ++ return pcmk__get_node(id, uname, NULL, flags); + } + + // LCOV_EXCL_STOP +-- +2.31.1 + +From 8a33a98c48475790a033f59aeb3e026f2bb68e4f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:18:45 -0600 +Subject: [PATCH 5/9] API: libcrmcluster: deprecate enum crm_get_peer_flags and + all its values + +Replace it internally with a new enum pcmk__node_search_flags +--- + daemons/attrd/attrd_cib.c | 2 +- + daemons/attrd/attrd_corosync.c | 6 +++--- + daemons/attrd/attrd_messages.c | 6 +++--- + daemons/based/based_callbacks.c | 4 ++-- + daemons/based/based_messages.c | 4 ++-- + daemons/controld/controld_corosync.c | 2 +- + daemons/controld/controld_election.c | 2 +- + daemons/controld/controld_execd.c | 4 ++-- + daemons/controld/controld_fencing.c | 6 +++--- + daemons/controld/controld_join_client.c | 6 +++--- + daemons/controld/controld_join_dc.c | 10 +++++----- + daemons/controld/controld_messages.c | 4 ++-- + daemons/controld/controld_remote_ra.c | 3 ++- + daemons/controld/controld_te_actions.c | 6 +++--- + daemons/controld/controld_te_events.c | 2 
+- + daemons/controld/controld_utils.c | 2 +- + daemons/fenced/fenced_commands.c | 9 +++++---- + daemons/fenced/fenced_history.c | 3 ++- + daemons/fenced/fenced_remote.c | 9 +++++---- + include/crm/cluster.h | 7 ------- + include/crm/cluster/compat.h | 7 +++++++ + include/crm/cluster/internal.h | 13 +++++++++++++ + lib/cluster/corosync.c | 8 ++++---- + lib/cluster/cpg.c | 8 ++++---- + lib/cluster/election.c | 6 +++--- + lib/cluster/membership.c | 18 +++++++++--------- + 26 files changed, 87 insertions(+), 70 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index 7018a32..bdc0a10 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -568,7 +568,7 @@ write_attribute(attribute_t *a, bool ignore_delay) + } else { + // This will create a cluster node cache entry if none exists + crm_node_t *peer = pcmk__get_node(v->nodeid, v->nodename, NULL, +- CRM_GET_PEER_ANY); ++ pcmk__node_search_any); + + uuid = peer->uuid; + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index c9e11e6..158d82f 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -120,7 +120,7 @@ attrd_cpg_dispatch(cpg_handle_t handle, + crm_err("Bad message of class %d received from %s[%u]: '%.120s'", kind, from, nodeid, data); + } else { + attrd_peer_message(pcmk__get_node(nodeid, from, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + xml); + } + +@@ -255,7 +255,7 @@ static void + record_peer_nodeid(attribute_value_t *v, const char *host) + { + crm_node_t *known_peer = pcmk__get_node(v->nodeid, host, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid); + if (attrd_election_won()) { +@@ -441,7 +441,7 @@ attrd_peer_clear_failure(pcmk__request_t *request) + regex_t regex; + + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); 
+ + if (attrd_failure_regex(&regex, rsc, op, interval_ms) != pcmk_ok) { + crm_info("Ignoring invalid request to clear failures for %s", +diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index c6c1b9a..5525d4b 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -178,7 +178,7 @@ handle_sync_request(pcmk__request_t *request) + { + if (request->peer != NULL) { + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + attrd_peer_sync(peer, request->xml); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +@@ -196,7 +196,7 @@ handle_sync_response_request(pcmk__request_t *request) + } else { + if (request->peer != NULL) { + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + bool peer_won = attrd_check_for_new_writer(peer, request->xml); + + if (!pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { +@@ -215,7 +215,7 @@ handle_update_request(pcmk__request_t *request) + if (request->peer != NULL) { + const char *host = crm_element_value(request->xml, PCMK__XA_ATTR_NODE_NAME); + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + attrd_peer_update(peer, request->xml, host, false); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index b1f3b4b..5f3dc62 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -930,7 +930,7 @@ forward_request(xmlNode *request) + crm_xml_add(request, F_CIB_DELEGATED, OUR_NODENAME); + + if (host != NULL) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + send_cluster_message(peer, crm_msg_cib, request, FALSE); + +@@ -990,7 +990,7 @@
send_peer_reply(xmlNode * msg, xmlNode * result_diff, const char *originator, gb + crm_trace("Sending request result to %s only", originator); + crm_xml_add(msg, F_CIB_ISREPLY, originator); + return send_cluster_message(pcmk__get_node(0, originator, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_cib, msg, FALSE); + } + +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index ff1a6aa..7f503b2 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -129,7 +129,7 @@ send_sync_request(const char *host) + stand_alone? "localhost" : crm_cluster->uname); + + if (host != NULL) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + send_cluster_message(peer, crm_msg_cib, sync_me, FALSE); + free_xml(sync_me); +@@ -449,7 +449,7 @@ sync_our_cib(xmlNode * request, gboolean all) + add_message_xml(replace_request, F_CIB_CALLDATA, the_cib); + + if (!all) { +- peer = pcmk__get_node(0, host, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, host, NULL, pcmk__node_search_cluster); + } + if (!send_cluster_message(peer, crm_msg_cib, replace_request, FALSE)) { + result = -ENOTCONN; +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index fa1df6f..0f3ea32 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -49,7 +49,7 @@ crmd_cs_dispatch(cpg_handle_t handle, const struct cpg_name *groupName, + crm_xml_add(xml, F_ORIG, from); + /* crm_xml_add_int(xml, F_SEQ, wrapper->id); Fake? 
*/ + +- peer = pcmk__get_node(0, from, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, from, NULL, pcmk__node_search_cluster); + if (!pcmk_is_set(peer->processes, crm_proc_cpg)) { + /* If we can still talk to our peer process on that node, + * then it must be part of the corosync membership +diff --git a/daemons/controld/controld_election.c b/daemons/controld/controld_election.c +index 734064d..adad168 100644 +--- a/daemons/controld/controld_election.c ++++ b/daemons/controld/controld_election.c +@@ -266,7 +266,7 @@ do_dc_release(long long action, + if (pcmk_is_set(controld_globals.fsa_input_register, R_SHUTDOWN)) { + xmlNode *update = NULL; + crm_node_t *node = pcmk__get_node(0, controld_globals.our_nodename, +- NULL, CRM_GET_PEER_CLUSTER); ++ NULL, pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + update = create_node_state_update(node, node_update_expected, NULL, +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index df715aa..fe2313c 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -581,7 +581,7 @@ controld_query_executor_state(void) + return NULL; + } + +- peer = pcmk__get_node(0, lrm_state->node_name, NULL, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, lrm_state->node_name, NULL, pcmk__node_search_any); + CRM_CHECK(peer != NULL, return NULL); + + xml_state = create_node_state_update(peer, +@@ -1753,7 +1753,7 @@ controld_ack_event_directly(const char *to_host, const char *to_sys, + } + + peer = pcmk__get_node(0, controld_globals.our_nodename, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + update = create_node_state_update(peer, node_update_none, NULL, + __func__); + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 60a7f9f..79a52be 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -222,7 +222,7 @@ 
send_stonith_update(pcmk__graph_action_t *action, const char *target, + * Try getting any existing node cache entry also by node uuid in case it + * doesn't have an uname yet. + */ +- peer = pcmk__get_node(0, target, uuid, CRM_GET_PEER_ANY); ++ peer = pcmk__get_node(0, target, uuid, pcmk__node_search_any); + + CRM_CHECK(peer != NULL, return); + +@@ -375,7 +375,7 @@ execute_stonith_cleanup(void) + for (iter = stonith_cleanup_list; iter != NULL; iter = iter->next) { + char *target = iter->data; + crm_node_t *target_node = pcmk__get_node(0, target, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + const char *uuid = crm_peer_uuid(target_node); + + crm_notice("Marking %s, target of a previous stonith action, as clean", target); +@@ -582,7 +582,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) + + if (succeeded) { + crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, +- CRM_GET_PEER_ANY); ++ pcmk__node_search_any); + const char *uuid = NULL; + + if (peer == NULL) { +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 6f20ef2..101c73d 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -36,7 +36,7 @@ update_dc_expected(const xmlNode *msg) + if ((controld_globals.dc_name != NULL) + && pcmk__xe_attr_is_true(msg, F_CRM_DC_LEAVING)) { + crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, dc_node, CRMD_JOINSTATE_DOWN); + } +@@ -179,7 +179,7 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + crm_xml_add(reply, F_CRM_JOIN_ID, join_id); + crm_xml_add(reply, XML_ATTR_CRM_VERSION, CRM_FEATURE_SET); + send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + } 
+@@ -336,7 +336,7 @@ do_cl_join_finalize_respond(long long action, + } + + send_cluster_message(pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, reply, TRUE); + free_xml(reply); + +diff --git a/daemons/controld/controld_join_dc.c b/daemons/controld/controld_join_dc.c +index 064649f..e9fc698 100644 +--- a/daemons/controld/controld_join_dc.c ++++ b/daemons/controld/controld_join_dc.c +@@ -318,7 +318,7 @@ do_dc_join_offer_one(long long action, + crm_err("Can't make join-%d offer to unknown node", current_join_id); + return; + } +- member = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); ++ member = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); + + /* It is possible that a node will have been sick or starting up when the + * original offer was made. However, it will either re-announce itself in +@@ -333,7 +333,7 @@ do_dc_join_offer_one(long long action, + */ + if (strcasecmp(join_to, controld_globals.our_nodename) != 0) { + member = pcmk__get_node(0, controld_globals.our_nodename, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + join_make_offer(NULL, member, NULL); + } + +@@ -397,7 +397,7 @@ do_dc_join_filter_offer(long long action, + crm_err("Ignoring invalid join request without node name"); + return; + } +- join_node = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); ++ join_node = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); + + crm_element_value_int(join_ack->msg, F_CRM_JOIN_ID, &join_id); + if (join_id != current_join_id) { +@@ -733,7 +733,7 @@ do_dc_join_ack(long long action, + goto done; + } + +- peer = pcmk__get_node(0, join_from, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(0, join_from, NULL, pcmk__node_search_cluster); + if (peer->join != crm_join_finalized) { + crm_info("Ignoring out-of-sequence join-%d confirmation from %s " + "(currently %s not %s)", +@@ -867,7 +867,7 @@ finalize_join_for(gpointer key, 
gpointer value, gpointer user_data) + return; + } + +- join_node = pcmk__get_node(0, join_to, NULL, CRM_GET_PEER_CLUSTER); ++ join_node = pcmk__get_node(0, join_to, NULL, pcmk__node_search_cluster); + if (!crm_is_peer_active(join_node)) { + /* + * NACK'ing nodes that the membership layer doesn't know about yet +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 71f5680..999dd13 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -459,7 +459,7 @@ relay_message(xmlNode * msg, gboolean originated_locally) + crm_log_xml_trace(msg, "relayed"); + if (!broadcast) { + node_to = pcmk__get_node(0, host_to, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + } + send_cluster_message(node_to, dest, msg, TRUE); + return TRUE; +@@ -904,7 +904,7 @@ handle_node_info_request(const xmlNode *msg) + value = controld_globals.our_nodename; + } + +- node = pcmk__search_node_caches(node_id, value, CRM_GET_PEER_ANY); ++ node = pcmk__search_node_caches(node_id, value, pcmk__node_search_any); + if (node) { + crm_xml_add(reply_data, XML_ATTR_ID, node->uuid); + crm_xml_add(reply_data, XML_ATTR_UNAME, node->uname); +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index 9c4bb58..662643c 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -206,7 +206,8 @@ should_purge_attributes(crm_node_t *node) + /* Get the node that was hosting the remote connection resource from the + * peer cache. That's the one we really care about here. 
+ */ +- conn_node = pcmk__get_node(0, node->conn_host, NULL, CRM_GET_PEER_CLUSTER); ++ conn_node = pcmk__get_node(0, node->conn_host, NULL, ++ pcmk__node_search_cluster); + if (conn_node == NULL) { + return purge; + } +diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c +index 01ba4a0..fbd9955 100644 +--- a/daemons/controld/controld_te_actions.c ++++ b/daemons/controld/controld_te_actions.c +@@ -159,7 +159,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + + } else if (pcmk__str_eq(task, PCMK_ACTION_DO_SHUTDOWN, pcmk__str_none)) { + crm_node_t *peer = pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, peer, CRMD_JOINSTATE_DOWN); + } +@@ -172,7 +172,7 @@ execute_cluster_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + crm_xml_add(cmd, XML_ATTR_TRANSITION_KEY, counter); + + rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_crmd, cmd, TRUE); + free(counter); + free_xml(cmd); +@@ -425,7 +425,7 @@ execute_rsc_action(pcmk__graph_t *graph, pcmk__graph_action_t *action) + + } else { + rc = send_cluster_message(pcmk__get_node(0, router_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_lrmd, cmd, TRUE); + } + +diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c +index 84bef5b..a54304b 100644 +--- a/daemons/controld/controld_te_events.c ++++ b/daemons/controld/controld_te_events.c +@@ -120,7 +120,7 @@ fail_incompletable_actions(pcmk__graph_t *graph, const char *down_node) + router = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE); + if (router) { + crm_node_t *node = pcmk__get_node(0, router, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (node) { + router_uuid = node->uuid; +diff --git a/daemons/controld/controld_utils.c 
b/daemons/controld/controld_utils.c +index 0e92416..1143e88 100644 +--- a/daemons/controld/controld_utils.c ++++ b/daemons/controld/controld_utils.c +@@ -735,7 +735,7 @@ update_dc(xmlNode * msg) + + } else if (controld_globals.dc_name != NULL) { + crm_node_t *dc_node = pcmk__get_node(0, controld_globals.dc_name, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_info("Set DC to %s (%s)", + controld_globals.dc_name, +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index c519607..d2a556f 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -646,7 +646,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device) + + if (device->include_nodeid && (cmd->target != NULL)) { + crm_node_t *node = pcmk__get_node(0, cmd->target, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + cmd->target_nodeid = node->id; + } +@@ -2404,7 +2404,7 @@ stonith_send_reply(const xmlNode *reply, int call_options, + do_local_reply(reply, client, call_options); + } else { + send_cluster_message(pcmk__get_node(0, remote_peer, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, reply, FALSE); + } + } +@@ -2920,7 +2920,8 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result) + crm_node_t *node = NULL; + + pcmk__scan_min_int(host, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + if (node != NULL) { + host = node->uname; + } +@@ -3374,7 +3375,7 @@ handle_fence_request(pcmk__request_t *request) + request->ipc_client->id); + crm_xml_add(request->xml, F_STONITH_REMOTE_OP_ID, op->id); + send_cluster_message(pcmk__get_node(0, alternate_host, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, request->xml, FALSE); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_PENDING, + NULL); 
+diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index a766477..4fa2215 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -469,7 +469,8 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + crm_node_t *node; + + pcmk__scan_min_int(target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + if (node) { + target = node->uname; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 96b518a..482efb9 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1031,7 +1031,7 @@ merge_duplicates(remote_fencing_op_t *op) + continue; + } + if (!fencing_peer_active(pcmk__get_node(0, other->originator, NULL, +- CRM_GET_PEER_CLUSTER))) { ++ pcmk__node_search_cluster))) { + crm_notice("Failing action '%s' targeting %s originating from " + "client %s@%s: Originator is dead " CRM_XS " id=%.8s", + other->action, other->target, other->client_name, +@@ -1221,7 +1221,8 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) + crm_node_t *node; + + pcmk__scan_min_int(op->target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, CRM_GET_PEER_ANY); ++ node = pcmk__search_known_node_cache(nodeid, NULL, ++ pcmk__node_search_any); + + /* Ensure the conversion only happens once */ + stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); +@@ -1665,7 +1666,7 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + crm_xml_add_int(update, F_STONITH_TIMEOUT, op_timeout); + + send_cluster_message(pcmk__get_node(0, client_node, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, update, FALSE); + + free_xml(update); +@@ -1920,7 +1921,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer) + } + + 
send_cluster_message(pcmk__get_node(0, peer->host, NULL, +- CRM_GET_PEER_CLUSTER), ++ pcmk__node_search_cluster), + crm_msg_stonith_ng, remote_op, FALSE); + peer->tried = TRUE; + free_xml(remote_op); +diff --git a/include/crm/cluster.h b/include/crm/cluster.h +index 302b807..decb8e8 100644 +--- a/include/crm/cluster.h ++++ b/include/crm/cluster.h +@@ -128,13 +128,6 @@ enum crm_ais_msg_types { + crm_msg_stonith_ng = 9, + }; + +-/* used with crm_get_peer_full */ +-enum crm_get_peer_flags { +- CRM_GET_PEER_CLUSTER = 0x0001, +- CRM_GET_PEER_REMOTE = 0x0002, +- CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE, +-}; +- + gboolean send_cluster_message(const crm_node_t *node, + enum crm_ais_msg_types service, + const xmlNode *data, gboolean ordered); +diff --git a/include/crm/cluster/compat.h b/include/crm/cluster/compat.h +index e853fd8..14c4504 100644 +--- a/include/crm/cluster/compat.h ++++ b/include/crm/cluster/compat.h +@@ -26,6 +26,13 @@ extern "C" { + * release. + */ + ++// \deprecated Do not use ++enum crm_get_peer_flags { ++ CRM_GET_PEER_CLUSTER = 0x0001, ++ CRM_GET_PEER_REMOTE = 0x0002, ++ CRM_GET_PEER_ANY = CRM_GET_PEER_CLUSTER|CRM_GET_PEER_REMOTE, ++}; ++ + // \deprecated Do not use Pacemaker for cluster node cacheing + crm_node_t *crm_get_peer(unsigned int id, const char *uname); + +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index bea4086..9513254 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -30,6 +30,19 @@ enum crm_proc_flag { + }; + /* *INDENT-ON* */ + ++// Used with node cache search functions ++enum pcmk__node_search_flags { ++ pcmk__node_search_none = 0, ++ pcmk__node_search_cluster = (1 << 0), // Search for cluster nodes ++ pcmk__node_search_remote = (1 << 1), // Search for remote nodes ++ pcmk__node_search_any = pcmk__node_search_cluster ++ |pcmk__node_search_remote, ++ ++ /* @COMPAT The values before this must stay the same until we can drop ++ * support for enum 
crm_get_peer_flags ++ */ ++}; ++ + /*! + * \internal + * \brief Return the process bit corresponding to the current cluster stack +diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c +index 47a3321..374250f 100644 +--- a/lib/cluster/corosync.c ++++ b/lib/cluster/corosync.c +@@ -309,12 +309,12 @@ quorum_notification_cb(quorum_handle_t handle, uint32_t quorate, + crm_debug("Member[%d] %u ", i, id); + + /* Get this node's peer cache entry (adding one if not already there) */ +- node = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); ++ node = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); + if (node->uname == NULL) { + char *name = pcmk__corosync_name(0, id); + + crm_info("Obtaining name for new node %u", id); +- node = pcmk__get_node(id, name, NULL, CRM_GET_PEER_CLUSTER); ++ node = pcmk__get_node(id, name, NULL, pcmk__node_search_cluster); + free(name); + } + +@@ -481,7 +481,7 @@ pcmk__corosync_connect(crm_cluster_t *cluster) + + // Ensure local node always exists in peer cache + peer = pcmk__get_node(cluster->nodeid, cluster->uname, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + cluster->uuid = pcmk__corosync_uuid(peer); + + return TRUE; +@@ -641,7 +641,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent) + + if (nodeid > 0 || name != NULL) { + crm_trace("Initializing node[%d] %u = %s", lpc, nodeid, name); +- pcmk__get_node(nodeid, name, NULL, CRM_GET_PEER_CLUSTER); ++ pcmk__get_node(nodeid, name, NULL, pcmk__node_search_cluster); + } + + if (nodeid > 0 && name != NULL) { +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index bc251da..b5f2884 100644 +--- a/lib/cluster/cpg.c ++++ b/lib/cluster/cpg.c +@@ -466,7 +466,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + msg->sender.id = nodeid; + if (msg->sender.size == 0) { + crm_node_t *peer = pcmk__get_node(nodeid, NULL, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (peer == NULL) { + crm_err("Peer with nodeid=%u 
is unknown", nodeid); +@@ -528,7 +528,7 @@ pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void + + // Is this necessary? + pcmk__get_node(msg->sender.id, msg->sender.uname, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + crm_trace("Payload: %.200s", data); + return data; +@@ -723,7 +723,7 @@ pcmk_cpg_membership(cpg_handle_t handle, + + for (i = 0; i < member_list_entries; i++) { + crm_node_t *peer = pcmk__get_node(member_list[i].nodeid, NULL, NULL, +- CRM_GET_PEER_CLUSTER); ++ pcmk__node_search_cluster); + + if (member_list[i].nodeid == local_nodeid + && member_list[i].pid != getpid()) { +@@ -876,7 +876,7 @@ cluster_connect_cpg(crm_cluster_t *cluster) + return FALSE; + } + +- peer = pcmk__get_node(id, NULL, NULL, CRM_GET_PEER_CLUSTER); ++ peer = pcmk__get_node(id, NULL, NULL, pcmk__node_search_cluster); + crm_update_peer_proc(__func__, peer, crm_proc_cpg, ONLINESTATUS); + return TRUE; + } +diff --git a/lib/cluster/election.c b/lib/cluster/election.c +index 576c0aa..7276a2d 100644 +--- a/lib/cluster/election.c ++++ b/lib/cluster/election.c +@@ -298,7 +298,7 @@ election_vote(election_t *e) + return; + } + +- our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); ++ our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); + if ((our_node == NULL) || (crm_is_peer_active(our_node) == FALSE)) { + crm_trace("Cannot vote in %s yet: local node not connected to cluster", + e->name); +@@ -547,8 +547,8 @@ election_count_vote(election_t *e, const xmlNode *message, bool can_win) + return election_error; + } + +- your_node = pcmk__get_node(0, vote.from, NULL, CRM_GET_PEER_CLUSTER); +- our_node = pcmk__get_node(0, e->uname, NULL, CRM_GET_PEER_CLUSTER); ++ your_node = pcmk__get_node(0, vote.from, NULL, pcmk__node_search_cluster); ++ our_node = pcmk__get_node(0, e->uname, NULL, pcmk__node_search_cluster); + we_are_owner = (our_node != NULL) + && pcmk__str_eq(our_node->uuid, vote.election_owner, + 
pcmk__str_none); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 4c89a7c..705b70c 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -539,7 +539,7 @@ hash_find_by_data(gpointer key, gpointer value, gpointer user_data) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Node cache entry if found, otherwise NULL + */ +@@ -552,11 +552,11 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + + crm_peer_init(); + +- if ((uname != NULL) && pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ if ((uname != NULL) && pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + } + +- if ((node == NULL) && pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { + node = pcmk__search_cluster_node_cache(id, uname, NULL); + } + return node; +@@ -763,7 +763,7 @@ remove_conflicting_peer(crm_node_t *node) + * \param[in] uname If not NULL, node name to search for + * \param[in] uuid If not NULL while id is 0, node UUID instead of cluster + * node ID to search for +- * \param[in] flags Group of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return (Possibly newly created) cluster node cache entry + */ +@@ -780,14 +780,14 @@ pcmk__get_node(unsigned int id, const char *uname, const char *uuid, + crm_peer_init(); + + // Check the Pacemaker Remote node cache first +- if (pcmk_is_set(flags, CRM_GET_PEER_REMOTE)) { ++ if (pcmk_is_set(flags, pcmk__node_search_remote)) { + node = g_hash_table_lookup(crm_remote_peer_cache, uname); + if (node != NULL) { + return node; + } + } + +- if (!pcmk_is_set(flags, CRM_GET_PEER_CLUSTER)) { ++ if (!pcmk_is_set(flags, 
pcmk__node_search_cluster)) { + return NULL; + } + +@@ -1349,7 +1349,7 @@ pcmk__refresh_node_caches_from_cib(xmlNode *cib) + * + * \param[in] id If not 0, cluster node ID to search for + * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Bitmask of enum crm_get_peer_flags ++ * \param[in] flags Group of enum pcmk__node_search_flags + * + * \return Known node cache entry if found, otherwise NULL + */ +@@ -1364,7 +1364,7 @@ pcmk__search_known_node_cache(unsigned int id, const char *uname, + + node = pcmk__search_node_caches(id, uname, flags); + +- if (node || !(flags & CRM_GET_PEER_CLUSTER)) { ++ if (node || !(flags & pcmk__node_search_cluster)) { + return node; + } + +@@ -1399,7 +1399,7 @@ crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection) + crm_node_t * + crm_get_peer(unsigned int id, const char *uname) + { +- return pcmk__get_node(id, uname, NULL, CRM_GET_PEER_CLUSTER); ++ return pcmk__get_node(id, uname, NULL, pcmk__node_search_cluster); + } + + crm_node_t * +-- +2.31.1 + +From aef8f5016b2de67ab12f896b2bfa7a0f1954b5b1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:27:24 -0600 +Subject: [PATCH 6/9] Refactor: libcrmcluster: replace + pcmk__search_known_node_cache() + +... 
with new flag in enum pcmk__node_search_flags +--- + daemons/controld/controld_fencing.c | 5 ++-- + daemons/fenced/fenced_commands.c | 5 ++-- + daemons/fenced/fenced_history.c | 5 ++-- + daemons/fenced/fenced_remote.c | 5 ++-- + include/crm/cluster/internal.h | 4 +-- + lib/cluster/membership.c | 45 ++++++----------------------- + 6 files changed, 23 insertions(+), 46 deletions(-) + +diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c +index 79a52be..ede2c27 100644 +--- a/daemons/controld/controld_fencing.c ++++ b/daemons/controld/controld_fencing.c +@@ -581,8 +581,9 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event) + event->id); + + if (succeeded) { +- crm_node_t *peer = pcmk__search_known_node_cache(0, event->target, +- pcmk__node_search_any); ++ crm_node_t *peer = pcmk__search_node_caches(0, event->target, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + const char *uuid = NULL; + + if (peer == NULL) { +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index d2a556f..4f21858 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2920,8 +2920,9 @@ fence_locally(xmlNode *msg, pcmk__action_result_t *result) + crm_node_t *node = NULL; + + pcmk__scan_min_int(host, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + if (node != NULL) { + host = node->uname; + } +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 4fa2215..fb709ff 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -469,8 +469,9 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + crm_node_t *node; + + pcmk__scan_min_int(target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = 
pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + if (node) { + target = node->uname; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 482efb9..ba70c57 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1221,8 +1221,9 @@ create_remote_stonith_op(const char *client, xmlNode *request, gboolean peer) + crm_node_t *node; + + pcmk__scan_min_int(op->target, &nodeid, 0); +- node = pcmk__search_known_node_cache(nodeid, NULL, +- pcmk__node_search_any); ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_any ++ |pcmk__node_search_known); + + /* Ensure the conversion only happens once */ + stonith__clear_call_options(op->call_options, op->id, st_opt_cs_nodeid); +diff --git a/include/crm/cluster/internal.h b/include/crm/cluster/internal.h +index 9513254..b75784c 100644 +--- a/include/crm/cluster/internal.h ++++ b/include/crm/cluster/internal.h +@@ -41,6 +41,8 @@ enum pcmk__node_search_flags { + /* @COMPAT The values before this must stay the same until we can drop + * support for enum crm_get_peer_flags + */ ++ ++ pcmk__node_search_known = (1 << 2), // Search previously known nodes + }; + + /*! 
+@@ -142,8 +144,6 @@ crm_node_t *pcmk__search_cluster_node_cache(unsigned int id, const char *uname, + void pcmk__purge_node_from_cache(const char *node_name, uint32_t node_id); + + void pcmk__refresh_node_caches_from_cib(xmlNode *cib); +-crm_node_t *pcmk__search_known_node_cache(unsigned int id, const char *uname, +- uint32_t flags); + + crm_node_t *pcmk__get_node(unsigned int id, const char *uname, + const char *uuid, uint32_t flags); +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index 705b70c..ef4aaac 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -82,6 +82,7 @@ static gboolean crm_autoreap = TRUE; + } while (0) + + static void update_peer_uname(crm_node_t *node, const char *uname); ++static crm_node_t *find_known_node(const char *id, const char *uname); + + int + crm_remote_peer_cache_size(void) +@@ -559,6 +560,14 @@ pcmk__search_node_caches(unsigned int id, const char *uname, uint32_t flags) + if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_cluster)) { + node = pcmk__search_cluster_node_cache(id, uname, NULL); + } ++ ++ if ((node == NULL) && pcmk_is_set(flags, pcmk__node_search_known)) { ++ char *id_str = (id == 0)? NULL : crm_strdup_printf("%u", id); ++ ++ node = find_known_node(id_str, uname); ++ free(id_str); ++ } ++ + return node; + } + +@@ -1343,42 +1352,6 @@ pcmk__refresh_node_caches_from_cib(xmlNode *cib) + refresh_known_node_cache(cib); + } + +-/*! 
+- * \internal +- * \brief Search known node cache +- * +- * \param[in] id If not 0, cluster node ID to search for +- * \param[in] uname If not NULL, node name to search for +- * \param[in] flags Group of enum pcmk__node_search_flags +- * +- * \return Known node cache entry if found, otherwise NULL +- */ +-crm_node_t * +-pcmk__search_known_node_cache(unsigned int id, const char *uname, +- uint32_t flags) +-{ +- crm_node_t *node = NULL; +- char *id_str = NULL; +- +- CRM_ASSERT(id > 0 || uname != NULL); +- +- node = pcmk__search_node_caches(id, uname, flags); +- +- if (node || !(flags & pcmk__node_search_cluster)) { +- return node; +- } +- +- if (id > 0) { +- id_str = crm_strdup_printf("%u", id); +- } +- +- node = find_known_node(id_str, uname); +- +- free(id_str); +- return node; +-} +- +- + // Deprecated functions kept only for backward API compatibility + // LCOV_EXCL_START + +-- +2.31.1 + +From 5b64c943bd8ba82b06e803fa97737fb7b574ec04 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Dec 2023 16:38:10 -0600 +Subject: [PATCH 7/9] Refactor: libcrmcluster: replace + pcmk__search_cluster_node_cache() + +... 
with calls to pcmk__search_node_caches() using pcmk__node_search_cluster +where possible +--- + daemons/attrd/attrd_ipc.c | 5 +++-- + daemons/based/based_messages.c | 5 ++++- + daemons/controld/controld_corosync.c | 4 ++-- + daemons/controld/controld_messages.c | 6 ++++-- + lib/cluster/cluster.c | 3 ++- + lib/cluster/cpg.c | 4 ++-- + lib/cluster/membership.c | 2 +- + 7 files changed, 18 insertions(+), 11 deletions(-) + +diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c +index 05c4a69..b08963d 100644 +--- a/daemons/attrd/attrd_ipc.c ++++ b/daemons/attrd/attrd_ipc.c +@@ -162,10 +162,11 @@ attrd_client_peer_remove(pcmk__request_t *request) + + crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, &nodeid); + if (nodeid > 0) { +- crm_node_t *node = pcmk__search_cluster_node_cache(nodeid, NULL, +- NULL); ++ crm_node_t *node = NULL; + char *host_alloc = NULL; + ++ node = pcmk__search_node_caches(nodeid, NULL, ++ pcmk__node_search_cluster); + if (node && node->uname) { + // Use cached name if available + host = node->uname; +diff --git a/daemons/based/based_messages.c b/daemons/based/based_messages.c +index 7f503b2..efad9a7 100644 +--- a/daemons/based/based_messages.c ++++ b/daemons/based/based_messages.c +@@ -247,7 +247,10 @@ cib_process_upgrade_server(const char *op, int options, const char *section, xml + + if (rc != pcmk_ok) { + // Notify originating peer so it can notify its local clients +- crm_node_t *origin = pcmk__search_cluster_node_cache(0, host, NULL); ++ crm_node_t *origin = NULL; ++ ++ origin = pcmk__search_node_caches(0, host, ++ pcmk__node_search_cluster); + + crm_info("Rejecting upgrade request from %s: %s " + CRM_XS " rc=%d peer=%s", host, pcmk_strerror(rc), rc, +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index 0f3ea32..63184d2 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -119,8 +119,8 @@ cpg_membership_callback(cpg_handle_t handle, const 
struct cpg_name *cpg_name, + if (controld_globals.dc_name != NULL) { + crm_node_t *peer = NULL; + +- peer = pcmk__search_cluster_node_cache(0, controld_globals.dc_name, +- NULL); ++ peer = pcmk__search_node_caches(0, controld_globals.dc_name, ++ pcmk__node_search_cluster); + if (peer != NULL) { + for (int i = 0; i < left_list_entries; ++i) { + if (left_list[i].nodeid == peer->id) { +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 999dd13..bd5237e 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -485,7 +485,8 @@ relay_message(xmlNode * msg, gboolean originated_locally) + } + + if (!broadcast) { +- node_to = pcmk__search_cluster_node_cache(0, host_to, NULL); ++ node_to = pcmk__search_node_caches(0, host_to, ++ pcmk__node_search_cluster); + if (node_to == NULL) { + crm_warn("Ignoring message %s because node %s is unknown", + ref, host_to); +@@ -1029,7 +1030,8 @@ handle_request(xmlNode *stored_msg, enum crmd_fsa_cause cause) + + if (strcmp(op, CRM_OP_SHUTDOWN_REQ) == 0) { + const char *from = crm_element_value(stored_msg, F_CRM_HOST_FROM); +- crm_node_t *node = pcmk__search_cluster_node_cache(0, from, NULL); ++ crm_node_t *node = pcmk__search_node_caches(0, from, ++ pcmk__node_search_cluster); + + pcmk__update_peer_expected(__func__, node, CRMD_JOINSTATE_DOWN); + if(AM_I_DC == FALSE) { +diff --git a/lib/cluster/cluster.c b/lib/cluster/cluster.c +index f2cd428..1cdc204 100644 +--- a/lib/cluster/cluster.c ++++ b/lib/cluster/cluster.c +@@ -280,7 +280,8 @@ crm_peer_uname(const char *uuid) + return NULL; + } + +- node = pcmk__search_cluster_node_cache((uint32_t) id, NULL, NULL); ++ node = pcmk__search_node_caches((uint32_t) id, NULL, ++ pcmk__node_search_cluster); + if (node != NULL) { + crm_info("Setting uuid for node %s[%u] to %s", + node->uname, node->id, uuid); +diff --git a/lib/cluster/cpg.c b/lib/cluster/cpg.c +index b5f2884..4f3e81c 100644 +--- a/lib/cluster/cpg.c ++++ 
b/lib/cluster/cpg.c +@@ -629,8 +629,8 @@ node_left(const char *cpg_group_name, int event_counter, + const struct cpg_address **sorted_member_list, + size_t member_list_entries) + { +- crm_node_t *peer = pcmk__search_cluster_node_cache(cpg_peer->nodeid, +- NULL, NULL); ++ crm_node_t *peer = pcmk__search_node_caches(cpg_peer->nodeid, NULL, ++ pcmk__node_search_cluster); + const struct cpg_address **rival = NULL; + + /* Most CPG-related Pacemaker code assumes that only one process on a node +diff --git a/lib/cluster/membership.c b/lib/cluster/membership.c +index ef4aaac..73ea1e3 100644 +--- a/lib/cluster/membership.c ++++ b/lib/cluster/membership.c +@@ -122,7 +122,7 @@ crm_remote_peer_get(const char *node_name) + * entry unless it has a node ID, which means the name actually is + * associated with a cluster node. (@TODO return an error in that case?) + */ +- node = pcmk__search_cluster_node_cache(0, node_name, NULL); ++ node = pcmk__search_node_caches(0, node_name, pcmk__node_search_cluster); + if ((node != NULL) && (node->uuid == NULL)) { + /* node_name could be a pointer into the cache entry being removed, so + * reassign it to a copy before the original gets freed +-- +2.31.1 + +From cbeb9eb516d3bf29df7850dcf2a8515f6a0dfb2c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 18 Dec 2023 17:09:12 -0600 +Subject: [PATCH 8/9] Test: cts-cli: strip feature set out of reference output + +--- + cts/cli/regression.tools.exp | 4 ++-- + cts/cts-cli.in | 2 ++ + 2 files changed, 4 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index accf781..417b5cd 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -7667,7 +7667,7 @@ Diff: +++ 0.1.0 (null) + -- /cib/status/node_state[@id='1'] + -- /cib/status/node_state[@id='httpd-bundle-0'] + -- /cib/status/node_state[@id='httpd-bundle-1'] +-+ /cib: @crm_feature_set=3.19.0, @num_updates=0, @admin_epoch=0 +++ /cib: @num_updates=0, @admin_epoch=0 + 
-- /cib: @cib-last-written, @update-origin, @update-client, @update-user, @have-quorum, @dc-uuid + =#=#=#= End test: Get active shadow instance's diff (empty CIB) - Error occurred (1) =#=#=#= + * Passed: crm_shadow - Get active shadow instance's diff (empty CIB) +@@ -7701,7 +7701,7 @@ Diff: +++ 0.1.0 (null) + + + +- ++ + + + +diff --git a/cts/cts-cli.in b/cts/cts-cli.in +index f4cb7c3..40ada49 100755 +--- a/cts/cts-cli.in ++++ b/cts/cts-cli.in +@@ -3357,7 +3357,9 @@ for t in $tests; do + -e 's/ version="[^"]*"/ version=""/' \ + -e 's/request=\".*\(crm_[a-zA-Z0-9]*\)/request=\"\1/' \ + -e 's/crm_feature_set="[^"]*" //'\ ++ -e 's/@crm_feature_set=[0-9.]*, //'\ + -e 's/validate-with="[^"]*" //'\ ++ -e 's/\( +Date: Tue, 2 Jan 2024 14:48:14 -0600 +Subject: [PATCH 9/9] Test: cts-lab: ignore all transition calculation log + messages + +9e28f3b6d means these are now possible for more ignorable errors +--- + python/pacemaker/_cts/patterns.py | 26 +++++++------------------- + 1 file changed, 7 insertions(+), 19 deletions(-) + +diff --git a/python/pacemaker/_cts/patterns.py b/python/pacemaker/_cts/patterns.py +index 0fb1c2b..d05ff5f 100644 +--- a/python/pacemaker/_cts/patterns.py ++++ b/python/pacemaker/_cts/patterns.py +@@ -1,7 +1,7 @@ + """ Pattern-holding classes for Pacemaker's Cluster Test Suite (CTS) """ + + __all__ = ["PatternSelector"] +-__copyright__ = "Copyright 2008-2023 the Pacemaker project contributors" ++__copyright__ = "Copyright 2008-2024 the Pacemaker project contributors" + __license__ = "GNU General Public License version 2 or later (GPLv2+)" + + import argparse +@@ -32,6 +32,12 @@ class BasePatterns: + # pcs can log this when node is fenced, but fencing is OK in some + # tests (and we will catch it in pacemaker logs when not OK) + r"pcs.daemon:No response from: .* request: get_configs, error:", ++ ++ # This is overbroad, but there's no way to say that only certain ++ # transition errors are acceptable. 
We have to rely on causes of a ++ # transition error logging their own error message, which should ++ # always be the case. ++ r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._commands = { +@@ -239,12 +245,6 @@ class Corosync2Patterns(BasePatterns): + r"error:.*cib_(shm|rw) IPC provider disconnected while waiting", + r"error:.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"error: Lost fencer connection", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. +- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["corosync"] = [ +@@ -281,12 +281,6 @@ class Corosync2Patterns(BasePatterns): + r"pacemaker-execd.*Connection to (fencer|stonith-ng).* (closed|failed|lost)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", + r"pacemaker-controld.*:Could not connect to attrd: Connection refused", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. 
+- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-execd"] = [ +@@ -338,12 +332,6 @@ class Corosync2Patterns(BasePatterns): + r"error:.*Lost fencer connection", + r"error:.*Fencer connection failed \(will retry\)", + r"pacemaker-controld.*:\s+Result of .* operation for Fencing.*Error \(Lost connection to fencer\)", +- # This is overbroad, but we don't have a way to say that only +- # certain transition errors are acceptable (if the fencer respawns, +- # fence devices may appear multiply active). We have to rely on +- # other causes of a transition error logging their own error +- # message, which is the usual practice. +- r"pacemaker-schedulerd.* Calculated transition .*/pe-error", + ] + + self._components["pacemaker-fenced-ignore"].extend(self._components["common-ignore"]) +-- +2.31.1 + diff --git a/006-cib-file-feature-set.patch b/006-cib-file-feature-set.patch new file mode 100644 index 0000000..a7ce74d --- /dev/null +++ b/006-cib-file-feature-set.patch @@ -0,0 +1,276 @@ +From d50bbafc32428e873c0052a9defcf93d2e52667e Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 11:35:11 -0500 +Subject: [PATCH 1/3] Refactor: libcrmcommon: Split feature set check into its + own function. + +--- + include/crm/common/cib_internal.h | 4 +++- + lib/cib/cib_utils.c | 12 ++++++------ + lib/common/cib.c | 18 +++++++++++++++++- + 3 files changed, 26 insertions(+), 8 deletions(-) + +diff --git a/include/crm/common/cib_internal.h b/include/crm/common/cib_internal.h +index c41c12e..fa65e58 100644 +--- a/include/crm/common/cib_internal.h ++++ b/include/crm/common/cib_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2023 the Pacemaker project contributors ++ * Copyright 2023-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -16,6 +16,8 @@ extern "C" { + + const char *pcmk__cib_abs_xpath_for(const char *element); + ++int pcmk__check_feature_set(const char *cib_version); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 0082eef..bf2982c 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -353,7 +353,6 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + xmlNode *patchset_cib = NULL; + xmlNode *local_diff = NULL; + +- const char *new_version = NULL; + const char *user = crm_element_value(req, F_CIB_USER); + bool with_digest = false; + +@@ -470,12 +469,13 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + } + + if (scratch) { +- new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); ++ const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); + +- if (new_version && compare_version(new_version, CRM_FEATURE_SET) > 0) { +- crm_err("Discarding update with feature set '%s' greater than our own '%s'", +- new_version, CRM_FEATURE_SET); +- rc = -EPROTONOSUPPORT; ++ rc = pcmk__check_feature_set(new_version); ++ if (rc != pcmk_rc_ok) { ++ pcmk__config_err("Discarding update with feature set '%s' greater than our own '%s'", ++ new_version, CRM_FEATURE_SET); ++ rc = pcmk_rc2legacy(rc); + goto done; + } + } +diff --git a/lib/common/cib.c b/lib/common/cib.c +index fee7881..cbebc2e 100644 +--- a/lib/common/cib.c ++++ b/lib/common/cib.c +@@ -1,6 +1,6 @@ + /* + * Original copyright 2004 International Business Machines +- * Later changes copyright 2008-2023 the Pacemaker project contributors ++ * Later changes copyright 2008-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -173,3 +173,19 @@ pcmk_find_cib_element(xmlNode *cib, const char *element_name) + { + return get_xpath_object(pcmk_cib_xpath_for(element_name), cib, LOG_TRACE); + } ++ ++/*! 
++ * \internal ++ * \brief Check that the feature set in the CIB is supported on this node ++ * ++ * \param[in] new_version XML_ATTR_CRM_VERSION attribute from the CIB ++ */ ++int ++pcmk__check_feature_set(const char *cib_version) ++{ ++ if (cib_version && compare_version(cib_version, CRM_FEATURE_SET) > 0) { ++ return EPROTONOSUPPORT; ++ } ++ ++ return pcmk_rc_ok; ++} +-- +2.31.1 + +From d89fd8336ae47d892201513c99773705d57f15f0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 13:46:42 -0500 +Subject: [PATCH 2/3] Feature: scheduler: Check the CIB feature set in + cluster_status. + +This adds the check that was previously only in cib_perform_op to the +scheduler code, ensuring that any daemon or tool that calls the +scheduler will check that the feature set in the CIB is supported. +--- + lib/pengine/status.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/lib/pengine/status.c b/lib/pengine/status.c +index e6ec237..1294803 100644 +--- a/lib/pengine/status.c ++++ b/lib/pengine/status.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #include + +@@ -70,12 +71,21 @@ pe_free_working_set(pcmk_scheduler_t *scheduler) + gboolean + cluster_status(pcmk_scheduler_t * scheduler) + { ++ const char *new_version = NULL; + xmlNode *section = NULL; + + if ((scheduler == NULL) || (scheduler->input == NULL)) { + return FALSE; + } + ++ new_version = crm_element_value(scheduler->input, XML_ATTR_CRM_VERSION); ++ ++ if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) { ++ pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'", ++ new_version, CRM_FEATURE_SET); ++ return FALSE; ++ } ++ + crm_trace("Beginning unpack"); + + if (scheduler->failed != NULL) { +-- +2.31.1 + +From a3428926d37af506014a6b462d1308d8541c5932 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 10 Jan 2024 14:56:36 -0500 +Subject: [PATCH 3/3] Low: libcib: Do not check CIB feature set for files in + cib_perform_op. 
+ +This is related to the previous feature for transferring schema files to +older remote nodes. In that case, the newer schema files may also have +a newer feature set than the node supports, so the transferred files are +still not usable. + +However, the feature set only matters for the scheduler, not for most +command line tools (obviously, crm_simulate would still care). So in +those cases, we can just disable the feature set check if the CIB was +read in from a file. For the scheduler, the check is still performed as +part of cluster_status. +--- + cts/cli/regression.tools.exp | 2 +- + daemons/based/based_callbacks.c | 4 ++-- + include/crm/cib/internal.h | 4 ++-- + lib/cib/cib_file.c | 2 +- + lib/cib/cib_utils.c | 15 +++++++++------ + 5 files changed, 15 insertions(+), 12 deletions(-) + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index 417b5cd..c81c420 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -7939,7 +7939,7 @@ unpack_config warning: Blind faith: not fencing unseen nodes + =#=#=#= End test: Verbosely verify a file-specified invalid configuration, outputting as xml - Invalid configuration (78) =#=#=#= + * Passed: crm_verify - Verbosely verify a file-specified invalid configuration, outputting as xml + =#=#=#= Begin test: Verbosely verify another file-specified invalid configuration, outputting as xml =#=#=#= +-(cluster_status@status.c:113) warning: Fencing and resource management disabled due to lack of quorum ++(cluster_status@status.c:123) warning: Fencing and resource management disabled due to lack of quorum + + + +diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c +index 5f3dc62..f16e4d9 100644 +--- a/daemons/based/based_callbacks.c ++++ b/daemons/based/based_callbacks.c +@@ -1362,7 +1362,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation, + input = prepare_input(request, operation->type, &section); + + if
(!pcmk_is_set(operation->flags, cib__op_attr_modifies)) { +- rc = cib_perform_op(op, call_options, op_function, true, section, ++ rc = cib_perform_op(NULL, op, call_options, op_function, true, section, + request, input, false, &config_changed, &the_cib, + &result_cib, NULL, &output); + +@@ -1395,7 +1395,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation, + } + + // result_cib must not be modified after cib_perform_op() returns +- rc = cib_perform_op(op, call_options, op_function, false, section, ++ rc = cib_perform_op(NULL, op, call_options, op_function, false, section, + request, input, manage_counters, &config_changed, + &the_cib, &result_cib, cib_diff, &output); + +diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h +index 9d54d52..b6d6871 100644 +--- a/include/crm/cib/internal.h ++++ b/include/crm/cib/internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -206,7 +206,7 @@ int cib__get_notify_patchset(const xmlNode *msg, const xmlNode **patchset); + + bool cib__element_in_patchset(const xmlNode *patchset, const char *element); + +-int cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, ++int cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn, + bool is_query, const char *section, xmlNode *req, + xmlNode *input, bool manage_counters, bool *config_changed, + xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff, +diff --git a/lib/cib/cib_file.c b/lib/cib/cib_file.c +index a279823..9dd952c 100644 +--- a/lib/cib/cib_file.c ++++ b/lib/cib/cib_file.c +@@ -245,7 +245,7 @@ cib_file_process_request(cib_t *cib, xmlNode *request, xmlNode **output) + data = pcmk_find_cib_element(data, section); + } + +- rc = cib_perform_op(op, call_options, op_function, read_only, section, ++ rc = cib_perform_op(cib, op, call_options, op_function, read_only, section, + request, data, true, &changed, &private->cib_xml, + &result_cib, &cib_diff, output); + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index bf2982c..9c3f9f1 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -339,11 +339,10 @@ should_copy_cib(const char *op, const char *section, int call_options) + } + + int +-cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, +- const char *section, xmlNode *req, xmlNode *input, +- bool manage_counters, bool *config_changed, +- xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff, +- xmlNode **output) ++cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn, ++ bool is_query, const char *section, xmlNode *req, xmlNode *input, ++ bool manage_counters, bool *config_changed, xmlNode **current_cib, ++ xmlNode **result_cib, xmlNode **diff, xmlNode **output) + { + int rc = pcmk_ok; + bool check_schema = true; +@@ -468,7 +467,11 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query, + goto 
done; + } + +- if (scratch) { ++ /* If the CIB is from a file, we don't need to check that the feature set is ++ * supported. All we care about in that case is the schema version, which ++ * is checked elsewhere. ++ */ ++ if (scratch && (cib == NULL || cib->variant != cib_file)) { + const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION); + + rc = pcmk__check_feature_set(new_version); +-- +2.31.1 + diff --git a/007-option-metadata.patch b/007-option-metadata.patch new file mode 100644 index 0000000..d948042 --- /dev/null +++ b/007-option-metadata.patch @@ -0,0 +1,3689 @@ +From 4401064f409921caed9444d18a74713250213c44 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 19:52:41 -0800 +Subject: [PATCH 01/24] Test: cts-cli: Update for added spaces + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index b34fba8..1cd049f 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -47,7 +47,7 @@ + + + +- Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure timeouts and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. Allowed values: Zero disables polling, while positive values are an interval in seconds(unless other units are specified, for example "5min") ++ Pacemaker is primarily event-driven, and looks ahead to know when to recheck cluster state for failure timeouts and most time-based rules. However, it will also recheck the cluster after this amount of inactivity, to evaluate rules with date specifications and serve as a fail-safe for certain types of scheduler bugs. 
Allowed values: Zero disables polling, while positive values are an interval in seconds (unless other units are specified, for example "5min") + Polling interval to recheck cluster state and evaluate rules with date specifications + + +@@ -345,7 +345,7 @@ + + + +- Setting this to false may lead to a "split-brain" situation,potentially leading to data loss and/or service unavailability. ++ Setting this to false may lead to a "split-brain" situation, potentially leading to data loss and/or service unavailability. + *** Advanced Use Only *** Whether to fence unseen nodes at start-up + + +-- +2.31.1 + +From e3597b061afa62a1f869a3e238ad78d3f7222029 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 20:28:21 -0800 +Subject: [PATCH 02/24] Test: cts-cli: Update daemon tests to note that + stonith-timeout is used + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 1cd049f..6a24089 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -330,8 +330,8 @@ + + + +- This value is not used by Pacemaker, but is kept for backward compatibility, and certain legacy fence agents might use it. +- *** Advanced Use Only *** Unused by Pacemaker ++ How long to wait for on, off, and reboot fence actions to complete by default ++ How long to wait for on, off, and reboot fence actions to complete by default + + + +-- +2.31.1 + +From cb3431c1058dd3cfbcc1cc490db268a95d1731e1 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 19:20:26 -0800 +Subject: [PATCH 03/24] Refactor: libcrmcommon: New + pcmk__valid_placement_strategy() + +We'll soon need this function outside the scheduler. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + include/crm/common/options_internal.h | 1 + + lib/common/options.c | 16 ++++++++++++++++ + lib/pengine/common.c | 9 +-------- + 3 files changed, 18 insertions(+), 8 deletions(-) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index a9316ca..1ea27ee 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -67,6 +67,7 @@ bool pcmk__valid_number(const char *value); + bool pcmk__valid_positive_number(const char *value); + bool pcmk__valid_quorum(const char *value); + bool pcmk__valid_script(const char *value); ++bool pcmk__valid_placement_strategy(const char *value); + bool pcmk__valid_percentage(const char *value); + + // from watchdog.c +diff --git a/lib/common/options.c b/lib/common/options.c +index 2d86ebc..1db41a2 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -246,6 +246,22 @@ pcmk__valid_script(const char *value) + + return true; + } ++ ++/*! 
++ * \internal ++ * \brief Check whether a string represents a valid placement strategy ++ * ++ * \param[in] value String to validate ++ * ++ * \return \c true if \p value is a valid placement strategy, or \c false ++ * otherwise ++ */ ++bool ++pcmk__valid_placement_strategy(const char *value) ++{ ++ return pcmk__strcase_any_of(value, "default", "utilization", "minimal", ++ "balanced", NULL); ++} + + bool + pcmk__valid_percentage(const char *value) +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 0fdd5a1..6878f4d 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,13 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-static bool +-check_placement_strategy(const char *value) +-{ +- return pcmk__strcase_any_of(value, "default", "utilization", "minimal", +- "balanced", NULL); +-} +- + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -285,7 +278,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "placement-strategy", NULL, "select", + "default, utilization, minimal, balanced", +- "default", check_placement_strategy, ++ "default", pcmk__valid_placement_strategy, + N_("How the cluster should allocate resources to nodes"), + NULL + }, +-- +2.31.1 + +From 4c877cefcde40da8a2cd776956ade62919a2c926 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 9 Jan 2024 22:13:19 -0800 +Subject: [PATCH 04/24] Refactor: controller: Remove stonith-watchdog-timeout + validator function + +...from options array. Instead, call it from the controller after +validating the options array. + +We'll soon be moving the options array to libcrmcommon. There, we don't +have access to controld_verify_stonith_watchdog_timeout() or to the +controller's stonith API connection and node name. New comments and the +following task have more details: https://projects.clusterlabs.org/T749. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 25 ++++++++++++++++++++++++- + lib/common/watchdog.c | 4 ++++ + 2 files changed, 28 insertions(+), 1 deletion(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 644d686..83b802e 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -621,8 +621,18 @@ static pcmk__cluster_option_t controller_options[] = { + "the order in which ping updates arrive.") + }, + { ++ /* @COMPAT Currently unparsable values default to -1 (auto-calculate), ++ * while missing values default to 0 (disable). All values are accepted ++ * (unless the controller finds that the value conflicts with the ++ * SBD_WATCHDOG_TIMEOUT). ++ * ++ * At a compatibility break: properly validate as a timeout, let ++ * either negative values or a particular string like "auto" mean auto- ++ * calculate, and use 0 as the single default for when the option either ++ * is unset or fails to validate. ++ */ + "stonith-watchdog-timeout", NULL, "time", NULL, +- "0", controld_verify_stonith_watchdog_timeout, ++ "0", NULL, + N_("How long before nodes can be assumed to be safely down when " + "watchdog-based self-fencing via SBD is in use"), + N_("If this is set to a positive value, lost nodes are assumed to " +@@ -747,6 +757,19 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + pcmk__validate_cluster_options(config_hash, controller_options, + PCMK__NELEM(controller_options)); + ++ /* Validate the watchdog timeout in the context of the local node ++ * environment. If invalid, the controller will exit with a fatal error. ++ * ++ * We do this via a wrapper in the controller, so that we call ++ * pcmk__valid_stonith_watchdog_timeout() only if watchdog fencing is ++ * enabled for the local node. Otherwise, we may exit unnecessarily. 
++ * ++ * A validator function in libcrmcommon can't act as such a wrapper, because ++ * it doesn't have a stonith API connection or the local node name. ++ */ ++ value = g_hash_table_lookup(config_hash, "stonith-watchdog-timeout"); ++ controld_verify_stonith_watchdog_timeout(value); ++ + value = g_hash_table_lookup(config_hash, "no-quorum-policy"); + if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) { + controld_set_global_flags(controld_no_quorum_suicide); +diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c +index e569214..7d126af 100644 +--- a/lib/common/watchdog.c ++++ b/lib/common/watchdog.c +@@ -278,6 +278,10 @@ pcmk__valid_sbd_timeout(const char *value) + { + long st_timeout = value? crm_get_msec(value) : 0; + ++ /* @COMPAT At a compatibility break, accept either negative values or a ++ * specific string like "auto" (but not both) to mean "auto-calculate the ++ * timeout." Reject other values that aren't parsable as timeouts. ++ */ + if (st_timeout < 0) { + st_timeout = pcmk__auto_watchdog_timeout(); + crm_debug("Using calculated value %ld for stonith-watchdog-timeout (%s)", +-- +2.31.1 + +From 28d96fc802bca24ed3e52b8ce5946f4b2b971b7d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 17:52:18 -0800 +Subject: [PATCH 05/24] Refactor: libcrmcommon: New enum pcmk__opt_context + +At first this will be used only for backward compatibility with metadata +commands for pacemaker-based, pacemaker-controld, and +pacemaker-schedulerd. It may be extended later for meta-attribute +contexts or similar. + +The idea is that we'll consolidate all cluster options into a single +table and use these enum values as filters for getting daemon metadata. + +We won't need a pcmk__opt_context_fenced, because its metadata consists +of stonith instance attributes, not cluster options. Those instance +attributes will be stored in a separate array from the cluster options. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + include/crm/common/options_internal.h | 13 +++++++++++++ + 1 file changed, 13 insertions(+) + +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 1ea27ee..0c6c9e8 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -34,6 +34,19 @@ bool pcmk__env_option_enabled(const char *daemon, const char *option); + * Cluster option handling + */ + ++/*! ++ * \internal ++ * \enum pcmk__opt_context ++ * \brief Context flags for options ++ */ ++enum pcmk__opt_context { ++ // @COMPAT Used only for daemon metadata ++ pcmk__opt_context_none = 0, //!< No additional context ++ pcmk__opt_context_based = (1 << 1), //!< CIB manager metadata ++ pcmk__opt_context_controld = (1 << 2), //!< Controller metadata ++ pcmk__opt_context_schedulerd = (1 << 3), //!< Scheduler metadata ++}; ++ + typedef struct pcmk__cluster_option_s { + const char *name; + const char *alt_name; +-- +2.31.1 + +From 2f8537331e2948b9186555ffbd9c9f2c121587d1 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 17:58:25 -0800 +Subject: [PATCH 06/24] Refactor: libcrmcommon: New + pcmk__cluster_option_t:context member + +Arguably makes more sense adjacent to the type member, but this +placement keeps the diffs smaller when updating existing options arrays. + +We will use this soon to ensure that each option occurs in exactly one +daemon's metadata. Several options (for example, +PCMK_OPT_NO_QUORUM_POLICY) currently appear in the metadata of both the +controller and the scheduler, causing issues for external tools that +parse the output. + +Where an option currently appears in the metadata of both the controller +and the scheduler, it will soon appear only in the scheduler's metadata. +We assign context flags accordingly. + +Note that the fencer doesn't have a context flag. The options in its +metadata are actually stonith device instance attributes, not cluster +options. 
They will continue to reside in a separate table from the +cluster options, so there's no ambiguity about which daemon they "belong +to." + +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 20 +++++++++++++++++ + daemons/fenced/pacemaker-fenced.c | 26 ++++++++++++++++++++++ + include/crm/common/options_internal.h | 3 +++ + lib/cib/cib_utils.c | 3 +++ + lib/pengine/common.c | 32 +++++++++++++++++++++++++++ + 5 files changed, 84 insertions(+) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 83b802e..4d7cb14 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -518,22 +518,26 @@ do_recover(long long action, + static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, ++ pcmk__opt_context_controld, + N_("Pacemaker version on cluster node elected Designated Controller (DC)"), + N_("Includes a hash which identifies the exact changeset the code was " + "built from. 
Used for diagnostic purposes.") + }, + { + "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, ++ pcmk__opt_context_controld, + N_("The messaging stack on which Pacemaker is currently running"), + N_("Used for informational and diagnostic purposes.") + }, + { + "cluster-name", NULL, "string", NULL, NULL, NULL, ++ pcmk__opt_context_controld, + N_("An arbitrary name for the cluster"), + N_("This optional value is mostly for users' convenience as desired " + "in administration, but may also be used in Pacemaker " +@@ -543,6 +547,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", + NULL, "20s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("How long to wait for a response from other nodes during start-up"), + N_("The optimal value will depend on the speed and load of your network " + "and the type of switches used.") +@@ -552,6 +557,7 @@ static pcmk__cluster_option_t controller_options[] = { + N_("Zero disables polling, while positive values are an interval in seconds" + "(unless other units are specified, for example \"5min\")"), + "15min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("Polling interval to recheck cluster state and evaluate rules " + "with date specifications"), + N_("Pacemaker is primarily event-driven, and looks ahead to know when to " +@@ -563,6 +569,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "load-threshold", NULL, "percentage", NULL, + "80%", pcmk__valid_percentage, ++ pcmk__opt_context_controld, + N_("Maximum amount of system load that should be used by cluster nodes"), + N_("The cluster will slow down its recovery process when the amount of " + "system resources used (currently CPU) approaches this limit"), +@@ -570,10 +577,12 @@ static pcmk__cluster_option_t controller_options[] = { + { + "node-action-limit", NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_controld, + N_("Maximum number of jobs 
that can be scheduled per node " + "(defaults to 2x cores)") + }, + { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, ++ pcmk__opt_context_controld, + N_("How a cluster node should react if notified of its own fencing"), + N_("A cluster node may receive notification of its own fencing if fencing " + "is misconfigured, or if fabric fencing is in use that doesn't cut " +@@ -584,6 +593,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, + "2min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("Declare an election failed if it is not decided within this much " + "time. If you need to adjust this value, it probably indicates " +@@ -592,6 +602,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, + "20min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("Exit immediately if shutdown does not complete within this much " + "time. 
If you need to adjust this value, it probably indicates " +@@ -600,6 +611,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "join-integration-timeout", "crmd-integration-timeout", "time", NULL, + "3min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") +@@ -607,6 +619,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, + "30min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + "*** Advanced Use Only ***", + N_("If you need to adjust this value, it probably indicates " + "the presence of a bug.") +@@ -614,6 +627,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "transition-delay", "crmd-transition-delay", "time", NULL, + "0s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("*** Advanced Use Only *** Enabling this option will slow down " + "cluster recovery under all conditions"), + N_("Delay cluster recovery for this much time to allow for additional " +@@ -633,6 +647,7 @@ static pcmk__cluster_option_t controller_options[] = { + */ + "stonith-watchdog-timeout", NULL, "time", NULL, + "0", NULL, ++ pcmk__opt_context_controld, + N_("How long before nodes can be assumed to be safely down when " + "watchdog-based self-fencing via SBD is in use"), + N_("If this is set to a positive value, lost nodes are assumed to " +@@ -654,6 +669,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + "stonith-max-attempts", NULL, "integer", NULL, + "10", pcmk__valid_positive_number, ++ pcmk__opt_context_controld, + N_("How many times fencing can fail before it will no longer be " + "immediately re-attempted on a target") + }, +@@ -662,11 +678,13 @@ static pcmk__cluster_option_t controller_options[] = { + { + "no-quorum-policy", NULL, "select", + "stop, freeze, ignore, demote, suicide", "stop", 
pcmk__valid_quorum, ++ pcmk__opt_context_controld, + N_("What to do when the cluster does not have quorum"), NULL + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_controld, + N_("Whether to lock resources to a cleanly shut down node"), + N_("When true, resources active on a node when it is cleanly shut down " + "are kept \"locked\" to that node (not allowed to run elsewhere) " +@@ -680,6 +698,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("Do not lock resources to a cleanly shut down node longer than " + "this"), + N_("If shutdown-lock is true and this is set to a nonzero time " +@@ -690,6 +709,7 @@ static pcmk__cluster_option_t controller_options[] = { + { + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, + N_("How long to wait for a node that has joined the cluster to join " + "the controller process group"), + N_("Fence nodes that do not join the controller process group within " +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index 7c69fb8..b2f4742 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -528,11 +528,13 @@ st_peer_update_callback(enum crm_status_type type, crm_node_t * node, const void + static pcmk__cluster_option_t fencer_options[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + PCMK_STONITH_HOST_ARGUMENT, NULL, "string", NULL, "port", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate parameter to supply instead of 'port'"), + N_("some devices do not support the " + "standard 'port' parameter or may provide additional ones. 
Use " +@@ -543,17 +545,20 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_HOST_MAP,NULL, "string", NULL, "", NULL, ++ pcmk__opt_context_none, + N_("A mapping of host names to ports numbers for devices that do not support host names."), + N_("Eg. node1:1;node2:2,3 would tell the cluster to use port 1 for node1 and ports 2 and 3 for node2") + }, + { + PCMK_STONITH_HOST_LIST,NULL, "string", NULL, "", NULL, ++ pcmk__opt_context_none, + N_("Eg. node1,node2,node3"), + N_("A list of machines controlled by " + "this device (Optional unless pcmk_host_list=static-list)") + }, + { + PCMK_STONITH_HOST_CHECK,NULL, "string", NULL, "dynamic-list", NULL, ++ pcmk__opt_context_none, + N_("How to determine which machines are controlled by the device."), + N_("Allowed values: dynamic-list " + "(query the device via the 'list' command), static-list " +@@ -564,6 +569,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_DELAY_MAX,NULL, "time", NULL, "0s", NULL, ++ pcmk__opt_context_none, + N_("Enable a base delay for fencing actions and specify base delay value."), + N_("Enable a delay of no more than the " + "time specified before executing fencing actions. Pacemaker " +@@ -573,6 +579,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_DELAY_BASE,NULL, "string", NULL, "0s", NULL, ++ pcmk__opt_context_none, + N_("Enable a base delay for " + "fencing actions and specify base delay value."), + N_("This enables a static delay for " +@@ -587,6 +594,7 @@ static pcmk__cluster_option_t fencer_options[] = { + }, + { + PCMK_STONITH_ACTION_LIMIT,NULL, "integer", NULL, "1", NULL, ++ pcmk__opt_context_none, + N_("The maximum number of actions can be performed in parallel on this device"), + N_("Cluster property concurrent-fencing=true needs to be configured first." + "Then use this to specify the maximum number of actions can be performed in parallel on this device. 
-1 is unlimited.") +@@ -594,18 +602,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_reboot_action", NULL, "string", NULL, + PCMK_ACTION_REBOOT, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of 'reboot'"), + N_("Some devices do not support the standard commands or may provide additional ones.\n" + "Use this to specify an alternate, device-specific, command that implements the \'reboot\' action.") + }, + { + "pcmk_reboot_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for reboot actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'reboot\' actions.") + }, + { + "pcmk_reboot_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'reboot' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -614,18 +625,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_off_action", NULL, "string", NULL, + PCMK_ACTION_OFF, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'off\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'off\' action.") + }, + { + "pcmk_off_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for off actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." 
+ "Use this to specify an alternate, device-specific, timeout for \'off\' actions.") + }, + { + "pcmk_off_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'off' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -634,18 +648,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_on_action", NULL, "string", NULL, + PCMK_ACTION_ON, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of 'on'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'on\' action.") + }, + { + "pcmk_on_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for on actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'on\' actions.") + }, + { + "pcmk_on_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the 'on' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." 
+@@ -654,18 +671,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_list_action",NULL, "string", NULL, + PCMK_ACTION_LIST, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'list\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'list\' action.") + }, + { + "pcmk_list_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for list actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'list\' actions.") + }, + { + "pcmk_list_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'list\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -674,18 +694,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_monitor_action", NULL, "string", NULL, + PCMK_ACTION_MONITOR, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'monitor\'"), + N_("Some devices do not support the standard commands or may provide additional ones." 
+ "Use this to specify an alternate, device-specific, command that implements the \'monitor\' action.") + }, + { + "pcmk_monitor_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for monitor actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal.\n" + "Use this to specify an alternate, device-specific, timeout for \'monitor\' actions.") + }, + { + "pcmk_monitor_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'monitor\' command within the timeout period"), + N_("Some devices do not support multiple connections." + " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +@@ -694,18 +717,21 @@ static pcmk__cluster_option_t fencer_options[] = { + { + "pcmk_status_action", NULL, "string", NULL, + PCMK_ACTION_STATUS, NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: An alternate command to run instead of \'status\'"), + N_("Some devices do not support the standard commands or may provide additional ones." + "Use this to specify an alternate, device-specific, command that implements the \'status\' action.") + }, + { + "pcmk_status_timeout",NULL, "time", NULL, "60s", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: Specify an alternate timeout to use for status actions instead of stonith-timeout"), + N_("Some devices need much more/less time to complete than normal." + "Use this to specify an alternate, device-specific, timeout for \'status\' actions.") + }, + { + "pcmk_status_retries",NULL, "integer", NULL, "2", NULL, ++ pcmk__opt_context_none, + N_("Advanced use only: The maximum number of times to retry the \'status\' command within the timeout period"), + N_("Some devices do not support multiple connections." 
+ " Operations may 'fail' if the device is busy with another task so Pacemaker will automatically retry the operation, if there is time remaining." +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 0c6c9e8..b2525ef 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -56,6 +56,9 @@ typedef struct pcmk__cluster_option_s { + + bool (*is_valid)(const char *); + ++ // @COMPAT context is used only for daemon meta-data ++ enum pcmk__opt_context context; ++ + const char *description_short; + const char *description_long; + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 9c3f9f1..9e4060b 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -869,18 +869,21 @@ cib_native_notify(gpointer data, gpointer user_data) + static pcmk__cluster_option_t cib_opts[] = { + /* name, legacy name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "enable-acl", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_based, + N_("Enable Access Control Lists (ACLs) for the CIB"), + NULL + }, + { + "cluster-ipc-limit", NULL, "integer", NULL, + "500", pcmk__valid_positive_number, ++ pcmk__opt_context_based, + N_("Maximum IPC message backlog before disconnecting a cluster daemon"), + N_("Raise this if log has \"Evicting client\" messages for cluster daemon" + " PIDs (a good value is the number of resources in the cluster" +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 6878f4d..383c4af 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -24,24 +24,28 @@ gboolean was_processing_warning = FALSE; + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, ++ * context, + * short description, + * long description + */ + { + "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", 
+ "stop", pcmk__valid_quorum, ++ pcmk__opt_context_schedulerd, + N_("What to do when the cluster does not have quorum"), + NULL + }, + { + "symmetric-cluster", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether resources can run on any node by default"), + NULL + }, + { + "maintenance-mode", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should refrain from monitoring, starting, " + "and stopping resources"), + NULL +@@ -49,6 +53,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "start-failure-is-fatal", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether a start failure should prevent a resource from being " + "recovered on the same node"), + N_("When true, the cluster will immediately ban a resource from a node " +@@ -58,12 +63,14 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "enable-startup-probes", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should check for active resources during start-up"), + NULL + }, + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to lock resources to a cleanly shut down node"), + N_("When true, resources active on a node when it is cleanly shut down " + "are kept \"locked\" to that node (not allowed to run elsewhere) " +@@ -77,6 +84,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Do not lock resources to a cleanly shut down node longer than " + "this"), + N_("If shutdown-lock is true and this is set to a nonzero time " +@@ -89,6 +97,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-enabled", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ 
pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** " + "Whether nodes may be fenced as part of recovery"), + N_("If false, unresponsive nodes are immediately assumed to be harmless, " +@@ -99,6 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-action", NULL, "select", "reboot, off, poweroff", + PCMK_ACTION_REBOOT, pcmk__is_fencing_action, ++ pcmk__opt_context_schedulerd, + N_("Action to send to fence device when a node needs to be fenced " + "(\"poweroff\" is a deprecated alias for \"off\")"), + NULL +@@ -106,6 +116,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stonith-timeout", NULL, "time", NULL, + "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** Unused by Pacemaker"), + N_("This value is not used by Pacemaker, but is kept for backward " + "compatibility, and certain legacy fence agents might use it.") +@@ -113,6 +124,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether watchdog integration is enabled"), + N_("This is set automatically by the cluster according to whether SBD " + "is detected to be in use. User-configured values are ignored. 
" +@@ -124,12 +136,14 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "concurrent-fencing", NULL, "boolean", NULL, + PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Allow performing fencing operations in parallel"), + NULL + }, + { + "startup-fencing", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"), + N_("Setting this to false may lead to a \"split-brain\" situation," + "potentially leading to data loss and/or service unavailability.") +@@ -137,6 +151,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"), + N_("Apply specified delay for the fencings that are targeting the lost " + "nodes with the highest total resource priority in case we don't " +@@ -154,6 +169,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, + "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("How long to wait for a node that has joined the cluster to join " + "the controller process group"), + N_("Fence nodes that do not join the controller process group within " +@@ -165,6 +181,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "cluster-delay", NULL, "time", NULL, + "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, + N_("Maximum time for node-to-node communication"), + N_("The node elected Designated Controller (DC) will consider an action " + "failed if it does not get a response from the node executing the " +@@ -175,6 +192,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "batch-limit", NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("Maximum number of jobs that 
the cluster may execute in parallel " + "across all nodes"), + N_("The \"correct\" value will depend on the speed and load of your " +@@ -185,6 +203,7 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "migration-limit", NULL, "integer", NULL, + "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of live migration actions that the cluster is allowed " + "to execute in parallel on a node (-1 means no limit)") + }, +@@ -193,24 +212,28 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "stop-all-resources", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether the cluster should stop all active resources"), + NULL + }, + { + "stop-orphan-resources", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to stop resources that were removed from the configuration"), + NULL + }, + { + "stop-orphan-actions", NULL, "boolean", NULL, + "true", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("Whether to cancel recurring actions removed from the configuration"), + NULL + }, + { + "remove-after-stop", NULL, "boolean", NULL, + "false", pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, + N_("*** Deprecated *** Whether to remove stopped resources from " + "the executor"), + N_("Values other than default are poorly tested and potentially dangerous." 
+@@ -221,18 +244,21 @@ static pcmk__cluster_option_t pe_opts[] = { + { + "pe-error-series-max", NULL, "integer", NULL, + "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs resulting in errors to save"), + N_("Zero to disable, -1 to store unlimited.") + }, + { + "pe-warn-series-max", NULL, "integer", NULL, + "5000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs resulting in warnings to save"), + N_("Zero to disable, -1 to store unlimited.") + }, + { + "pe-input-series-max", NULL, "integer", NULL, + "4000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The number of scheduler inputs without errors or warnings to save"), + N_("Zero to disable, -1 to store unlimited.") + }, +@@ -244,6 +270,7 @@ static pcmk__cluster_option_t pe_opts[] = { + PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " + PCMK__VALUE_CUSTOM, + PCMK__VALUE_NONE, pcmk__validate_health_strategy, ++ pcmk__opt_context_schedulerd, + N_("How cluster should react to node health attributes"), + N_("Requires external entities to create node attributes (named with " + "the prefix \"#health\") with values \"red\", " +@@ -252,24 +279,28 @@ static pcmk__cluster_option_t pe_opts[] = { + { + PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("Base health score assigned to a node"), + N_("Only used when \"node-health-strategy\" is set to \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"green\""), + N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, + "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"yellow\""), + 
N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, + { + PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL, + "-INFINITY", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, + N_("The score to use for a node health attribute whose value is \"red\""), + N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") + }, +@@ -279,6 +310,7 @@ static pcmk__cluster_option_t pe_opts[] = { + "placement-strategy", NULL, "select", + "default, utilization, minimal, balanced", + "default", pcmk__valid_placement_strategy, ++ pcmk__opt_context_schedulerd, + N_("How the cluster should allocate resources to nodes"), + NULL + }, +-- +2.31.1 + +From cc7c3c87d333854d0f28abe461dd58d5c94b0888 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 18:47:25 -0800 +Subject: [PATCH 07/24] Refactor: libcrmcommon: Consolidate cluster option + metadata + +This isn't plugged in yet. It's also currently defined out, to avoid an +unused variable warning from the compiler. + +Ref T746 + +Signed-off-by: Reid Wahl +--- + lib/common/options.c | 499 +++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 499 insertions(+) + +diff --git a/lib/common/options.c b/lib/common/options.c +index 1db41a2..ff73dcc 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -20,6 +20,7 @@ + #include + + #include ++#include + + void + pcmk__cli_help(char cmd) +@@ -38,6 +39,504 @@ pcmk__cli_help(char cmd) + } + + ++/* ++ * Option metadata ++ */ ++ ++#if 0 ++static pcmk__cluster_option_t cluster_options[] = { ++ /* name, old name, type, allowed values, ++ * default value, validator, ++ * context, ++ * short description, ++ * long description ++ */ ++ { ++ "dc-version", NULL, "string", NULL, ++ PCMK__VALUE_NONE, NULL, ++ pcmk__opt_context_controld, ++ N_("Pacemaker version on cluster node elected Designated Controller " ++ "(DC)"), ++ N_("Includes a hash which identifies the exact changeset the code was " ++ "built from. 
Used for diagnostic purposes."), ++ }, ++ { ++ "cluster-infrastructure", NULL, "string", NULL, ++ "corosync", NULL, ++ pcmk__opt_context_controld, ++ N_("The messaging stack on which Pacemaker is currently running"), ++ N_("Used for informational and diagnostic purposes."), ++ }, ++ { ++ "cluster-name", NULL, "string", NULL, ++ NULL, NULL, ++ pcmk__opt_context_controld, ++ N_("An arbitrary name for the cluster"), ++ N_("This optional value is mostly for users' convenience as desired " ++ "in administration, but may also be used in Pacemaker " ++ "configuration rules via the #cluster-name node attribute, and " ++ "by higher-level tools and resource agents."), ++ }, ++ { ++ "dc-deadtime", NULL, "time", NULL, ++ "20s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("How long to wait for a response from other nodes during start-up"), ++ N_("The optimal value will depend on the speed and load of your " ++ "network and the type of switches used."), ++ }, ++ { ++ "cluster-recheck-interval", NULL, "time", ++ N_("Zero disables polling, while positive values are an interval in " ++ "seconds (unless other units are specified, for example \"5min\")"), ++ "15min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("Polling interval to recheck cluster state and evaluate rules " ++ "with date specifications"), ++ N_("Pacemaker is primarily event-driven, and looks ahead to know when " ++ "to recheck cluster state for failure timeouts and most time-based " ++ "rules. 
However, it will also recheck the cluster after this " ++ "amount of inactivity, to evaluate rules with date specifications " ++ "and serve as a fail-safe for certain types of scheduler bugs."), ++ }, ++ { ++ "fence-reaction", NULL, "select", "stop, panic", ++ "stop", NULL, ++ pcmk__opt_context_controld, ++ N_("How a cluster node should react if notified of its own fencing"), ++ N_("A cluster node may receive notification of its own fencing if " ++ "fencing is misconfigured, or if fabric fencing is in use that " ++ "doesn't cut cluster communication. Use \"stop\" to attempt to " ++ "immediately stop Pacemaker and stay stopped, or \"panic\" to " ++ "attempt to immediately reboot the local node, falling back to " ++ "stop on failure."), ++ }, ++ { ++ "election-timeout", NULL, "time", NULL, ++ "2min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("Declare an election failed if it is not decided within this much " ++ "time. If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "shutdown-escalation", NULL, "time", NULL, ++ "20min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("Exit immediately if shutdown does not complete within this much " ++ "time. 
If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "join-integration-timeout", "crmd-integration-timeout", "time", ++ NULL, ++ "3min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "join-finalization-timeout", "crmd-finalization-timeout", ++ "time", NULL, ++ "30min", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only ***"), ++ N_("If you need to adjust this value, it probably indicates " ++ "the presence of a bug."), ++ }, ++ { ++ "transition-delay", "crmd-transition-delay", "time", NULL, ++ "0s", pcmk__valid_interval_spec, ++ pcmk__opt_context_controld, ++ N_("*** Advanced Use Only *** " ++ "Enabling this option will slow down cluster recovery under all " ++ "conditions"), ++ N_("Delay cluster recovery for this much time to allow for additional " ++ "events to occur. Useful if your configuration is sensitive to " ++ "the order in which ping updates arrive."), ++ }, ++ { ++ "no-quorum-policy", NULL, "select", ++ "stop, freeze, ignore, demote, suicide", ++ "stop", pcmk__valid_quorum, ++ pcmk__opt_context_schedulerd, ++ N_("What to do when the cluster does not have quorum"), ++ NULL, ++ }, ++ { ++ "shutdown-lock", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to lock resources to a cleanly shut down node"), ++ N_("When true, resources active on a node when it is cleanly shut down " ++ "are kept \"locked\" to that node (not allowed to run elsewhere) " ++ "until they start again on that node after it rejoins (or for at " ++ "most shutdown-lock-limit, if set). Stonith resources and " ++ "Pacemaker Remote connections are never locked. 
Clone and bundle " ++ "instances and the promoted role of promotable clones are " ++ "currently never locked, though support could be added in a future " ++ "release."), ++ }, ++ { ++ "shutdown-lock-limit", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Do not lock resources to a cleanly shut down node longer than " ++ "this"), ++ N_("If shutdown-lock is true and this is set to a nonzero time " ++ "duration, shutdown locks will expire after this much time has " ++ "passed since the shutdown was initiated, even if the node has not " ++ "rejoined."), ++ }, ++ { ++ "enable-acl", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_based, ++ N_("Enable Access Control Lists (ACLs) for the CIB"), ++ NULL, ++ }, ++ { ++ "symmetric-cluster", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether resources can run on any node by default"), ++ NULL, ++ }, ++ { ++ "maintenance-mode", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should refrain from monitoring, starting, and " ++ "stopping resources"), ++ NULL, ++ }, ++ { ++ "start-failure-is-fatal", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether a start failure should prevent a resource from being " ++ "recovered on the same node"), ++ N_("When true, the cluster will immediately ban a resource from a node " ++ "if it fails to start there. 
When false, the cluster will instead " ++ "check the resource's fail count against its migration-threshold.") ++ }, ++ { ++ "enable-startup-probes", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should check for active resources during " ++ "start-up"), ++ NULL, ++ }, ++ ++ // Fencing-related options ++ { ++ "stonith-enabled", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Advanced Use Only *** " ++ "Whether nodes may be fenced as part of recovery"), ++ N_("If false, unresponsive nodes are immediately assumed to be " ++ "harmless, and resources that were active on them may be recovered " ++ "elsewhere. This can result in a \"split-brain\" situation, " ++ "potentially leading to data loss and/or service unavailability."), ++ }, ++ { ++ "stonith-action", NULL, "select", "reboot, off, poweroff", ++ PCMK_ACTION_REBOOT, pcmk__is_fencing_action, ++ pcmk__opt_context_schedulerd, ++ N_("Action to send to fence device when a node needs to be fenced " ++ "(\"poweroff\" is a deprecated alias for \"off\")"), ++ NULL, ++ }, ++ { ++ "stonith-timeout", NULL, "time", NULL, ++ "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("How long to wait for on, off, and reboot fence actions to complete " ++ "by default"), ++ NULL, ++ }, ++ { ++ "have-watchdog", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether watchdog integration is enabled"), ++ N_("This is set automatically by the cluster according to whether SBD " ++ "is detected to be in use. User-configured values are ignored. " ++ "The value `true` is meaningful if diskless SBD is used and " ++ "`stonith-watchdog-timeout` is nonzero. 
In that case, if fencing " ++ "is required, watchdog-based self-fencing will be performed via " ++ "SBD without requiring a fencing resource explicitly configured."), ++ }, ++ { ++ /* @COMPAT Currently, unparsable values default to -1 (auto-calculate), ++ * while missing values default to 0 (disable). All values are accepted ++ * (unless the controller finds that the value conflicts with the ++ * SBD_WATCHDOG_TIMEOUT). ++ * ++ * At a compatibility break: properly validate as a timeout, let ++ * either negative values or a particular string like "auto" mean auto- ++ * calculate, and use 0 as the single default for when the option either ++ * is unset or fails to validate. ++ */ ++ "stonith-watchdog-timeout", NULL, "time", NULL, ++ "0", NULL, ++ pcmk__opt_context_controld, ++ N_("How long before nodes can be assumed to be safely down when " ++ "watchdog-based self-fencing via SBD is in use"), ++ N_("If this is set to a positive value, lost nodes are assumed to " ++ "self-fence using watchdog-based SBD within this much time. This " ++ "does not require a fencing resource to be explicitly configured, " ++ "though a fence_watchdog resource can be configured, to limit use " ++ "to specific nodes. If this is set to 0 (the default), the cluster " ++ "will never assume watchdog-based self-fencing. If this is set to a " ++ "negative value, the cluster will use twice the local value of the " ++ "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " ++ "or otherwise treat this as 0. WARNING: When used, this timeout " ++ "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " ++ "watchdog-based SBD, and Pacemaker will refuse to start on any of " ++ "those nodes where this is not true for the local value or SBD is " ++ "not active. 
When this is set to a negative value, " ++ "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " ++ "that use SBD, otherwise data corruption or loss could occur."), ++ }, ++ { ++ "stonith-max-attempts", NULL, "integer", NULL, ++ "10", pcmk__valid_positive_number, ++ pcmk__opt_context_controld, ++ N_("How many times fencing can fail before it will no longer be " ++ "immediately re-attempted on a target"), ++ NULL, ++ }, ++ { ++ "concurrent-fencing", NULL, "boolean", NULL, ++ PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Allow performing fencing operations in parallel"), ++ NULL, ++ }, ++ { ++ "startup-fencing", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Advanced Use Only *** " ++ "Whether to fence unseen nodes at start-up"), ++ N_("Setting this to false may lead to a \"split-brain\" situation, " ++ "potentially leading to data loss and/or service unavailability."), ++ }, ++ { ++ "priority-fencing-delay", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Apply fencing delay targeting the lost nodes with the highest " ++ "total resource priority"), ++ N_("Apply specified delay for the fencings that are targeting the lost " ++ "nodes with the highest total resource priority in case we don't " ++ "have the majority of the nodes in our cluster partition, so that " ++ "the more significant nodes potentially win any fencing match, " ++ "which is especially meaningful under split-brain of 2-node " ++ "cluster. A promoted resource instance takes the base priority + 1 " ++ "on calculation if the base priority is not 0. Any static/random " ++ "delays that are introduced by `pcmk_delay_base/max` configured " ++ "for the corresponding fencing resources will be added to this " ++ "delay. This delay should be significantly greater than, safely " ++ "twice, the maximum `pcmk_delay_base/max`. 
By default, priority " ++ "fencing delay is disabled."), ++ }, ++ { ++ "node-pending-timeout", NULL, "time", NULL, ++ "0", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("How long to wait for a node that has joined the cluster to join " ++ "the controller process group"), ++ N_("Fence nodes that do not join the controller process group within " ++ "this much time after joining the cluster, to allow the cluster " ++ "to continue managing resources. A value of 0 means never fence " ++ "pending nodes. Setting the value to 2h means fence nodes after " ++ "2 hours."), ++ }, ++ { ++ "cluster-delay", NULL, "time", NULL, ++ "60s", pcmk__valid_interval_spec, ++ pcmk__opt_context_schedulerd, ++ N_("Maximum time for node-to-node communication"), ++ N_("The node elected Designated Controller (DC) will consider an action " ++ "failed if it does not get a response from the node executing the " ++ "action within this time (after considering the action's own " ++ "timeout). The \"correct\" value will depend on the speed and " ++ "load of your network and cluster nodes.") ++ }, ++ ++ // Limits ++ { ++ "load-threshold", NULL, "percentage", NULL, ++ "80%", pcmk__valid_percentage, ++ pcmk__opt_context_controld, ++ N_("Maximum amount of system load that should be used by cluster " ++ "nodes"), ++ N_("The cluster will slow down its recovery process when the amount of " ++ "system resources used (currently CPU) approaches this limit"), ++ }, ++ { ++ "node-action-limit", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_controld, ++ N_("Maximum number of jobs that can be scheduled per node (defaults to " ++ "2x cores)"), ++ NULL, ++ }, ++ { ++ "batch-limit", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("Maximum number of jobs that the cluster may execute in parallel " ++ "across all nodes"), ++ N_("The \"correct\" value will depend on the speed and load of your " ++ "network and cluster nodes. 
If set to 0, the cluster will " ++ "impose a dynamically calculated limit when any node has a " ++ "high load."), ++ }, ++ { ++ "migration-limit", NULL, "integer", NULL, ++ "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of live migration actions that the cluster is allowed " ++ "to execute in parallel on a node (-1 means no limit)"), ++ NULL, ++ }, ++ { ++ "cluster-ipc-limit", NULL, "integer", NULL, ++ "500", pcmk__valid_positive_number, ++ pcmk__opt_context_based, ++ N_("Maximum IPC message backlog before disconnecting a cluster daemon"), ++ N_("Raise this if log has \"Evicting client\" messages for cluster " ++ "daemon PIDs (a good value is the number of resources in the " ++ "cluster multiplied by the number of nodes)."), ++ }, ++ ++ // Orphans and stopping ++ { ++ "stop-all-resources", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether the cluster should stop all active resources"), ++ NULL, ++ }, ++ { ++ "stop-orphan-resources", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to stop resources that were removed from the " ++ "configuration"), ++ NULL, ++ }, ++ { ++ "stop-orphan-actions", NULL, "boolean", NULL, ++ XML_BOOLEAN_TRUE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("Whether to cancel recurring actions removed from the " ++ "configuration"), ++ NULL, ++ }, ++ { ++ "remove-after-stop", NULL, "boolean", NULL, ++ XML_BOOLEAN_FALSE, pcmk__valid_boolean, ++ pcmk__opt_context_schedulerd, ++ N_("*** Deprecated *** " ++ "Whether to remove stopped resources from the executor"), ++ N_("Values other than default are poorly tested and potentially " ++ "dangerous. 
This option will be removed in a future release."), ++ }, ++ ++ // Storing inputs ++ { ++ "pe-error-series-max", NULL, "integer", NULL, ++ "-1", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs resulting in errors to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ { ++ "pe-warn-series-max", NULL, "integer", NULL, ++ "5000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs resulting in warnings to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ { ++ "pe-input-series-max", NULL, "integer", NULL, ++ "4000", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The number of scheduler inputs without errors or warnings to save"), ++ N_("Zero to disable, -1 to store unlimited."), ++ }, ++ ++ // Node health ++ { ++ "node-health-strategy", NULL, "select", ++ PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", " ++ PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " ++ PCMK__VALUE_CUSTOM, ++ PCMK__VALUE_NONE, pcmk__validate_health_strategy, ++ pcmk__opt_context_schedulerd, ++ N_("How cluster should react to node health attributes"), ++ N_("Requires external entities to create node attributes (named with " ++ "the prefix \"#health\") with values \"red\", \"yellow\", or " ++ "\"green\".") ++ }, ++ { ++ "node-health-base", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("Base health score assigned to a node"), ++ N_("Only used when \"node-health-strategy\" is set to " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-green", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The score to use for a node health attribute whose value is " ++ "\"green\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-yellow", NULL, "integer", NULL, ++ "0", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, 
++ N_("The score to use for a node health attribute whose value is " ++ "\"yellow\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\"."), ++ }, ++ { ++ "node-health-red", NULL, "integer", NULL, ++ "-INFINITY", pcmk__valid_number, ++ pcmk__opt_context_schedulerd, ++ N_("The score to use for a node health attribute whose value is " ++ "\"red\""), ++ N_("Only used when \"node-health-strategy\" is set to \"custom\" or " ++ "\"progressive\".") ++ }, ++ ++ // Placement strategy ++ { ++ "placement-strategy", NULL, "select", ++ "default, utilization, minimal, balanced", ++ "default", pcmk__valid_placement_strategy, ++ pcmk__opt_context_schedulerd, ++ N_("How the cluster should allocate resources to nodes"), ++ NULL, ++ }, ++}; ++#endif // 0 ++ ++ + /* + * Environment variable option handling + */ +-- +2.31.1 + +From 96fa08b7adc911cce417f7f9889029510ec1c428 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 10 Jan 2024 23:35:22 -0800 +Subject: [PATCH 08/24] Refactor: libcrmcommon: New filter arg to + pcmk__format_option_metadata() + +Now each cluster option is in exactly one daemon's metadata. The four +options that were previously in the metadata of both the controller and +the scheduler are now only in the scheduler's metadata. + +All daemons still have access to all the options they use. + +All function calls in daemons still use the local options arrays rather +than the one in libcrmcommon. That will change in upcoming commits. 
+ +Closes T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 6 ++++-- + daemons/fenced/pacemaker-fenced.c | 17 +++++++++------- + include/crm/common/options_internal.h | 1 + + lib/cib/cib_utils.c | 7 ++++--- + lib/common/options.c | 28 +++++++++++++++++++++++++-- + lib/pengine/common.c | 7 ++++--- + 6 files changed, 49 insertions(+), 17 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 4d7cb14..8fe09da 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -723,11 +723,13 @@ static pcmk__cluster_option_t controller_options[] = { + void + crmd_metadata(void) + { ++ const char *name = "pacemaker-controld"; + const char *desc_short = "Pacemaker controller options"; + const char *desc_long = "Cluster options used by Pacemaker's controller"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short, +- desc_long, controller_options, ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld, ++ controller_options, + PCMK__NELEM(controller_options)); + printf("%s", s); + g_free(s); +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index b2f4742..d0b6c31 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -742,14 +742,17 @@ static pcmk__cluster_option_t fencer_options[] = { + void + fencer_metadata(void) + { ++ const char *name = "pacemaker-fenced"; + const char *desc_short = N_("Instance attributes available for all " +- "\"stonith\"-class resources"); +- const char *desc_long = N_("Instance attributes available for all \"stonith\"-" +- "class resources and used by Pacemaker's fence " +- "daemon, formerly known as stonithd"); +- +- gchar *s = pcmk__format_option_metadata("pacemaker-fenced", desc_short, +- desc_long, fencer_options, ++ "\"stonith\"-class resources"); ++ const char *desc_long = N_("Instance 
attributes available for all " ++ "\"stonith\"-class resources and used by " ++ "Pacemaker's fence daemon, formerly known as " ++ "stonithd"); ++ ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_none, ++ fencer_options, + PCMK__NELEM(fencer_options)); + printf("%s", s); + g_free(s); +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index b2525ef..89d27d1 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -70,6 +70,7 @@ const char *pcmk__cluster_option(GHashTable *options, + + gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, ++ enum pcmk__opt_context filter, + pcmk__cluster_option_t *option_list, + int len); + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 9e4060b..2205d15 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -894,13 +894,14 @@ static pcmk__cluster_option_t cib_opts[] = { + void + cib_metadata(void) + { ++ const char *name = "pacemaker-based"; + const char *desc_short = "Cluster Information Base manager options"; + const char *desc_long = "Cluster options used by Pacemaker's Cluster " + "Information Base manager"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-based", desc_short, +- desc_long, cib_opts, +- PCMK__NELEM(cib_opts)); ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based, ++ cib_opts, PCMK__NELEM(cib_opts)); + printf("%s", s); + g_free(s); + } +diff --git a/lib/common/options.c b/lib/common/options.c +index ff73dcc..d5b6c17 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -937,13 +937,32 @@ add_desc(GString *s, const char *tag, const char *desc, const char *values, + free(escaped_en); + } + ++/*! 
++ * \internal ++ * \brief Format option metadata as an OCF-like XML string ++ * ++ * \param[in] name Daemon name ++ * \param[in] desc_short Short description of the daemon ++ * \param[in] desc_long Long description of the daemon ++ * \param[in] filter If not \c pcmk__opt_context_none, include only ++ * those options whose \c context field is equal to ++ * \p filter ++ * \param[in] option_list Options whose metadata to format ++ * \param[in] len Number of items in \p option_list ++ * ++ * \return A string containing OCF-like option metadata XML ++ * ++ * \note The caller is responsible for freeing the return value using ++ * \c g_free(). ++ */ + gchar * + pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, ++ enum pcmk__opt_context filter, + pcmk__cluster_option_t *option_list, int len) + { +- /* big enough to hold "pacemaker-schedulerd metadata" output */ +- GString *s = g_string_sized_new(13000); ++ // Large enough to hold current cluster options with room for growth (2^15) ++ GString *s = g_string_sized_new(32768); + + pcmk__g_strcat(s, + "\n" +@@ -964,6 +983,11 @@ pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *opt_desc_short = option_list[lpc].description_short; + const char *opt_desc_long = option_list[lpc].description_long; + ++ if ((filter != pcmk__opt_context_none) ++ && (filter != option_list[lpc].context)) { ++ continue; ++ } ++ + // The standard requires long and short parameter descriptions + CRM_ASSERT((opt_desc_short != NULL) || (opt_desc_long != NULL)); + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 383c4af..e9aa2e2 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -319,12 +319,13 @@ static pcmk__cluster_option_t pe_opts[] = { + void + pe_metadata(pcmk__output_t *out) + { ++ const char *name = "pacemaker-schedulerd"; + const char *desc_short = "Pacemaker scheduler options"; + const char *desc_long = "Cluster options used by 
Pacemaker's scheduler"; + +- gchar *s = pcmk__format_option_metadata("pacemaker-schedulerd", desc_short, +- desc_long, pe_opts, +- PCMK__NELEM(pe_opts)); ++ gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd, ++ pe_opts, PCMK__NELEM(pe_opts)); + out->output_xml(out, "metadata", s); + g_free(s); + } +-- +2.31.1 + +From 96b59bf0c66fccc0656a9195ebe7580d54083eb3 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 10 Jan 2024 23:59:39 -0800 +Subject: [PATCH 09/24] Test: cts-cli: Update daemon outputs for option + filtering + +Now each cluster option is in exactly one daemon's metadata. The four +options that were previously in the metadata of both the controller and +the scheduler are now only in the scheduler's metadata. + +All daemons still have access to all the options they use. + +Ref T746 + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 26 -------------------------- + 1 file changed, 26 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 6a24089..9b2dd96 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -101,32 +101,6 @@ + How many times fencing can fail before it will no longer be immediately re-attempted on a target + + +- +- What to do when the cluster does not have quorum Allowed values: stop, freeze, ignore, demote, suicide +- What to do when the cluster does not have quorum +- +- +- +- +- When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. 
+- Whether to lock resources to a cleanly shut down node +- +- +- +- If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. +- Do not lock resources to a cleanly shut down node longer than this +- +- +- +- Fence nodes that do not join the controller process group within this much time after joining the cluster, to allow the cluster to continue managing resources. A value of 0 means never fence pending nodes. Setting the value to 2h means fence nodes after 2 hours. +- How long to wait for a node that has joined the cluster to join the controller process group +- +- + + + =#=#=#= End test: Get controller metadata - OK (0) =#=#=#= +-- +2.31.1 + +From fec945824ed11395a8366882c29315c509de80f0 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Sat, 6 Jan 2024 19:07:00 -0800 +Subject: [PATCH 10/24] Refactor: libcrmcommon, daemons: New + pcmk__cluster_option_metadata() + +This new function is a wrapper for pcmk__format_option_metadata() that +always uses the shared cluster_options array and its length. + +Daemons can now call this function to get metadata instead of using +their local options arrays for that purpose. + +Soon we'll introduce a command that outputs all cluster option metadata +directly, instead of calling daemon metadata commands. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 7 +++---- + include/crm/common/options_internal.h | 4 ++++ + lib/cib/cib_utils.c | 6 +++--- + lib/common/options.c | 28 +++++++++++++++++++++++++-- + lib/pengine/common.c | 6 +++--- + 5 files changed, 39 insertions(+), 12 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 8fe09da..82aa143 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -727,10 +727,9 @@ crmd_metadata(void) + const char *desc_short = "Pacemaker controller options"; + const char *desc_long = "Cluster options used by Pacemaker's controller"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_controld, +- controller_options, +- PCMK__NELEM(controller_options)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld); ++ + printf("%s", s); + g_free(s); + } +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index 89d27d1..a62015f 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -74,6 +74,10 @@ gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + pcmk__cluster_option_t *option_list, + int len); + ++gchar *pcmk__cluster_option_metadata(const char *name, const char *desc_short, ++ const char *desc_long, ++ enum pcmk__opt_context filter); ++ + void pcmk__validate_cluster_options(GHashTable *options, + pcmk__cluster_option_t *option_list, + int len); +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 2205d15..479a7fb 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -899,9 +899,9 @@ cib_metadata(void) + const char *desc_long = "Cluster options used by Pacemaker's Cluster " + "Information Base manager"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- 
pcmk__opt_context_based, +- cib_opts, PCMK__NELEM(cib_opts)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ + printf("%s", s); + g_free(s); + } +diff --git a/lib/common/options.c b/lib/common/options.c +index d5b6c17..df4a8b4 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -43,7 +43,6 @@ pcmk__cli_help(char cmd) + * Option metadata + */ + +-#if 0 + static pcmk__cluster_option_t cluster_options[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -534,7 +533,6 @@ static pcmk__cluster_option_t cluster_options[] = { + NULL, + }, + }; +-#endif // 0 + + + /* +@@ -1036,6 +1034,32 @@ pcmk__format_option_metadata(const char *name, const char *desc_short, + return g_string_free(s, FALSE); + } + ++/*! ++ * \internal ++ * \brief Format cluster option metadata as an OCF-like XML string ++ * ++ * \param[in] name Daemon name ++ * \param[in] desc_short Short description of the daemon ++ * \param[in] desc_long Long description of the daemon ++ * \param[in] filter If not \c pcmk__opt_context_none, include only ++ * those options whose \c context field is equal to ++ * \p filter ++ * ++ * \return A string containing OCF-like cluster option metadata XML ++ * ++ * \note The caller is responsible for freeing the return value using ++ * \c g_free(). 
++ */ ++gchar * ++pcmk__cluster_option_metadata(const char *name, const char *desc_short, ++ const char *desc_long, ++ enum pcmk__opt_context filter) ++{ ++ return pcmk__format_option_metadata(name, desc_short, desc_long, filter, ++ cluster_options, ++ PCMK__NELEM(cluster_options)); ++} ++ + void + pcmk__validate_cluster_options(GHashTable *options, + pcmk__cluster_option_t *option_list, int len) +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index e9aa2e2..c9f1fc1 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -323,9 +323,9 @@ pe_metadata(pcmk__output_t *out) + const char *desc_short = "Pacemaker scheduler options"; + const char *desc_long = "Cluster options used by Pacemaker's scheduler"; + +- gchar *s = pcmk__format_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_schedulerd, +- pe_opts, PCMK__NELEM(pe_opts)); ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd); ++ + out->output_xml(out, "metadata", s); + g_free(s); + } +-- +2.31.1 + +From 9a7d33003dffea465e7b452abd0388db4a7d73b0 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 11 Jan 2024 00:06:32 -0800 +Subject: [PATCH 11/24] Test: cts-cli: Update daemon outputs for reordering + +In the new libcrmcommon options array, some options have been reordered +to be near other similar options. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 40 +++++++++++++++++----------------- + 1 file changed, 20 insertions(+), 20 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 9b2dd96..43393df 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -51,16 +51,6 @@ + Polling interval to recheck cluster state and evaluate rules with date specifications + + +- +- The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit +- Maximum amount of system load that should be used by cluster nodes +- +- +- +- Maximum number of jobs that can be scheduled per node (defaults to 2x cores) +- Maximum number of jobs that can be scheduled per node (defaults to 2x cores) +- +- + + A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Allowed values are "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. 
+ How a cluster node should react if notified of its own fencing +@@ -101,6 +91,16 @@ + How many times fencing can fail before it will no longer be immediately re-attempted on a target + + ++ ++ The cluster will slow down its recovery process when the amount of system resources used (currently CPU) approaches this limit ++ Maximum amount of system load that should be used by cluster nodes ++ ++ ++ ++ Maximum number of jobs that can be scheduled per node (defaults to 2x cores) ++ Maximum number of jobs that can be scheduled per node (defaults to 2x cores) ++ ++ + + + =#=#=#= End test: Get controller metadata - OK (0) =#=#=#= +@@ -259,6 +259,16 @@ + + ++ ++ When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. ++ Whether to lock resources to a cleanly shut down node ++ ++ ++ ++ If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. ++ Do not lock resources to a cleanly shut down node longer than this ++ ++ + + Whether resources can run on any node by default + Whether resources can run on any node by default +@@ -279,16 +289,6 @@ + Whether the cluster should check for active resources during start-up + + +- +- When true, resources active on a node when it is cleanly shut down are kept "locked" to that node (not allowed to run elsewhere) until they start again on that node after it rejoins (or for at most shutdown-lock-limit, if set). Stonith resources and Pacemaker Remote connections are never locked. 
Clone and bundle instances and the promoted role of promotable clones are currently never locked, though support could be added in a future release. +- Whether to lock resources to a cleanly shut down node +- +- +- +- If shutdown-lock is true and this is set to a nonzero time duration, shutdown locks will expire after this much time has passed since the shutdown was initiated, even if the node has not rejoined. +- Do not lock resources to a cleanly shut down node longer than this +- +- + + If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a "split-brain" situation, potentially leading to data loss and/or service unavailability. + *** Advanced Use Only *** Whether nodes may be fenced as part of recovery +-- +2.31.1 + +From c085ff844deddefe4f00355e2a273f27eb35ce00 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Thu, 11 Jan 2024 00:23:01 -0800 +Subject: [PATCH 12/24] Refactor: libcrmcommon, daemons: Use cluster_options + array in getters + +Drop the option_list and len arguments from pcmk__cluster_option() and +pcmk__validate_cluster_options(). Use cluster_options in libcrmcommon +instead. + +Now, all daemons fetch and validate the full set of cluster options, +even the ones they don't use. This is only slightly less efficient. It +ensures that there's no problem using the same option with multiple +daemons, and it makes it easy to use new options in a given daemon in +the future. + +Now that nothing is using the local, per-daemon options arrays anymore, +we can drop them in an upcoming commit. 
+ +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 9 +++--- + include/crm/common/options_internal.h | 8 ++---- + lib/cib/cib_utils.c | 7 +++-- + lib/common/options.c | 40 ++++++++++++--------------- + lib/pengine/common.c | 6 ++-- + 5 files changed, 31 insertions(+), 39 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 82aa143..4208947 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,6 +515,7 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + ++#if 0 + static pcmk__cluster_option_t controller_options[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -719,6 +720,7 @@ static pcmk__cluster_option_t controller_options[] = { + "2 hours.") + }, + }; ++#endif // 0 + + void + crmd_metadata(void) +@@ -775,8 +777,7 @@ config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void + config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL); + + // Validate all options, and use defaults if not already present in hash +- pcmk__validate_cluster_options(config_hash, controller_options, +- PCMK__NELEM(controller_options)); ++ pcmk__validate_cluster_options(config_hash); + + /* Validate the watchdog timeout in the context of the local node + * environment. If invalid, the controller will exit with a fatal error. +@@ -900,9 +901,7 @@ crm_shutdown(int nsig) + * config_query_callback() has been run at least once, it doesn't look like + * anything could have changed the timer period since then. 
+ */ +- value = pcmk__cluster_option(NULL, controller_options, +- PCMK__NELEM(controller_options), +- XML_CONFIG_ATTR_FORCE_QUIT); ++ value = pcmk__cluster_option(NULL, XML_CONFIG_ATTR_FORCE_QUIT); + default_period_ms = crm_parse_interval_spec(value); + controld_shutdown_start_countdown(default_period_ms); + } +diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h +index a62015f..b727a58 100644 +--- a/include/crm/common/options_internal.h ++++ b/include/crm/common/options_internal.h +@@ -64,9 +64,7 @@ typedef struct pcmk__cluster_option_s { + + } pcmk__cluster_option_t; + +-const char *pcmk__cluster_option(GHashTable *options, +- const pcmk__cluster_option_t *option_list, +- int len, const char *name); ++const char *pcmk__cluster_option(GHashTable *options, const char *name); + + gchar *pcmk__format_option_metadata(const char *name, const char *desc_short, + const char *desc_long, +@@ -78,9 +76,7 @@ gchar *pcmk__cluster_option_metadata(const char *name, const char *desc_short, + const char *desc_long, + enum pcmk__opt_context filter); + +-void pcmk__validate_cluster_options(GHashTable *options, +- pcmk__cluster_option_t *option_list, +- int len); ++void pcmk__validate_cluster_options(GHashTable *options); + + bool pcmk__valid_interval_spec(const char *value); + bool pcmk__valid_boolean(const char *value); +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 479a7fb..97f62ac 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,6 +866,7 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + ++#if 0 + static pcmk__cluster_option_t cib_opts[] = { + /* name, legacy name, type, allowed values, + * default value, validator, +@@ -890,6 +891,7 @@ static pcmk__cluster_option_t cib_opts[] = { + " multiplied by the number of nodes).") + }, + }; ++#endif // 0 + + void + cib_metadata(void) +@@ -909,14 +911,13 @@ cib_metadata(void) + static void + 
verify_cib_options(GHashTable *options) + { +- pcmk__validate_cluster_options(options, cib_opts, PCMK__NELEM(cib_opts)); ++ pcmk__validate_cluster_options(options); + } + + const char * + cib_pref(GHashTable * options, const char *name) + { +- return pcmk__cluster_option(options, cib_opts, PCMK__NELEM(cib_opts), +- name); ++ return pcmk__cluster_option(options, name); + } + + gboolean +diff --git a/lib/common/options.c b/lib/common/options.c +index df4a8b4..13d58e3 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -852,27 +852,21 @@ cluster_option_value(GHashTable *options, bool (*validate)(const char *), + * \internal + * \brief Get the value of a cluster option + * +- * \param[in,out] options Name/value pairs for configured options +- * \param[in] option_list Possible cluster options +- * \param[in] len Length of \p option_list +- * \param[in] name (Primary) option name to look for ++ * \param[in,out] options Name/value pairs for configured options ++ * \param[in] name (Primary) option name to look for + * + * \return Option value + */ + const char * +-pcmk__cluster_option(GHashTable *options, +- const pcmk__cluster_option_t *option_list, +- int len, const char *name) ++pcmk__cluster_option(GHashTable *options, const char *name) + { +- const char *value = NULL; +- +- for (int lpc = 0; lpc < len; lpc++) { +- if (pcmk__str_eq(name, option_list[lpc].name, pcmk__str_casei)) { +- value = cluster_option_value(options, option_list[lpc].is_valid, +- option_list[lpc].name, +- option_list[lpc].alt_name, +- option_list[lpc].default_value); +- return value; ++ for (int lpc = 0; lpc < PCMK__NELEM(cluster_options); lpc++) { ++ if (pcmk__str_eq(name, cluster_options[lpc].name, pcmk__str_casei)) { ++ return cluster_option_value(options, ++ cluster_options[lpc].is_valid, ++ cluster_options[lpc].name, ++ cluster_options[lpc].alt_name, ++ cluster_options[lpc].default_value); + } + } + CRM_CHECK(FALSE, crm_err("Bug: looking for unknown option '%s'", name)); +@@ 
-1061,13 +1055,13 @@ pcmk__cluster_option_metadata(const char *name, const char *desc_short, + } + + void +-pcmk__validate_cluster_options(GHashTable *options, +- pcmk__cluster_option_t *option_list, int len) ++pcmk__validate_cluster_options(GHashTable *options) + { +- for (int lpc = 0; lpc < len; lpc++) { +- cluster_option_value(options, option_list[lpc].is_valid, +- option_list[lpc].name, +- option_list[lpc].alt_name, +- option_list[lpc].default_value); ++ for (int lpc = 0; lpc < PCMK__NELEM(cluster_options); lpc++) { ++ cluster_option_value(options, ++ cluster_options[lpc].is_valid, ++ cluster_options[lpc].name, ++ cluster_options[lpc].alt_name, ++ cluster_options[lpc].default_value); + } + } +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index c9f1fc1..f99bd1b 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,6 +21,7 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + ++#if 0 + static pcmk__cluster_option_t pe_opts[] = { + /* name, old name, type, allowed values, + * default value, validator, +@@ -315,6 +316,7 @@ static pcmk__cluster_option_t pe_opts[] = { + NULL + }, + }; ++#endif // 0 + + void + pe_metadata(pcmk__output_t *out) +@@ -333,13 +335,13 @@ pe_metadata(pcmk__output_t *out) + void + verify_pe_options(GHashTable * options) + { +- pcmk__validate_cluster_options(options, pe_opts, PCMK__NELEM(pe_opts)); ++ pcmk__validate_cluster_options(options); + } + + const char * + pe_pref(GHashTable * options, const char *name) + { +- return pcmk__cluster_option(options, pe_opts, PCMK__NELEM(pe_opts), name); ++ return pcmk__cluster_option(options, name); + } + + const char * +-- +2.31.1 + +From de834cee2c5d8f4f796633e66f263ad77b9cd2eb Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 03:06:27 -0800 +Subject: [PATCH 13/24] Refactor: various: Drop per-daemon cluster opt tables + +Ref T746 + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 207 ------------------- 
+ lib/cib/cib_utils.c | 27 --- + lib/pengine/common.c | 297 ---------------------------- + 3 files changed, 531 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 4208947..40b90f8 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,213 +515,6 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + +-#if 0 +-static pcmk__cluster_option_t controller_options[] = { +- /* name, old name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL, +- pcmk__opt_context_controld, +- N_("Pacemaker version on cluster node elected Designated Controller (DC)"), +- N_("Includes a hash which identifies the exact changeset the code was " +- "built from. Used for diagnostic purposes.") +- }, +- { +- "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL, +- pcmk__opt_context_controld, +- N_("The messaging stack on which Pacemaker is currently running"), +- N_("Used for informational and diagnostic purposes.") +- }, +- { +- "cluster-name", NULL, "string", NULL, NULL, NULL, +- pcmk__opt_context_controld, +- N_("An arbitrary name for the cluster"), +- N_("This optional value is mostly for users' convenience as desired " +- "in administration, but may also be used in Pacemaker " +- "configuration rules via the #cluster-name node attribute, and " +- "by higher-level tools and resource agents.") +- }, +- { +- XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time", +- NULL, "20s", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("How long to wait for a response from other nodes during start-up"), +- N_("The optimal value will depend on the speed and load of your network " +- "and the type of switches used.") +- }, +- { +- XML_CONFIG_ATTR_RECHECK, NULL, "time", +- N_("Zero disables polling, while positive 
values are an interval in seconds" +- "(unless other units are specified, for example \"5min\")"), +- "15min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("Polling interval to recheck cluster state and evaluate rules " +- "with date specifications"), +- N_("Pacemaker is primarily event-driven, and looks ahead to know when to " +- "recheck cluster state for failure timeouts and most time-based " +- "rules. However, it will also recheck the cluster after this " +- "amount of inactivity, to evaluate rules with date specifications " +- "and serve as a fail-safe for certain types of scheduler bugs.") +- }, +- { +- "load-threshold", NULL, "percentage", NULL, +- "80%", pcmk__valid_percentage, +- pcmk__opt_context_controld, +- N_("Maximum amount of system load that should be used by cluster nodes"), +- N_("The cluster will slow down its recovery process when the amount of " +- "system resources used (currently CPU) approaches this limit"), +- }, +- { +- "node-action-limit", NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_controld, +- N_("Maximum number of jobs that can be scheduled per node " +- "(defaults to 2x cores)") +- }, +- { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL, +- pcmk__opt_context_controld, +- N_("How a cluster node should react if notified of its own fencing"), +- N_("A cluster node may receive notification of its own fencing if fencing " +- "is misconfigured, or if fabric fencing is in use that doesn't cut " +- "cluster communication. Allowed values are \"stop\" to attempt to " +- "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt " +- "to immediately reboot the local node, falling back to stop on failure.") +- }, +- { +- XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL, +- "2min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("Declare an election failed if it is not decided within this much " +- "time. 
If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL, +- "20min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("Exit immediately if shutdown does not complete within this much " +- "time. If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "join-integration-timeout", "crmd-integration-timeout", "time", NULL, +- "3min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL, +- "30min", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- "*** Advanced Use Only ***", +- N_("If you need to adjust this value, it probably indicates " +- "the presence of a bug.") +- }, +- { +- "transition-delay", "crmd-transition-delay", "time", NULL, +- "0s", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("*** Advanced Use Only *** Enabling this option will slow down " +- "cluster recovery under all conditions"), +- N_("Delay cluster recovery for this much time to allow for additional " +- "events to occur. Useful if your configuration is sensitive to " +- "the order in which ping updates arrive.") +- }, +- { +- /* @COMPAT Currently unparsable values default to -1 (auto-calculate), +- * while missing values default to 0 (disable). All values are accepted +- * (unless the controller finds that the value conflicts with the +- * SBD_WATCHDOG_TIMEOUT). +- * +- * At a compatibility break: properly validate as a timeout, let +- * either negative values or a particular string like "auto" mean auto- +- * calculate, and use 0 as the single default for when the option either +- * is unset or fails to validate. 
+- */ +- "stonith-watchdog-timeout", NULL, "time", NULL, +- "0", NULL, +- pcmk__opt_context_controld, +- N_("How long before nodes can be assumed to be safely down when " +- "watchdog-based self-fencing via SBD is in use"), +- N_("If this is set to a positive value, lost nodes are assumed to " +- "self-fence using watchdog-based SBD within this much time. This " +- "does not require a fencing resource to be explicitly configured, " +- "though a fence_watchdog resource can be configured, to limit use " +- "to specific nodes. If this is set to 0 (the default), the cluster " +- "will never assume watchdog-based self-fencing. If this is set to a " +- "negative value, the cluster will use twice the local value of the " +- "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, " +- "or otherwise treat this as 0. WARNING: When used, this timeout " +- "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use " +- "watchdog-based SBD, and Pacemaker will refuse to start on any of " +- "those nodes where this is not true for the local value or SBD is " +- "not active. 
When this is set to a negative value, " +- "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes " +- "that use SBD, otherwise data corruption or loss could occur.") +- }, +- { +- "stonith-max-attempts", NULL, "integer", NULL, +- "10", pcmk__valid_positive_number, +- pcmk__opt_context_controld, +- N_("How many times fencing can fail before it will no longer be " +- "immediately re-attempted on a target") +- }, +- +- // Already documented in libpe_status (other values must be kept identical) +- { +- "no-quorum-policy", NULL, "select", +- "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum, +- pcmk__opt_context_controld, +- N_("What to do when the cluster does not have quorum"), NULL +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_controld, +- N_("Whether to lock resources to a cleanly shut down node"), +- N_("When true, resources active on a node when it is cleanly shut down " +- "are kept \"locked\" to that node (not allowed to run elsewhere) " +- "until they start again on that node after it rejoins (or for at " +- "most shutdown-lock-limit, if set). Stonith resources and " +- "Pacemaker Remote connections are never locked. 
Clone and bundle " +- "instances and the promoted role of promotable clones are " +- "currently never locked, though support could be added in a future " +- "release.") +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("Do not lock resources to a cleanly shut down node longer than " +- "this"), +- N_("If shutdown-lock is true and this is set to a nonzero time " +- "duration, shutdown locks will expire after this much time has " +- "passed since the shutdown was initiated, even if the node has not " +- "rejoined.") +- }, +- { +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_controld, +- N_("How long to wait for a node that has joined the cluster to join " +- "the controller process group"), +- N_("Fence nodes that do not join the controller process group within " +- "this much time after joining the cluster, to allow the cluster " +- "to continue managing resources. A value of 0 means never fence " +- "pending nodes. 
Setting the value to 2h means fence nodes after " +- "2 hours.") +- }, +-}; +-#endif // 0 +- + void + crmd_metadata(void) + { +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 97f62ac..b83158c 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,33 +866,6 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + +-#if 0 +-static pcmk__cluster_option_t cib_opts[] = { +- /* name, legacy name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "enable-acl", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_based, +- N_("Enable Access Control Lists (ACLs) for the CIB"), +- NULL +- }, +- { +- "cluster-ipc-limit", NULL, "integer", NULL, +- "500", pcmk__valid_positive_number, +- pcmk__opt_context_based, +- N_("Maximum IPC message backlog before disconnecting a cluster daemon"), +- N_("Raise this if log has \"Evicting client\" messages for cluster daemon" +- " PIDs (a good value is the number of resources in the cluster" +- " multiplied by the number of nodes).") +- }, +-}; +-#endif // 0 +- + void + cib_metadata(void) + { +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index f99bd1b..e96f0b5 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,303 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-#if 0 +-static pcmk__cluster_option_t pe_opts[] = { +- /* name, old name, type, allowed values, +- * default value, validator, +- * context, +- * short description, +- * long description +- */ +- { +- "no-quorum-policy", NULL, "select", "stop, freeze, ignore, demote, suicide", +- "stop", pcmk__valid_quorum, +- pcmk__opt_context_schedulerd, +- N_("What to do when the cluster does not have quorum"), +- NULL +- }, +- { +- "symmetric-cluster", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- 
N_("Whether resources can run on any node by default"), +- NULL +- }, +- { +- "maintenance-mode", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should refrain from monitoring, starting, " +- "and stopping resources"), +- NULL +- }, +- { +- "start-failure-is-fatal", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether a start failure should prevent a resource from being " +- "recovered on the same node"), +- N_("When true, the cluster will immediately ban a resource from a node " +- "if it fails to start there. When false, the cluster will instead " +- "check the resource's fail count against its migration-threshold.") +- }, +- { +- "enable-startup-probes", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should check for active resources during start-up"), +- NULL +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to lock resources to a cleanly shut down node"), +- N_("When true, resources active on a node when it is cleanly shut down " +- "are kept \"locked\" to that node (not allowed to run elsewhere) " +- "until they start again on that node after it rejoins (or for at " +- "most shutdown-lock-limit, if set). Stonith resources and " +- "Pacemaker Remote connections are never locked. 
Clone and bundle " +- "instances and the promoted role of promotable clones are " +- "currently never locked, though support could be added in a future " +- "release.") +- }, +- { +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Do not lock resources to a cleanly shut down node longer than " +- "this"), +- N_("If shutdown-lock is true and this is set to a nonzero time " +- "duration, shutdown locks will expire after this much time has " +- "passed since the shutdown was initiated, even if the node has not " +- "rejoined.") +- }, +- +- // Fencing-related options +- { +- "stonith-enabled", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** " +- "Whether nodes may be fenced as part of recovery"), +- N_("If false, unresponsive nodes are immediately assumed to be harmless, " +- "and resources that were active on them may be recovered " +- "elsewhere. 
This can result in a \"split-brain\" situation, " +- "potentially leading to data loss and/or service unavailability.") +- }, +- { +- "stonith-action", NULL, "select", "reboot, off, poweroff", +- PCMK_ACTION_REBOOT, pcmk__is_fencing_action, +- pcmk__opt_context_schedulerd, +- N_("Action to send to fence device when a node needs to be fenced " +- "(\"poweroff\" is a deprecated alias for \"off\")"), +- NULL +- }, +- { +- "stonith-timeout", NULL, "time", NULL, +- "60s", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** Unused by Pacemaker"), +- N_("This value is not used by Pacemaker, but is kept for backward " +- "compatibility, and certain legacy fence agents might use it.") +- }, +- { +- XML_ATTR_HAVE_WATCHDOG, NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether watchdog integration is enabled"), +- N_("This is set automatically by the cluster according to whether SBD " +- "is detected to be in use. User-configured values are ignored. " +- "The value `true` is meaningful if diskless SBD is used and " +- "`stonith-watchdog-timeout` is nonzero. 
In that case, if fencing " +- "is required, watchdog-based self-fencing will be performed via " +- "SBD without requiring a fencing resource explicitly configured.") +- }, +- { +- "concurrent-fencing", NULL, "boolean", NULL, +- PCMK__CONCURRENT_FENCING_DEFAULT, pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Allow performing fencing operations in parallel"), +- NULL +- }, +- { +- "startup-fencing", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Advanced Use Only *** Whether to fence unseen nodes at start-up"), +- N_("Setting this to false may lead to a \"split-brain\" situation," +- "potentially leading to data loss and/or service unavailability.") +- }, +- { +- XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Apply fencing delay targeting the lost nodes with the highest total resource priority"), +- N_("Apply specified delay for the fencings that are targeting the lost " +- "nodes with the highest total resource priority in case we don't " +- "have the majority of the nodes in our cluster partition, so that " +- "the more significant nodes potentially win any fencing match, " +- "which is especially meaningful under split-brain of 2-node " +- "cluster. A promoted resource instance takes the base priority + 1 " +- "on calculation if the base priority is not 0. Any static/random " +- "delays that are introduced by `pcmk_delay_base/max` configured " +- "for the corresponding fencing resources will be added to this " +- "delay. This delay should be significantly greater than, safely " +- "twice, the maximum `pcmk_delay_base/max`. 
By default, priority " +- "fencing delay is disabled.") +- }, +- { +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT, NULL, "time", NULL, +- "0", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("How long to wait for a node that has joined the cluster to join " +- "the controller process group"), +- N_("Fence nodes that do not join the controller process group within " +- "this much time after joining the cluster, to allow the cluster " +- "to continue managing resources. A value of 0 means never fence " +- "pending nodes. Setting the value to 2h means fence nodes after " +- "2 hours.") +- }, +- { +- "cluster-delay", NULL, "time", NULL, +- "60s", pcmk__valid_interval_spec, +- pcmk__opt_context_schedulerd, +- N_("Maximum time for node-to-node communication"), +- N_("The node elected Designated Controller (DC) will consider an action " +- "failed if it does not get a response from the node executing the " +- "action within this time (after considering the action's own " +- "timeout). The \"correct\" value will depend on the speed and " +- "load of your network and cluster nodes.") +- }, +- { +- "batch-limit", NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("Maximum number of jobs that the cluster may execute in parallel " +- "across all nodes"), +- N_("The \"correct\" value will depend on the speed and load of your " +- "network and cluster nodes. 
If set to 0, the cluster will " +- "impose a dynamically calculated limit when any node has a " +- "high load.") +- }, +- { +- "migration-limit", NULL, "integer", NULL, +- "-1", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of live migration actions that the cluster is allowed " +- "to execute in parallel on a node (-1 means no limit)") +- }, +- +- /* Orphans and stopping */ +- { +- "stop-all-resources", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether the cluster should stop all active resources"), +- NULL +- }, +- { +- "stop-orphan-resources", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to stop resources that were removed from the configuration"), +- NULL +- }, +- { +- "stop-orphan-actions", NULL, "boolean", NULL, +- "true", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("Whether to cancel recurring actions removed from the configuration"), +- NULL +- }, +- { +- "remove-after-stop", NULL, "boolean", NULL, +- "false", pcmk__valid_boolean, +- pcmk__opt_context_schedulerd, +- N_("*** Deprecated *** Whether to remove stopped resources from " +- "the executor"), +- N_("Values other than default are poorly tested and potentially dangerous." 
+- " This option will be removed in a future release.") +- }, +- +- /* Storing inputs */ +- { +- "pe-error-series-max", NULL, "integer", NULL, +- "-1", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs resulting in errors to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- { +- "pe-warn-series-max", NULL, "integer", NULL, +- "5000", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs resulting in warnings to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- { +- "pe-input-series-max", NULL, "integer", NULL, +- "4000", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The number of scheduler inputs without errors or warnings to save"), +- N_("Zero to disable, -1 to store unlimited.") +- }, +- +- /* Node health */ +- { +- PCMK__OPT_NODE_HEALTH_STRATEGY, NULL, "select", +- PCMK__VALUE_NONE ", " PCMK__VALUE_MIGRATE_ON_RED ", " +- PCMK__VALUE_ONLY_GREEN ", " PCMK__VALUE_PROGRESSIVE ", " +- PCMK__VALUE_CUSTOM, +- PCMK__VALUE_NONE, pcmk__validate_health_strategy, +- pcmk__opt_context_schedulerd, +- N_("How cluster should react to node health attributes"), +- N_("Requires external entities to create node attributes (named with " +- "the prefix \"#health\") with values \"red\", " +- "\"yellow\", or \"green\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_BASE, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("Base health score assigned to a node"), +- N_("Only used when \"node-health-strategy\" is set to \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_GREEN, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"green\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_YELLOW, NULL, "integer", NULL, +- "0", pcmk__valid_number, +- 
pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"yellow\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- { +- PCMK__OPT_NODE_HEALTH_RED, NULL, "integer", NULL, +- "-INFINITY", pcmk__valid_number, +- pcmk__opt_context_schedulerd, +- N_("The score to use for a node health attribute whose value is \"red\""), +- N_("Only used when \"node-health-strategy\" is set to \"custom\" or \"progressive\".") +- }, +- +- /*Placement Strategy*/ +- { +- "placement-strategy", NULL, "select", +- "default, utilization, minimal, balanced", +- "default", pcmk__valid_placement_strategy, +- pcmk__opt_context_schedulerd, +- N_("How the cluster should allocate resources to nodes"), +- NULL +- }, +-}; +-#endif // 0 +- + void + pe_metadata(pcmk__output_t *out) + { +-- +2.31.1 + +From 9a8bb049fcb49204932e96014c3a63e58fd95d23 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:36:34 -0800 +Subject: [PATCH 14/24] Refactor: libpe_status: Drop verify_pe_opts() + +Signed-off-by: Reid Wahl +--- + include/crm/pengine/internal.h | 1 - + lib/pengine/common.c | 6 ------ + lib/pengine/unpack.c | 2 +- + 3 files changed, 1 insertion(+), 8 deletions(-) + +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 9c8068f..5835ef8 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -209,7 +209,6 @@ pcmk_node_t *native_location(const pcmk_resource_t *rsc, GList **list, + int current); + + void pe_metadata(pcmk__output_t *out); +-void verify_pe_options(GHashTable * options); + + void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, + pcmk_scheduler_t *scheduler, gboolean failed); +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index e96f0b5..402fae9 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -35,12 +35,6 @@ pe_metadata(pcmk__output_t *out) + g_free(s); + } + +-void 
+-verify_pe_options(GHashTable * options) +-{ +- pcmk__validate_cluster_options(options); +-} +- + const char * + pe_pref(GHashTable * options, const char *name) + { +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 3429d56..2a9b563 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -228,7 +228,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, &rule_data, config_hash, + CIB_OPTIONS_FIRST, FALSE, scheduler); + +- verify_pe_options(scheduler->config_hash); ++ pcmk__validate_cluster_options(config_hash); + + set_config_flag(scheduler, "enable-startup-probes", + pcmk_sched_probe_resources); +-- +2.31.1 + +From af79c50b7a5626218bf2a9b34fe631f07b1e2bda Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 08:40:13 -0800 +Subject: [PATCH 15/24] Refactor: libpe_status: Drop pe_pref() internally + +Signed-off-by: Reid Wahl +--- + daemons/schedulerd/schedulerd_messages.c | 3 ++- + include/crm/pengine/internal.h | 10 +++++++--- + lib/pacemaker/pcmk_graph_producer.c | 9 +++++---- + lib/pacemaker/pcmk_sched_nodes.c | 5 +++-- + lib/pengine/unpack.c | 4 +++- + tools/crm_resource_print.c | 4 ++-- + 6 files changed, 22 insertions(+), 13 deletions(-) + +diff --git a/daemons/schedulerd/schedulerd_messages.c b/daemons/schedulerd/schedulerd_messages.c +index 5a97365..ff31fce 100644 +--- a/daemons/schedulerd/schedulerd_messages.c ++++ b/daemons/schedulerd/schedulerd_messages.c +@@ -112,7 +112,8 @@ handle_pecalc_request(pcmk__request_t *request) + series_id = 2; + } + +- value = pe_pref(scheduler->config_hash, series[series_id].param); ++ value = pcmk__cluster_option(scheduler->config_hash, ++ series[series_id].param); + if ((value == NULL) + || (pcmk__scan_min_int(value, &series_wrap, -1) != pcmk_rc_ok)) { + series_wrap = series[series_id].wrap; +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 5835ef8..2b7f2eb 100644 +--- 
a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -620,14 +620,18 @@ int pe__node_health(pcmk_node_t *node); + static inline enum pcmk__health_strategy + pe__health_strategy(pcmk_scheduler_t *scheduler) + { +- return pcmk__parse_health_strategy(pe_pref(scheduler->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY)); ++ const char *strategy = pcmk__cluster_option(scheduler->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY); ++ ++ return pcmk__parse_health_strategy(strategy); + } + + static inline int + pe__health_score(const char *option, pcmk_scheduler_t *scheduler) + { +- return char2score(pe_pref(scheduler->config_hash, option)); ++ const char *value = pcmk__cluster_option(scheduler->config_hash, option); ++ ++ return char2score(value); + } + + /*! +diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c +index 59b6176..3006775 100644 +--- a/lib/pacemaker/pcmk_graph_producer.c ++++ b/lib/pacemaker/pcmk_graph_producer.c +@@ -1004,16 +1004,17 @@ pcmk__create_graph(pcmk_scheduler_t *scheduler) + GList *iter = NULL; + const char *value = NULL; + long long limit = 0LL; ++ GHashTable *config_hash = scheduler->config_hash; + + transition_id++; + crm_trace("Creating transition graph %d", transition_id); + + scheduler->graph = create_xml_node(NULL, XML_TAG_GRAPH); + +- value = pe_pref(scheduler->config_hash, "cluster-delay"); ++ value = pcmk__cluster_option(config_hash, "cluster-delay"); + crm_xml_add(scheduler->graph, "cluster-delay", value); + +- value = pe_pref(scheduler->config_hash, "stonith-timeout"); ++ value = pcmk__cluster_option(config_hash, "stonith-timeout"); + crm_xml_add(scheduler->graph, "stonith-timeout", value); + + crm_xml_add(scheduler->graph, "failed-stop-offset", "INFINITY"); +@@ -1024,12 +1025,12 @@ pcmk__create_graph(pcmk_scheduler_t *scheduler) + crm_xml_add(scheduler->graph, "failed-start-offset", "1"); + } + +- value = pe_pref(scheduler->config_hash, "batch-limit"); ++ value = 
pcmk__cluster_option(config_hash, "batch-limit"); + crm_xml_add(scheduler->graph, "batch-limit", value); + + crm_xml_add_int(scheduler->graph, "transition_id", transition_id); + +- value = pe_pref(scheduler->config_hash, "migration-limit"); ++ value = pcmk__cluster_option(config_hash, "migration-limit"); + if ((pcmk__scan_ll(value, &limit, 0LL) == pcmk_rc_ok) && (limit > 0)) { + crm_xml_add(scheduler->graph, "migration-limit", value); + } +diff --git a/lib/pacemaker/pcmk_sched_nodes.c b/lib/pacemaker/pcmk_sched_nodes.c +index 9cf5545..03baa2c 100644 +--- a/lib/pacemaker/pcmk_sched_nodes.c ++++ b/lib/pacemaker/pcmk_sched_nodes.c +@@ -360,8 +360,9 @@ pcmk__apply_node_health(pcmk_scheduler_t *scheduler) + { + int base_health = 0; + enum pcmk__health_strategy strategy; +- const char *strategy_str = pe_pref(scheduler->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY); ++ const char *strategy_str = ++ pcmk__cluster_option(scheduler->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY); + + strategy = pcmk__parse_health_strategy(strategy_str); + if (strategy == pcmk__health_strategy_none) { +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 2a9b563..49443c6 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -50,7 +50,9 @@ struct action_history { + * flag is stringified more readably in log messages. 
+ */ + #define set_config_flag(scheduler, option, flag) do { \ +- const char *scf_value = pe_pref((scheduler)->config_hash, (option)); \ ++ GHashTable *config_hash = (scheduler)->config_hash; \ ++ const char *scf_value = pcmk__cluster_option(config_hash, (option)); \ ++ \ + if (scf_value != NULL) { \ + if (crm_is_true(scf_value)) { \ + (scheduler)->flags = pcmk__set_flags_as(__func__, __LINE__, \ +diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c +index bdf3ad9..26761dd 100644 +--- a/tools/crm_resource_print.c ++++ b/tools/crm_resource_print.c +@@ -479,8 +479,8 @@ resource_check_list_default(pcmk__output_t *out, va_list args) { + "'%s' cannot run on unhealthy nodes due to " + PCMK__OPT_NODE_HEALTH_STRATEGY "='%s'", + parent->id, +- pe_pref(checks->rsc->cluster->config_hash, +- PCMK__OPT_NODE_HEALTH_STRATEGY)); ++ pcmk__cluster_option(checks->rsc->cluster->config_hash, ++ PCMK__OPT_NODE_HEALTH_STRATEGY)); + } + + out->end_list(out); +-- +2.31.1 + +From 1e78e617965b1a2e1a5671aa15943ba42487b09a Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 08:43:55 -0800 +Subject: [PATCH 16/24] API: libpe_status: Deprecate pe_pref() + +Signed-off-by: Reid Wahl +--- + include/crm/pengine/common.h | 4 +--- + include/crm/pengine/common_compat.h | 5 ++++- + lib/pengine/common.c | 20 ++++++++++++++------ + lib/pengine/unpack.c | 28 ++++++++++++++-------------- + 4 files changed, 33 insertions(+), 24 deletions(-) + +diff --git a/include/crm/pengine/common.h b/include/crm/pengine/common.h +index 2feac8a..a935aa7 100644 +--- a/include/crm/pengine/common.h ++++ b/include/crm/pengine/common.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -28,8 +28,6 @@ enum rsc_role_e text2role(const char *role); + const char *role2text(enum rsc_role_e role); + const char *fail2text(enum action_fail_response fail); + +-const char *pe_pref(GHashTable * options, const char *name); +- + /*! + * \brief Get readable description of a recovery type + * +diff --git a/include/crm/pengine/common_compat.h b/include/crm/pengine/common_compat.h +index 4330ccf..52e11f7 100644 +--- a/include/crm/pengine/common_compat.h ++++ b/include/crm/pengine/common_compat.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -54,6 +54,9 @@ extern "C" { + + //! \deprecated Do not use + #define RSC_ROLE_MASTER_S RSC_ROLE_PROMOTED_LEGACY_S ++ ++//! \deprecated Do not use ++const char *pe_pref(GHashTable * options, const char *name); + + #ifdef __cplusplus + } +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 402fae9..0a4dfe6 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -35,12 +35,6 @@ pe_metadata(pcmk__output_t *out) + g_free(s); + } + +-const char * +-pe_pref(GHashTable * options, const char *name) +-{ +- return pcmk__cluster_option(options, name); +-} +- + const char * + fail2text(enum action_fail_response fail) + { +@@ -350,3 +344,17 @@ pe_node_attribute_raw(const pcmk_node_t *node, const char *name) + } + return g_hash_table_lookup(node->details->attrs, name); + } ++ ++// Deprecated functions kept only for backward API compatibility ++// LCOV_EXCL_START ++ ++#include ++ ++const char * ++pe_pref(GHashTable * options, const char *name) ++{ ++ return pcmk__cluster_option(options, name); ++} ++ ++// LCOV_EXCL_STOP ++// End deprecated API +diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c +index 49443c6..d484e93 100644 +--- a/lib/pengine/unpack.c ++++ b/lib/pengine/unpack.c +@@ -238,7 +238,7 @@ unpack_config(xmlNode *config, 
pcmk_scheduler_t *scheduler) + crm_info("Startup probes: disabled (dangerous)"); + } + +- value = pe_pref(scheduler->config_hash, XML_ATTR_HAVE_WATCHDOG); ++ value = pcmk__cluster_option(config_hash, XML_ATTR_HAVE_WATCHDOG); + if (value && crm_is_true(value)) { + crm_info("Watchdog-based self-fencing will be performed via SBD if " + "fencing is required and stonith-watchdog-timeout is nonzero"); +@@ -251,7 +251,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + set_if_xpath(pcmk_sched_enable_unfencing, XPATH_ENABLE_UNFENCING, + scheduler); + +- value = pe_pref(scheduler->config_hash, "stonith-timeout"); ++ value = pcmk__cluster_option(config_hash, "stonith-timeout"); + scheduler->stonith_timeout = (int) crm_parse_interval_spec(value); + crm_debug("STONITH timeout: %d", scheduler->stonith_timeout); + +@@ -262,8 +262,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("STONITH of failed nodes is disabled"); + } + +- scheduler->stonith_action = pe_pref(scheduler->config_hash, +- "stonith-action"); ++ scheduler->stonith_action = pcmk__cluster_option(config_hash, ++ "stonith-action"); + if (!strcmp(scheduler->stonith_action, "poweroff")) { + pe_warn_once(pcmk__wo_poweroff, + "Support for stonith-action of 'poweroff' is deprecated " +@@ -280,8 +280,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("Concurrent fencing is disabled"); + } + +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY); + if (value) { + scheduler->priority_fencing_delay = crm_parse_interval_spec(value) + / 1000; +@@ -299,7 +299,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_debug("Cluster is symmetric" " - resources can run anywhere by default"); + } + +- value = pe_pref(scheduler->config_hash, "no-quorum-policy"); ++ value = pcmk__cluster_option(config_hash, "no-quorum-policy"); + + if 
(pcmk__str_eq(value, "ignore", pcmk__str_casei)) { + scheduler->no_quorum_policy = pcmk_no_quorum_ignore; +@@ -367,7 +367,7 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + crm_trace("Orphan resource actions are ignored"); + } + +- value = pe_pref(scheduler->config_hash, "remove-after-stop"); ++ value = pcmk__cluster_option(config_hash, "remove-after-stop"); + if (value != NULL) { + if (crm_is_true(value)) { + pe__set_working_set_flags(scheduler, pcmk_sched_remove_after_stop); +@@ -407,14 +407,14 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + + pe__unpack_node_health_scores(scheduler); + +- scheduler->placement_strategy = pe_pref(scheduler->config_hash, +- "placement-strategy"); ++ scheduler->placement_strategy = ++ pcmk__cluster_option(config_hash, "placement-strategy"); + crm_trace("Placement strategy: %s", scheduler->placement_strategy); + + set_config_flag(scheduler, "shutdown-lock", pcmk_sched_shutdown_lock); + if (pcmk_is_set(scheduler->flags, pcmk_sched_shutdown_lock)) { +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT); + scheduler->shutdown_lock = crm_parse_interval_spec(value) / 1000; + crm_trace("Resources will be locked to nodes that were cleanly " + "shut down (locks expire after %s)", +@@ -424,8 +424,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler) + "shut down"); + } + +- value = pe_pref(scheduler->config_hash, +- XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); ++ value = pcmk__cluster_option(config_hash, ++ XML_CONFIG_ATTR_NODE_PENDING_TIMEOUT); + scheduler->node_pending_timeout = crm_parse_interval_spec(value) / 1000; + if (scheduler->node_pending_timeout == 0) { + crm_trace("Do not fence pending nodes"); +-- +2.31.1 + +From 866877401075e7ea4c3bc278e69ed94ea3a7af99 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:32:36 -0800 +Subject: [PATCH 17/24] Refactor: libpe_status, 
scheduler: Drop pe_metadata() + +Replace with static scheduler_metadata() in the scheduler since we don't +rely on a static options array in lib/pengine/common.c anymore. + +Signed-off-by: Reid Wahl +--- + daemons/schedulerd/pacemaker-schedulerd.c | 18 ++++++++++++++++-- + include/crm/pengine/internal.h | 2 -- + lib/pengine/common.c | 14 -------------- + 3 files changed, 16 insertions(+), 18 deletions(-) + +diff --git a/daemons/schedulerd/pacemaker-schedulerd.c b/daemons/schedulerd/pacemaker-schedulerd.c +index 3f2a3e8..27c96da 100644 +--- a/daemons/schedulerd/pacemaker-schedulerd.c ++++ b/daemons/schedulerd/pacemaker-schedulerd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -46,6 +46,20 @@ pcmk__supported_format_t formats[] = { + + void pengine_shutdown(int nsig); + ++static void ++scheduler_metadata(pcmk__output_t *out) ++{ ++ const char *name = "pacemaker-schedulerd"; ++ const char *desc_short = "Pacemaker scheduler options"; ++ const char *desc_long = "Cluster options used by Pacemaker's scheduler"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_schedulerd); ++ ++ out->output_xml(out, "metadata", s); ++ g_free(s); ++} ++ + static GOptionContext * + build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { + GOptionContext *context = NULL; +@@ -98,7 +112,7 @@ main(int argc, char **argv) + if (options.remainder) { + if (g_strv_length(options.remainder) == 1 && + pcmk__str_eq("metadata", options.remainder[0], pcmk__str_casei)) { +- pe_metadata(out); ++ scheduler_metadata(out); + goto done; + } else { + exit_code = CRM_EX_USAGE; +diff --git a/include/crm/pengine/internal.h b/include/crm/pengine/internal.h +index 2b7f2eb..5965c1a 100644 +--- a/include/crm/pengine/internal.h ++++ b/include/crm/pengine/internal.h +@@ -208,8 +208,6 
@@ char *native_parameter(pcmk_resource_t *rsc, pcmk_node_t *node, gboolean create, + pcmk_node_t *native_location(const pcmk_resource_t *rsc, GList **list, + int current); + +-void pe_metadata(pcmk__output_t *out); +- + void native_add_running(pcmk_resource_t *rsc, pcmk_node_t *node, + pcmk_scheduler_t *scheduler, gboolean failed); + +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 0a4dfe6..6551d10 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -21,20 +21,6 @@ + gboolean was_processing_error = FALSE; + gboolean was_processing_warning = FALSE; + +-void +-pe_metadata(pcmk__output_t *out) +-{ +- const char *name = "pacemaker-schedulerd"; +- const char *desc_short = "Pacemaker scheduler options"; +- const char *desc_long = "Cluster options used by Pacemaker's scheduler"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_schedulerd); +- +- out->output_xml(out, "metadata", s); +- g_free(s); +-} +- + const char * + fail2text(enum action_fail_response fail) + { +-- +2.31.1 + +From 700c906d621887f257c73ddfd7c82c773cb32c8e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:40:18 -0800 +Subject: [PATCH 18/24] Refactor: libcib: Drop verify_cib_options() + +Signed-off-by: Reid Wahl +--- + lib/cib/cib_utils.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index b83158c..227a50f 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -881,12 +881,6 @@ cib_metadata(void) + g_free(s); + } + +-static void +-verify_cib_options(GHashTable *options) +-{ +- pcmk__validate_cluster_options(options); +-} +- + const char * + cib_pref(GHashTable * options, const char *name) + { +@@ -913,7 +907,7 @@ cib_read_config(GHashTable * options, xmlNode * current_cib) + options, CIB_OPTIONS_FIRST, TRUE, now, NULL); + } + +- verify_cib_options(options); ++ pcmk__validate_cluster_options(options); + + crm_time_free(now); + 
+-- +2.31.1 + +From 6284a3a79b88fd20630bfbfe866a4c2c3686a246 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:24:19 -0800 +Subject: [PATCH 19/24] Refactor: libcib: Drop cib_pref() internally + +Signed-off-by: Reid Wahl +--- + lib/cib/cib_utils.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 227a50f..5b241ae 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -281,7 +281,7 @@ cib_acl_enabled(xmlNode *xml, const char *user) + GHashTable *options = pcmk__strkey_table(free, free); + + cib_read_config(options, xml); +- value = cib_pref(options, "enable-acl"); ++ value = pcmk__cluster_option(options, "enable-acl"); + rc = crm_is_true(value); + g_hash_table_destroy(options); + } +-- +2.31.1 + +From 1806822590b0060079b94b7d2867722ef2430bf9 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:26:29 -0800 +Subject: [PATCH 20/24] API: libcib: Deprecate cib_pref() + +Signed-off-by: Reid Wahl +--- + include/crm/cib/util.h | 1 - + include/crm/cib/util_compat.h | 3 +++ + lib/cib/cib_utils.c | 12 ++++++------ + 3 files changed, 9 insertions(+), 7 deletions(-) + +diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h +index 18726bb..47894cb 100644 +--- a/include/crm/cib/util.h ++++ b/include/crm/cib/util.h +@@ -57,7 +57,6 @@ int set_standby(cib_t * the_cib, const char *uuid, const char *scope, const char + xmlNode *cib_get_generation(cib_t * cib); + + void cib_metadata(void); +-const char *cib_pref(GHashTable * options, const char *name); + + int cib_apply_patch_event(xmlNode *event, xmlNode *input, xmlNode **output, + int level); +diff --git a/include/crm/cib/util_compat.h b/include/crm/cib/util_compat.h +index 20f1e2d..d6ccd4d 100644 +--- a/include/crm/cib/util_compat.h ++++ b/include/crm/cib/util_compat.h +@@ -33,6 +33,9 @@ const char *get_object_parent(const char *object_type); + //! 
\deprecated Use pcmk_cib_xpath_for() instead + xmlNode *get_object_root(const char *object_type, xmlNode *the_root); + ++//! \deprecated Do not use ++const char *cib_pref(GHashTable * options, const char *name); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index 5b241ae..f9c463e 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -881,12 +881,6 @@ cib_metadata(void) + g_free(s); + } + +-const char * +-cib_pref(GHashTable * options, const char *name) +-{ +- return pcmk__cluster_option(options, name); +-} +- + gboolean + cib_read_config(GHashTable * options, xmlNode * current_cib) + { +@@ -1085,5 +1079,11 @@ get_object_root(const char *object_type, xmlNode *the_root) + return pcmk_find_cib_element(the_root, object_type); + } + ++const char * ++cib_pref(GHashTable * options, const char *name) ++{ ++ return pcmk__cluster_option(options, name); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From 422fb81250aa733d2601b4d412c3fbcbf5b74420 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:29:29 -0800 +Subject: [PATCH 21/24] Refactor: based, libcib: Drop cib_metadata() internally + +Replace with a static based_metadata() in pacemaker-based since we don't +depend on a static options array in lib/cib/cib_utils.c anymore. 
+ +Signed-off-by: Reid Wahl +--- + daemons/based/pacemaker-based.c | 17 ++++++++++++++++- + 1 file changed, 16 insertions(+), 1 deletion(-) + +diff --git a/daemons/based/pacemaker-based.c b/daemons/based/pacemaker-based.c +index 5dd7938..78bcd51 100644 +--- a/daemons/based/pacemaker-based.c ++++ b/daemons/based/pacemaker-based.c +@@ -126,6 +126,21 @@ setup_stand_alone(GError **error) + return pcmk_rc_ok; + } + ++static void ++based_metadata(void) ++{ ++ const char *name = "pacemaker-based"; ++ const char *desc_short = "Cluster Information Base manager options"; ++ const char *desc_long = "Cluster options used by Pacemaker's Cluster " ++ "Information Base manager"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + static GOptionEntry entries[] = { + { "stand-alone", 's', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &stand_alone, + "(Advanced use only) Run in stand-alone mode", NULL }, +@@ -204,7 +219,7 @@ main(int argc, char **argv) + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { +- cib_metadata(); ++ based_metadata(); + goto done; + } + +-- +2.31.1 + +From 05b3e08de7c515c38cf42bbbeaf18e3346eb360d Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:30:16 -0800 +Subject: [PATCH 22/24] API: libcib: Deprecate cib_metadata() + +Signed-off-by: Reid Wahl +--- + include/crm/cib/util.h | 2 -- + include/crm/cib/util_compat.h | 3 +++ + lib/cib/cib_utils.c | 30 +++++++++++++++--------------- + 3 files changed, 18 insertions(+), 17 deletions(-) + +diff --git a/include/crm/cib/util.h b/include/crm/cib/util.h +index 47894cb..ce6cbeb 100644 +--- a/include/crm/cib/util.h ++++ b/include/crm/cib/util.h +@@ -56,8 +56,6 @@ int set_standby(cib_t * the_cib, const char *uuid, const char *scope, const char + + xmlNode *cib_get_generation(cib_t * cib); + +-void cib_metadata(void); +- + int 
cib_apply_patch_event(xmlNode *event, xmlNode *input, xmlNode **output, + int level); + +diff --git a/include/crm/cib/util_compat.h b/include/crm/cib/util_compat.h +index d6ccd4d..95e0766 100644 +--- a/include/crm/cib/util_compat.h ++++ b/include/crm/cib/util_compat.h +@@ -36,6 +36,9 @@ xmlNode *get_object_root(const char *object_type, xmlNode *the_root); + //! \deprecated Do not use + const char *cib_pref(GHashTable * options, const char *name); + ++//! \deprecated Do not use ++void cib_metadata(void); ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c +index f9c463e..75dda16 100644 +--- a/lib/cib/cib_utils.c ++++ b/lib/cib/cib_utils.c +@@ -866,21 +866,6 @@ cib_native_notify(gpointer data, gpointer user_data) + crm_trace("Callback invoked..."); + } + +-void +-cib_metadata(void) +-{ +- const char *name = "pacemaker-based"; +- const char *desc_short = "Cluster Information Base manager options"; +- const char *desc_long = "Cluster options used by Pacemaker's Cluster " +- "Information Base manager"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_based); +- +- printf("%s", s); +- g_free(s); +-} +- + gboolean + cib_read_config(GHashTable * options, xmlNode * current_cib) + { +@@ -1085,5 +1070,20 @@ cib_pref(GHashTable * options, const char *name) + return pcmk__cluster_option(options, name); + } + ++void ++cib_metadata(void) ++{ ++ const char *name = "pacemaker-based"; ++ const char *desc_short = "Cluster Information Base manager options"; ++ const char *desc_long = "Cluster options used by Pacemaker's Cluster " ++ "Information Base manager"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_based); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + // LCOV_EXCL_STOP + // End deprecated API +-- +2.31.1 + +From f8ee575a51f6bacf82abb1d1f41eba1092776682 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Mon, 8 Jan 2024 20:42:37 -0800 +Subject: 
[PATCH 23/24] Refactor: controller: Replace crmd_metadata() with + controld_metadata() + +Can be static since we don't rely on an options array that lives in +controld_control.c anymore. + +Signed-off-by: Reid Wahl +--- + daemons/controld/controld_control.c | 14 -------------- + daemons/controld/pacemaker-controld.c | 18 ++++++++++++++++-- + daemons/controld/pacemaker-controld.h | 3 +-- + 3 files changed, 17 insertions(+), 18 deletions(-) + +diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c +index 40b90f8..9b54900 100644 +--- a/daemons/controld/controld_control.c ++++ b/daemons/controld/controld_control.c +@@ -515,20 +515,6 @@ do_recover(long long action, + register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL); + } + +-void +-crmd_metadata(void) +-{ +- const char *name = "pacemaker-controld"; +- const char *desc_short = "Pacemaker controller options"; +- const char *desc_long = "Cluster options used by Pacemaker's controller"; +- +- gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, +- pcmk__opt_context_controld); +- +- printf("%s", s); +- g_free(s); +-} +- + static void + config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data) + { +diff --git a/daemons/controld/pacemaker-controld.c b/daemons/controld/pacemaker-controld.c +index e4a72c2..d80644d 100644 +--- a/daemons/controld/pacemaker-controld.c ++++ b/daemons/controld/pacemaker-controld.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -46,6 +46,20 @@ static pcmk__supported_format_t formats[] = { + { NULL, NULL, NULL } + }; + ++static void ++controld_metadata(void) ++{ ++ const char *name = "pacemaker-controld"; ++ const char *desc_short = "Pacemaker controller options"; ++ const char *desc_long = "Cluster options used by Pacemaker's controller"; ++ ++ gchar *s = pcmk__cluster_option_metadata(name, desc_short, desc_long, ++ pcmk__opt_context_controld); ++ ++ printf("%s", s); ++ g_free(s); ++} ++ + static GOptionContext * + build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) + { +@@ -96,7 +110,7 @@ main(int argc, char **argv) + + if ((g_strv_length(processed_args) >= 2) + && pcmk__str_eq(processed_args[1], "metadata", pcmk__str_none)) { +- crmd_metadata(); ++ controld_metadata(); + initialize = false; + goto done; + } +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index 2334cce..ba8dc8f 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2023 the Pacemaker project contributors ++ * Copyright 2004-2024 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -28,7 +28,6 @@ + # define controld_trigger_config() \ + controld_trigger_config_as(__func__, __LINE__) + +-void crmd_metadata(void); + void controld_trigger_config_as(const char *fn, int line); + void controld_election_init(const char *uname); + void controld_configure_election(GHashTable *options); +-- +2.31.1 + +From 282e9eb026699abef5a28fc37f54b9330029da1c Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Tue, 2 Jan 2024 19:56:11 -0800 +Subject: [PATCH 24/24] Test: cts-cli: Update daemon tests after fence-reaction + select + +Signed-off-by: Reid Wahl +--- + cts/cli/regression.daemons.exp | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index 43393df..543d62f 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -52,9 +52,12 @@ + + + +- A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Allowed values are "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. ++ A cluster node may receive notification of its own fencing if fencing is misconfigured, or if fabric fencing is in use that doesn't cut cluster communication. Use "stop" to attempt to immediately stop Pacemaker and stay stopped, or "panic" to attempt to immediately reboot the local node, falling back to stop on failure. Allowed values: stop, panic + How a cluster node should react if notified of its own fencing +- ++ ++ + + + Declare an election failed if it is not decided within this much time. If you need to adjust this value, it probably indicates the presence of a bug. 
+-- +2.31.1 + diff --git a/008-attrd-prep.patch b/008-attrd-prep.patch new file mode 100644 index 0000000..acc22d3 --- /dev/null +++ b/008-attrd-prep.patch @@ -0,0 +1,373 @@ +From 4823643bef8801b33688167b159bb531bcdf8911 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 4 Jan 2024 17:10:08 -0600 +Subject: [PATCH 1/5] Refactor: pacemaker-attrd: drop redundant argument from + update_attr_on_host() + +It can check for a force-write via its xml argument, to simplify the caller +--- + daemons/attrd/attrd_corosync.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 158d82f..1b56923 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -266,7 +266,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host) + static void + update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, +- bool filter, int is_force_write) ++ bool filter) + { + attribute_value_t *v = NULL; + +@@ -309,6 +309,10 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + } + + } else { ++ int is_force_write = 0; ++ ++ crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); ++ + if (is_force_write == 1 && a->timeout_ms && a->timer) { + /* Save forced writing and set change flag. */ + /* The actual attribute is written by Writer after election. 
*/ +@@ -338,15 +342,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME); + const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE); + const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); +- int is_force_write = 0; + + if (attr == NULL) { + crm_warn("Could not update attribute: peer did not specify name"); + return; + } + +- crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write); +- + a = attrd_populate_attribute(xml, attr); + if (a == NULL) { + return; +@@ -361,12 +362,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter) + g_hash_table_iter_init(&vIter, a->values); + + while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) { +- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); ++ update_attr_on_host(a, peer, xml, attr, value, host, filter); + } + + } else { + // Update attribute value for the given host +- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write); ++ update_attr_on_host(a, peer, xml, attr, value, host, filter); + } + + /* If this is a message from some attrd instance broadcasting its protocol +-- +2.31.1 + +From c7a1ab819b25e3225c185c1630a7139a96fb5c71 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 9 Jan 2024 16:48:37 -0600 +Subject: [PATCH 2/5] Refactor: pacemaker-attrd: drop unused argument from + attrd_peer_sync() + +--- + daemons/attrd/attrd_corosync.c | 10 ++++++++-- + daemons/attrd/attrd_elections.c | 2 +- + daemons/attrd/attrd_messages.c | 2 +- + daemons/attrd/pacemaker-attrd.h | 2 +- + 4 files changed, 11 insertions(+), 5 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 1b56923..088f00c 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -233,7 +233,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da + */ + if 
(attrd_election_won() + && !pcmk_is_set(peer->flags, crm_remote_node)) { +- attrd_peer_sync(peer, NULL); ++ attrd_peer_sync(peer); + } + } else { + // Remove all attribute values associated with lost nodes +@@ -535,8 +535,14 @@ attrd_peer_remove(const char *host, bool uncache, const char *source) + } + } + ++/*! ++ * \internal ++ * \brief Send all known attributes and values to a peer ++ * ++ * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers) ++ */ + void +-attrd_peer_sync(crm_node_t *peer, xmlNode *xml) ++attrd_peer_sync(crm_node_t *peer) + { + GHashTableIter aIter; + GHashTableIter vIter; +diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c +index 82fbe8a..9dbf133 100644 +--- a/daemons/attrd/attrd_elections.c ++++ b/daemons/attrd/attrd_elections.c +@@ -23,7 +23,7 @@ attrd_election_cb(gpointer user_data) + attrd_declare_winner(); + + /* Update the peers after an election */ +- attrd_peer_sync(NULL, NULL); ++ attrd_peer_sync(NULL); + + /* After winning an election, update the CIB with the values of all + * attributes as the winner knows them. 
+diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c +index 5525d4b..13ac01f 100644 +--- a/daemons/attrd/attrd_messages.c ++++ b/daemons/attrd/attrd_messages.c +@@ -180,7 +180,7 @@ handle_sync_request(pcmk__request_t *request) + crm_node_t *peer = pcmk__get_node(0, request->peer, NULL, + pcmk__node_search_cluster); + +- attrd_peer_sync(peer, request->xml); ++ attrd_peer_sync(peer); + pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + return NULL; + } else { +diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h +index 7384188..bacaad6 100644 +--- a/daemons/attrd/pacemaker-attrd.h ++++ b/daemons/attrd/pacemaker-attrd.h +@@ -175,7 +175,7 @@ extern GHashTable *peer_protocol_vers; + int attrd_cluster_connect(void); + void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host, + bool filter); +-void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); ++void attrd_peer_sync(crm_node_t *peer); + void attrd_peer_remove(const char *host, bool uncache, const char *source); + void attrd_peer_clear_failure(pcmk__request_t *request); + void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won, +-- +2.31.1 + +From abafae0068e10abb135b0496086947728365299a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 17:31:17 -0600 +Subject: [PATCH 3/5] Refactor: pacemaker-attrd: de-functionize + attrd_lookup_or_create_value() + +... to make planned changes easier +--- + daemons/attrd/attrd_corosync.c | 62 +++++++++++++--------------------- + 1 file changed, 24 insertions(+), 38 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 088f00c..59e6a26 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -168,40 +168,6 @@ broadcast_local_value(const attribute_t *a) + + #define state_text(state) pcmk__s((state), "in unknown state") + +-/*! 
+- * \internal +- * \brief Return a node's value from hash table (creating one if needed) +- * +- * \param[in,out] values Hash table of values +- * \param[in] node_name Name of node to look up +- * \param[in] xml XML describing the attribute +- * +- * \return Pointer to new or existing hash table entry +- */ +-static attribute_value_t * +-attrd_lookup_or_create_value(GHashTable *values, const char *node_name, +- const xmlNode *xml) +-{ +- attribute_value_t *v = g_hash_table_lookup(values, node_name); +- int is_remote = 0; +- +- if (v == NULL) { +- v = calloc(1, sizeof(attribute_value_t)); +- CRM_ASSERT(v != NULL); +- +- pcmk__str_update(&v->nodename, node_name); +- g_hash_table_replace(values, v->nodename, v); +- } +- +- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); +- if (is_remote) { +- attrd_set_value_flags(v, attrd_value_remote); +- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); +- } +- +- return(v); +-} +- + static void + attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) + { +@@ -268,18 +234,38 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, + bool filter) + { ++ int is_remote = 0; ++ bool changed = false; + attribute_value_t *v = NULL; + +- v = attrd_lookup_or_create_value(a->values, host, xml); ++ // Create entry for value if not already existing ++ v = g_hash_table_lookup(a->values, host); ++ if (v == NULL) { ++ v = calloc(1, sizeof(attribute_value_t)); ++ CRM_ASSERT(v != NULL); ++ ++ pcmk__str_update(&v->nodename, host); ++ g_hash_table_replace(a->values, v->nodename, v); ++ } ++ ++ // If value is for a Pacemaker Remote node, remember that ++ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); ++ if (is_remote) { ++ attrd_set_value_flags(v, attrd_value_remote); ++ CRM_ASSERT(crm_remote_peer_get(host) != NULL); ++ } ++ ++ // Check whether the value changed ++ changed = !pcmk__str_eq(v->current, 
value, pcmk__str_casei); + +- if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) +- && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { ++ if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname, ++ pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", + attr, host, v->current, value, peer->uname); + v = broadcast_local_value(a); + +- } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { ++ } else if (changed) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? " in " : "", +-- +2.31.1 + +From 72529ec512fb4938bd8dbbd2caf44bbb1a616826 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 18:04:33 -0600 +Subject: [PATCH 4/5] Refactor: pacemaker-attrd: minor shuffling to make + planned changes easier + +--- + daemons/attrd/attrd_cib.c | 19 +++++++++++-------- + 1 file changed, 11 insertions(+), 8 deletions(-) + +diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c +index bdc0a10..481fea7 100644 +--- a/daemons/attrd/attrd_cib.c ++++ b/daemons/attrd/attrd_cib.c +@@ -51,6 +51,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) + { + const xmlNode *patchset = NULL; + const char *client_name = NULL; ++ bool status_changed = false; + + if (attrd_shutting_down(true)) { + return; +@@ -64,20 +65,22 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg) + mainloop_set_trigger(attrd_config_read); + } + +- if (!attrd_election_won()) { +- // Don't write attributes if we're not the writer +- return; +- } ++ status_changed = cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS); + + client_name = crm_element_value(msg, F_CIB_CLIENTNAME); + if (!cib__client_triggers_refresh(client_name)) { +- // The CIB is still accurate ++ /* This change came from a source that ensured the CIB is consistent ++ * with our attributes table, so we don't need to write anything out. 
++ */ + return; + } + +- if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES) +- || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) { +- ++ if (!attrd_election_won()) { ++ // Don't write attributes if we're not the writer ++ return; ++ } ++ ++ if (status_changed || cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)) { + /* An unsafe client modified the nodes or status section. Write + * transient attributes to ensure they're up-to-date in the CIB. + */ +-- +2.31.1 + +From b83c2567fb450eec5b18882ded16403831d2c3c0 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Jan 2024 17:53:55 -0600 +Subject: [PATCH 5/5] Log: pacemaker-attrd: make sure we don't try to log NULL + +--- + daemons/attrd/attrd_corosync.c | 15 +++++++++++---- + 1 file changed, 11 insertions(+), 4 deletions(-) + +diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c +index 59e6a26..b348d52 100644 +--- a/daemons/attrd/attrd_corosync.c ++++ b/daemons/attrd/attrd_corosync.c +@@ -229,6 +229,11 @@ record_peer_nodeid(attribute_value_t *v, const char *host) + } + } + ++#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)") ++ ++#define readable_peer(p) \ ++ (((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer")) ++ + static void + update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + const char *attr, const char *value, const char *host, +@@ -262,14 +267,14 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml, + pcmk__str_casei)) { + + crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", +- attr, host, v->current, value, peer->uname); ++ attr, host, readable_value(v), value, peer->uname); + v = broadcast_local_value(a); + + } else if (changed) { + crm_notice("Setting %s[%s]%s%s: %s -> %s " + CRM_XS " from %s with %s write delay", + attr, host, a->set_type ? 
" in " : "", +- pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"), ++ pcmk__s(a->set_type, ""), readable_value(v), + pcmk__s(value, "(unset)"), peer->uname, + (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms)); + pcmk__str_update(&v->current, value); +@@ -543,12 +548,14 @@ attrd_peer_sync(crm_node_t *peer) + while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { + g_hash_table_iter_init(&vIter, a->values); + while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { +- crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); ++ crm_debug("Syncing %s[%s]='%s' to %s", ++ a->id, v->nodename, readable_value(v), ++ readable_peer(peer)); + attrd_add_value_xml(sync, a, v, false); + } + } + +- crm_debug("Syncing values to %s", peer?peer->uname:"everyone"); ++ crm_debug("Syncing values to %s", readable_peer(peer)); + attrd_send_message(peer, sync, false); + free_xml(sync); + } +-- +2.31.1 + diff --git a/pacemaker.spec b/pacemaker.spec index c7c90ae..aa44b99 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -36,10 +36,10 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.7 -%global specversion 2 +%global specversion 3 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit c858c13cb79431b5c8e5dda3ca44dd305fce946c +%global commit 0f7f88312f7a1ccedee60bf768aba79ee13d41e0 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. 
@@ -267,6 +267,12 @@ Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz # upstream commits Patch001: 001-schema-glib.patch Patch002: 002-schema-transfer.patch +Patch003: 003-schema-doc.patch +Patch004: 004-attrd-cache-1.patch +Patch005: 005-attrd-cache-2.patch +Patch006: 006-cib-file-feature-set.patch +Patch007: 007-option-metadata.patch +Patch008: 008-attrd-prep.patch # downstream-only commits #Patch1xx: 1xx-xxxx.patch @@ -1015,6 +1021,15 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Jan 17 2024 Chris Lumens - 2.1.7-3 +- Rebase on upstream 2.1.7 final release +- Fix documentation for Pacemaker Remote schema transfers +- Do not check CIB feature set version when CIB_file is set +- Consolidate attrd cache handling +- Avoid duplicating option metadata across daemons +- Related: RHEL-7597 +- Related: RHEL-14045 + * Thu Dec 14 2023 Chris Lumens - 2.1.7-2 - Rebase on upstream 2.1.7-rc4 release - Pacemaker Remote nodes can validate against later schema versions diff --git a/sources b/sources index 3b1cbc3..fef159f 100644 --- a/sources +++ b/sources @@ -1,2 +1,2 @@ SHA512 (nagios-agents-metadata-105ab8a.tar.gz) = 3b8a57de69f53cee1e4c0355ccd23fda49c72d06a802c3d6dc33f0fd1823766356b44f9eb14fb00f47678e522e8679a638f3850857ed7e8123dfea525151d1d5 -SHA512 (pacemaker-c858c13cb.tar.gz) = 67a6669bb42dd7adcde2a99155086746a95a37262a14b6ca2c4f8f2706ba572ef2a3715cc40bbeb6988ae6a8979ed8ce208c890934ea0eb5d8448d84510cf3ce +SHA512 (pacemaker-0f7f88312.tar.gz) = 9bea37a11594aa4dc1470d212faff91e577856955278b37ec23c6fcdf5baf7bd8948d304e2fb8dcd9d078a344663ae3656b40ba94feda03aca2ec425b7d0d16b