From 13597d4799978803ef56971df3543bfd23f531a6 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Wed, 21 Jun 2023 16:42:45 -0400 Subject: [PATCH] Support start state for Pacemaker Remote nodes - Resolves: rhbz1502795 --- 001-remote-start-state.patch | 402 +++++++++++++++++++++++++++++++++++ pacemaker.spec | 8 +- 2 files changed, 408 insertions(+), 2 deletions(-) create mode 100644 001-remote-start-state.patch diff --git a/001-remote-start-state.patch b/001-remote-start-state.patch new file mode 100644 index 0000000..e66bf16 --- /dev/null +++ b/001-remote-start-state.patch @@ -0,0 +1,402 @@ +From cf53f523e691295879cd75cff1a86bc15664fa51 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 09:59:13 -0400 +Subject: [PATCH 1/7] Feature: daemons: Add start state to LRMD handshake XML + +This gets read out of /etc/sysconfig/pacemaker and set into the +environment. The remote node executor will then add that to the XML +that it sends to the controller upon startup. + +Ref T183 +--- + daemons/execd/execd_commands.c | 5 +++++ + include/crm_internal.h | 1 + + 2 files changed, 6 insertions(+) + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index fa2761e..9a783a5 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -1474,6 +1474,7 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + int rc = pcmk_ok; + time_t now = time(NULL); + const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION); ++ const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE); + + if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) { + crm_err("Cluster API version must be greater than or equal to %s, not %s", +@@ -1503,6 +1504,10 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id, + crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION); + crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time); + ++ if 
(start_state) { ++ crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state); ++ } ++ + return rc; + } + +diff --git a/include/crm_internal.h b/include/crm_internal.h +index 5f6531f..771bd26 100644 +--- a/include/crm_internal.h ++++ b/include/crm_internal.h +@@ -84,6 +84,7 @@ + #define PCMK__XA_GRAPH_ERRORS "graph-errors" + #define PCMK__XA_GRAPH_WARNINGS "graph-warnings" + #define PCMK__XA_MODE "mode" ++#define PCMK__XA_NODE_START_STATE "node_start_state" + #define PCMK__XA_TASK "task" + #define PCMK__XA_UPTIME "uptime" + #define PCMK__XA_CONN_HOST "connection_host" +-- +2.31.1 + +From c950291742711b5c4c8986adc8e938fe6fef861c Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:04:32 -0400 +Subject: [PATCH 2/7] Feature: liblrmd: Save a remote node's requested start + state + +Ref T183 +--- + include/crm/common/ipc_internal.h | 1 + + lib/lrmd/lrmd_client.c | 7 +++++++ + 2 files changed, 8 insertions(+) + +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index 5099dda..d203924 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -112,6 +112,7 @@ struct pcmk__remote_s { + int tcp_socket; + mainloop_io_t *source; + time_t uptime; ++ char *start_state; + + /* CIB-only */ + char *token; +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index c565728..4239105 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -588,7 +588,9 @@ lrmd_tls_connection_destroy(gpointer userdata) + } + + free(native->remote->buffer); ++ free(native->remote->start_state); + native->remote->buffer = NULL; ++ native->remote->start_state = NULL; + native->source = 0; + native->sock = 0; + native->psk_cred_c = NULL; +@@ -980,6 +982,7 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) + const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION); + const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION); + const char *tmp_ticket = 
crm_element_value(reply, F_LRMD_CLIENTID); ++ const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE); + long long uptime = -1; + + crm_element_value_int(reply, F_LRMD_RC, &rc); +@@ -992,6 +995,10 @@ lrmd_handshake(lrmd_t * lrmd, const char *name) + crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime); + native->remote->uptime = uptime; + ++ if (start_state) { ++ native->remote->start_state = strdup(start_state); ++ } ++ + if (rc == -EPROTO) { + crm_err("Executor protocol version mismatch between client (%s) and server (%s)", + LRMD_PROTOCOL_VERSION, version); +-- +2.31.1 + +From 7302014c7b7296be31b1f542b3f107d55b1fb2a0 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:05:13 -0400 +Subject: [PATCH 3/7] Feature: liblrmd: Add lrmd__node_start_state. + +This function is used to get the start state out of an lrmd_private_t +structure. + +Ref T183 +--- + include/crm/lrmd_internal.h | 1 + + lib/lrmd/lrmd_client.c | 12 ++++++++++++ + 2 files changed, 13 insertions(+) + +diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h +index 5810554..d1cd25d 100644 +--- a/include/crm/lrmd_internal.h ++++ b/include/crm/lrmd_internal.h +@@ -47,6 +47,7 @@ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc, + void lrmd__reset_result(lrmd_event_data_t *event); + + time_t lrmd__uptime(lrmd_t *lrmd); ++const char *lrmd__node_start_state(lrmd_t *lrmd); + + /* Shared functions for IPC proxy back end */ + +diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c +index 4239105..82434b9 100644 +--- a/lib/lrmd/lrmd_client.c ++++ b/lib/lrmd/lrmd_client.c +@@ -2538,3 +2538,15 @@ lrmd__uptime(lrmd_t *lrmd) + return native->remote->uptime; + } + } ++ ++const char * ++lrmd__node_start_state(lrmd_t *lrmd) ++{ ++ lrmd_private_t *native = lrmd->lrmd_private; ++ ++ if (native->remote == NULL) { ++ return NULL; ++ } else { ++ return native->remote->start_state; ++ } ++} +-- +2.31.1 + +From 
e5e4d43f847da0930bae12f63c7e9d9c44c07cdf Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:07:58 -0400 +Subject: [PATCH 4/7] Refactor: controller: Make set_join_state a public + function. + +This already does all the work of setting a node's start state. It just +needs to be made public and given arguments for what node to set instead +of reading globals. + +Ref T183 +--- + daemons/controld/controld_join_client.c | 20 ++++++++++---------- + daemons/controld/pacemaker-controld.h | 3 +++ + 2 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index da6a9d6..07e2a27 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -195,32 +195,31 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + free_xml(generation); + } + +-static void +-set_join_state(const char * start_state) ++void ++set_join_state(const char *start_state, const char *node_name, const char *node_uuid) + { + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +- "environment", controld_globals.our_nodename, start_state); ++ "environment", node_name, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, +- XML_CIB_TAG_NODES, controld_globals.our_uuid, ++ XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", "on", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +- "environment", controld_globals.our_nodename, start_state); ++ "environment", node_name, start_state); + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, +- XML_CIB_TAG_NODES, controld_globals.our_uuid, ++ XML_CIB_TAG_NODES, node_uuid, + NULL, NULL, NULL, "standby", 
"off", NULL, NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { +- crm_debug("Not forcing a starting state on node %s", +- controld_globals.our_nodename); ++ crm_debug("Not forcing a starting state on node %s", node_name); + + } else { + crm_warn("Unrecognized start state '%s', using 'default' (%s)", +- start_state, controld_globals.our_nodename); ++ start_state, node_name); + } + } + +@@ -335,7 +334,8 @@ do_cl_join_finalize_respond(long long action, + + first_join = FALSE; + if (start_state) { +- set_join_state(start_state); ++ set_join_state(start_state, controld_globals.our_nodename, ++ controld_globals.our_uuid); + } + } + +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index 1484a00..d8c2ddd 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -36,4 +36,7 @@ void controld_remove_voter(const char *uname); + void controld_election_fini(void); + void controld_stop_current_election_timeout(void); + ++void set_join_state(const char *start_state, const char *node_name, ++ const char *node_uuid); ++ + #endif +-- +2.31.1 + +From 63d069adb344bba2c982013226f87dfd95afaff3 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 13:38:03 -0400 +Subject: [PATCH 5/7] Refactor: controller: set_join_state needs to take a + remote parameter. + +Without this parameter, we won't know what to pass as node_type to +cib__update_node_attr. And without that, that function will not know to +update a remote node - it'll try to update a regular node by the same +name, which either doesn't exist or is not what we were hoping would +happen. 
+ +Ref T138 +--- + daemons/controld/controld_join_client.c | 11 +++++++---- + daemons/controld/pacemaker-controld.h | 2 +- + 2 files changed, 8 insertions(+), 5 deletions(-) + +diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c +index 07e2a27..799d1b4 100644 +--- a/daemons/controld/controld_join_client.c ++++ b/daemons/controld/controld_join_client.c +@@ -196,7 +196,8 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void * + } + + void +-set_join_state(const char *start_state, const char *node_name, const char *node_uuid) ++set_join_state(const char *start_state, const char *node_name, const char *node_uuid, ++ bool remote) + { + if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +@@ -204,7 +205,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_ + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, node_uuid, +- NULL, NULL, NULL, "standby", "on", NULL, NULL); ++ NULL, NULL, NULL, "standby", "on", NULL, ++ remote ? "remote" : NULL); + + } else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) { + crm_notice("Forcing node %s to join in %s state per configured " +@@ -212,7 +214,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_ + cib__update_node_attr(controld_globals.logger_out, + controld_globals.cib_conn, cib_sync_call, + XML_CIB_TAG_NODES, node_uuid, +- NULL, NULL, NULL, "standby", "off", NULL, NULL); ++ NULL, NULL, NULL, "standby", "off", NULL, ++ remote ? 
"remote" : NULL); + + } else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) { + crm_debug("Not forcing a starting state on node %s", node_name); +@@ -335,7 +338,7 @@ do_cl_join_finalize_respond(long long action, + first_join = FALSE; + if (start_state) { + set_join_state(start_state, controld_globals.our_nodename, +- controld_globals.our_uuid); ++ controld_globals.our_uuid, false); + } + } + +diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h +index d8c2ddd..2334cce 100644 +--- a/daemons/controld/pacemaker-controld.h ++++ b/daemons/controld/pacemaker-controld.h +@@ -37,6 +37,6 @@ void controld_election_fini(void); + void controld_stop_current_election_timeout(void); + + void set_join_state(const char *start_state, const char *node_name, +- const char *node_uuid); ++ const char *node_uuid, bool remote); + + #endif +-- +2.31.1 + +From 67274787898355065315f8c06d62458e2c2b0afe Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:09:02 -0400 +Subject: [PATCH 6/7] Feature: controller: When a remote node starts, apply any + start state. + +If we were given a start state in the handshake XML, that is now stored +in the remote node cache's private data. Extract it and set the state +on the node with set_join_state. 
+ +Fixes T183 +--- + daemons/controld/controld_remote_ra.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index f24b755..8ab1e46 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -280,6 +280,7 @@ remote_node_up(const char *node_name) + int call_opt; + xmlNode *update, *state; + crm_node_t *node; ++ lrm_state_t *connection_rsc = NULL; + + CRM_CHECK(node_name != NULL, return); + crm_info("Announcing Pacemaker Remote node %s", node_name); +@@ -301,6 +302,20 @@ remote_node_up(const char *node_name) + purge_remote_node_attrs(call_opt, node); + pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0); + ++ /* Apply any start state that we were given from the environment on the ++ * remote node. ++ */ ++ connection_rsc = lrm_state_find(node->uname); ++ ++ if (connection_rsc != NULL) { ++ lrmd_t *lrm = connection_rsc->conn; ++ const char *start_state = lrmd__node_start_state(lrm); ++ ++ if (start_state) { ++ set_join_state(start_state, node->uname, node->uuid, true); ++ } ++ } ++ + /* pacemaker_remote nodes don't participate in the membership layer, + * so cluster nodes don't automatically get notified when they come and go. + * We send a cluster message to the DC, and update the CIB node state entry, +-- +2.31.1 + +From 91cdda7056c9b9254a0d7e7a016b30f788e3e3ff Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 2 May 2023 10:16:30 -0400 +Subject: [PATCH 7/7] Doc: sysconfig: Remote nodes now respect start state. 
+ +Ref T183 +--- + etc/sysconfig/pacemaker.in | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in +index 3b03ad6..041da71 100644 +--- a/etc/sysconfig/pacemaker.in ++++ b/etc/sysconfig/pacemaker.in +@@ -144,8 +144,7 @@ + # By default, the local host will join the cluster in an online or standby + # state when Pacemaker first starts depending on whether it was previously put + # into standby mode. If this variable is set to "standby" or "online", it will +-# force the local host to join in the specified state. This has no effect on +-# Pacemaker Remote nodes. ++# force the local host to join in the specified state. + # + # Default: PCMK_node_start_state="default" + +-- +2.31.1 + diff --git a/pacemaker.spec b/pacemaker.spec index 47f422e..46aaf2c 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -36,7 +36,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.6 -%global specversion 1 +%global specversion 2 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit 6fdc9deea294bbad629b003c6ae036aaed8e3ee0 @@ -263,7 +263,7 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz # upstream commits -#Patch001: 001-xxxx.patch +Patch001: 001-remote-start-state.patch # downstream-only commits #Patch1xx: 1xx-xxxx.patch @@ -1002,6 +1002,10 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Jun 21 2023 Chris Lumens - 2.1.6-2 +- Support start state for Pacemaker Remote nodes +- Resolves: rhbz1502795 + * Thu May 25 2023 Chris Lumens - 2.1.6-1 - Rebase pacemaker on upstream 2.1.6 final release - Resolves: rhbz1578820