13597d4799
- Resolves: rhbz1502795
403 lines
15 KiB
Diff
403 lines
15 KiB
Diff
From cf53f523e691295879cd75cff1a86bc15664fa51 Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 09:59:13 -0400
|
|
Subject: [PATCH 1/7] Feature: daemons: Add start state to LRMD handshake XML
|
|
|
|
This gets read out of /etc/sysconfig/pacemaker and set into the
|
|
environment. The remote node executor will then add that to the XML
|
|
that it sends to the controller upon startup.
|
|
|
|
Ref T183
|
|
---
|
|
daemons/execd/execd_commands.c | 5 +++++
|
|
include/crm_internal.h | 1 +
|
|
2 files changed, 6 insertions(+)
|
|
|
|
diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c
|
|
index fa2761e..9a783a5 100644
|
|
--- a/daemons/execd/execd_commands.c
|
|
+++ b/daemons/execd/execd_commands.c
|
|
@@ -1474,6 +1474,7 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
|
|
int rc = pcmk_ok;
|
|
time_t now = time(NULL);
|
|
const char *protocol_version = crm_element_value(request, F_LRMD_PROTOCOL_VERSION);
|
|
+ const char *start_state = pcmk__env_option(PCMK__ENV_NODE_START_STATE);
|
|
|
|
if (compare_version(protocol_version, LRMD_MIN_PROTOCOL_VERSION) < 0) {
|
|
crm_err("Cluster API version must be greater than or equal to %s, not %s",
|
|
@@ -1503,6 +1504,10 @@ process_lrmd_signon(pcmk__client_t *client, xmlNode *request, int call_id,
|
|
crm_xml_add(*reply, F_LRMD_PROTOCOL_VERSION, LRMD_PROTOCOL_VERSION);
|
|
crm_xml_add_ll(*reply, PCMK__XA_UPTIME, now - start_time);
|
|
|
|
+ if (start_state) {
|
|
+ crm_xml_add(*reply, PCMK__XA_NODE_START_STATE, start_state);
|
|
+ }
|
|
+
|
|
return rc;
|
|
}
|
|
|
|
diff --git a/include/crm_internal.h b/include/crm_internal.h
|
|
index 5f6531f..771bd26 100644
|
|
--- a/include/crm_internal.h
|
|
+++ b/include/crm_internal.h
|
|
@@ -84,6 +84,7 @@
|
|
#define PCMK__XA_GRAPH_ERRORS "graph-errors"
|
|
#define PCMK__XA_GRAPH_WARNINGS "graph-warnings"
|
|
#define PCMK__XA_MODE "mode"
|
|
+#define PCMK__XA_NODE_START_STATE "node_start_state"
|
|
#define PCMK__XA_TASK "task"
|
|
#define PCMK__XA_UPTIME "uptime"
|
|
#define PCMK__XA_CONN_HOST "connection_host"
|
|
--
|
|
2.31.1
|
|
|
|
From c950291742711b5c4c8986adc8e938fe6fef861c Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 10:04:32 -0400
|
|
Subject: [PATCH 2/7] Feature: liblrmd: Save a remote node's requested start
|
|
state
|
|
|
|
Ref T183
|
|
---
|
|
include/crm/common/ipc_internal.h | 1 +
|
|
lib/lrmd/lrmd_client.c | 7 +++++++
|
|
2 files changed, 8 insertions(+)
|
|
|
|
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
|
|
index 5099dda..d203924 100644
|
|
--- a/include/crm/common/ipc_internal.h
|
|
+++ b/include/crm/common/ipc_internal.h
|
|
@@ -112,6 +112,7 @@ struct pcmk__remote_s {
|
|
int tcp_socket;
|
|
mainloop_io_t *source;
|
|
time_t uptime;
|
|
+ char *start_state;
|
|
|
|
/* CIB-only */
|
|
char *token;
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index c565728..4239105 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -588,7 +588,9 @@ lrmd_tls_connection_destroy(gpointer userdata)
|
|
}
|
|
|
|
free(native->remote->buffer);
|
|
+ free(native->remote->start_state);
|
|
native->remote->buffer = NULL;
|
|
+ native->remote->start_state = NULL;
|
|
native->source = 0;
|
|
native->sock = 0;
|
|
native->psk_cred_c = NULL;
|
|
@@ -980,6 +982,7 @@ lrmd_handshake(lrmd_t * lrmd, const char *name)
|
|
const char *version = crm_element_value(reply, F_LRMD_PROTOCOL_VERSION);
|
|
const char *msg_type = crm_element_value(reply, F_LRMD_OPERATION);
|
|
const char *tmp_ticket = crm_element_value(reply, F_LRMD_CLIENTID);
|
|
+ const char *start_state = crm_element_value(reply, PCMK__XA_NODE_START_STATE);
|
|
long long uptime = -1;
|
|
|
|
crm_element_value_int(reply, F_LRMD_RC, &rc);
|
|
@@ -992,6 +995,10 @@ lrmd_handshake(lrmd_t * lrmd, const char *name)
|
|
crm_element_value_ll(reply, PCMK__XA_UPTIME, &uptime);
|
|
native->remote->uptime = uptime;
|
|
|
|
+ if (start_state) {
|
|
+ native->remote->start_state = strdup(start_state);
|
|
+ }
|
|
+
|
|
if (rc == -EPROTO) {
|
|
crm_err("Executor protocol version mismatch between client (%s) and server (%s)",
|
|
LRMD_PROTOCOL_VERSION, version);
|
|
--
|
|
2.31.1
|
|
|
|
From 7302014c7b7296be31b1f542b3f107d55b1fb2a0 Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 10:05:13 -0400
|
|
Subject: [PATCH 3/7] Feature: liblrmd: Add lrmd__node_start_state.
|
|
|
|
This function is used to get the start state out of an lrmd_private_t
|
|
structure.
|
|
|
|
Ref T183
|
|
---
|
|
include/crm/lrmd_internal.h | 1 +
|
|
lib/lrmd/lrmd_client.c | 12 ++++++++++++
|
|
2 files changed, 13 insertions(+)
|
|
|
|
diff --git a/include/crm/lrmd_internal.h b/include/crm/lrmd_internal.h
|
|
index 5810554..d1cd25d 100644
|
|
--- a/include/crm/lrmd_internal.h
|
|
+++ b/include/crm/lrmd_internal.h
|
|
@@ -47,6 +47,7 @@ void lrmd__set_result(lrmd_event_data_t *event, enum ocf_exitcode rc,
|
|
void lrmd__reset_result(lrmd_event_data_t *event);
|
|
|
|
time_t lrmd__uptime(lrmd_t *lrmd);
|
|
+const char *lrmd__node_start_state(lrmd_t *lrmd);
|
|
|
|
/* Shared functions for IPC proxy back end */
|
|
|
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
|
index 4239105..82434b9 100644
|
|
--- a/lib/lrmd/lrmd_client.c
|
|
+++ b/lib/lrmd/lrmd_client.c
|
|
@@ -2538,3 +2538,15 @@ lrmd__uptime(lrmd_t *lrmd)
|
|
return native->remote->uptime;
|
|
}
|
|
}
|
|
+
|
|
+const char *
|
|
+lrmd__node_start_state(lrmd_t *lrmd)
|
|
+{
|
|
+ lrmd_private_t *native = lrmd->lrmd_private;
|
|
+
|
|
+ if (native->remote == NULL) {
|
|
+ return NULL;
|
|
+ } else {
|
|
+ return native->remote->start_state;
|
|
+ }
|
|
+}
|
|
--
|
|
2.31.1
|
|
|
|
From e5e4d43f847da0930bae12f63c7e9d9c44c07cdf Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 10:07:58 -0400
|
|
Subject: [PATCH 4/7] Refactor: controller: Make set_join_state a public
|
|
function.
|
|
|
|
This already does all the work of setting a node's start state. It just
|
|
needs to be made public and given arguments for what node to set instead
|
|
of reading globals.
|
|
|
|
Ref T183
|
|
---
|
|
daemons/controld/controld_join_client.c | 20 ++++++++++----------
|
|
daemons/controld/pacemaker-controld.h | 3 +++
|
|
2 files changed, 13 insertions(+), 10 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
|
|
index da6a9d6..07e2a27 100644
|
|
--- a/daemons/controld/controld_join_client.c
|
|
+++ b/daemons/controld/controld_join_client.c
|
|
@@ -195,32 +195,31 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *
|
|
free_xml(generation);
|
|
}
|
|
|
|
-static void
|
|
-set_join_state(const char * start_state)
|
|
+void
|
|
+set_join_state(const char *start_state, const char *node_name, const char *node_uuid)
|
|
{
|
|
if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) {
|
|
crm_notice("Forcing node %s to join in %s state per configured "
|
|
- "environment", controld_globals.our_nodename, start_state);
|
|
+ "environment", node_name, start_state);
|
|
cib__update_node_attr(controld_globals.logger_out,
|
|
controld_globals.cib_conn, cib_sync_call,
|
|
- XML_CIB_TAG_NODES, controld_globals.our_uuid,
|
|
+ XML_CIB_TAG_NODES, node_uuid,
|
|
NULL, NULL, NULL, "standby", "on", NULL, NULL);
|
|
|
|
} else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) {
|
|
crm_notice("Forcing node %s to join in %s state per configured "
|
|
- "environment", controld_globals.our_nodename, start_state);
|
|
+ "environment", node_name, start_state);
|
|
cib__update_node_attr(controld_globals.logger_out,
|
|
controld_globals.cib_conn, cib_sync_call,
|
|
- XML_CIB_TAG_NODES, controld_globals.our_uuid,
|
|
+ XML_CIB_TAG_NODES, node_uuid,
|
|
NULL, NULL, NULL, "standby", "off", NULL, NULL);
|
|
|
|
} else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) {
|
|
- crm_debug("Not forcing a starting state on node %s",
|
|
- controld_globals.our_nodename);
|
|
+ crm_debug("Not forcing a starting state on node %s", node_name);
|
|
|
|
} else {
|
|
crm_warn("Unrecognized start state '%s', using 'default' (%s)",
|
|
- start_state, controld_globals.our_nodename);
|
|
+ start_state, node_name);
|
|
}
|
|
}
|
|
|
|
@@ -335,7 +334,8 @@ do_cl_join_finalize_respond(long long action,
|
|
|
|
first_join = FALSE;
|
|
if (start_state) {
|
|
- set_join_state(start_state);
|
|
+ set_join_state(start_state, controld_globals.our_nodename,
|
|
+ controld_globals.our_uuid);
|
|
}
|
|
}
|
|
|
|
diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h
|
|
index 1484a00..d8c2ddd 100644
|
|
--- a/daemons/controld/pacemaker-controld.h
|
|
+++ b/daemons/controld/pacemaker-controld.h
|
|
@@ -36,4 +36,7 @@ void controld_remove_voter(const char *uname);
|
|
void controld_election_fini(void);
|
|
void controld_stop_current_election_timeout(void);
|
|
|
|
+void set_join_state(const char *start_state, const char *node_name,
|
|
+ const char *node_uuid);
|
|
+
|
|
#endif
|
|
--
|
|
2.31.1
|
|
|
|
From 63d069adb344bba2c982013226f87dfd95afaff3 Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 13:38:03 -0400
|
|
Subject: [PATCH 5/7] Refactor: controller: set_join_state needs to take a
|
|
remote parameter.
|
|
|
|
Without this parameter, we won't know what to pass as node_type to
|
|
cib__update_node_attr. And without that, that function will not know to
|
|
update a remote node - it'll try to update a regular node by the same
|
|
name, which either doesn't exist or is not what we were hoping would
|
|
happen.
|
|
|
|
Ref T183
|
|
---
|
|
daemons/controld/controld_join_client.c | 11 +++++++----
|
|
daemons/controld/pacemaker-controld.h | 2 +-
|
|
2 files changed, 8 insertions(+), 5 deletions(-)
|
|
|
|
diff --git a/daemons/controld/controld_join_client.c b/daemons/controld/controld_join_client.c
|
|
index 07e2a27..799d1b4 100644
|
|
--- a/daemons/controld/controld_join_client.c
|
|
+++ b/daemons/controld/controld_join_client.c
|
|
@@ -196,7 +196,8 @@ join_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *
|
|
}
|
|
|
|
void
|
|
-set_join_state(const char *start_state, const char *node_name, const char *node_uuid)
|
|
+set_join_state(const char *start_state, const char *node_name, const char *node_uuid,
|
|
+ bool remote)
|
|
{
|
|
if (pcmk__str_eq(start_state, "standby", pcmk__str_casei)) {
|
|
crm_notice("Forcing node %s to join in %s state per configured "
|
|
@@ -204,7 +205,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_
|
|
cib__update_node_attr(controld_globals.logger_out,
|
|
controld_globals.cib_conn, cib_sync_call,
|
|
XML_CIB_TAG_NODES, node_uuid,
|
|
- NULL, NULL, NULL, "standby", "on", NULL, NULL);
|
|
+ NULL, NULL, NULL, "standby", "on", NULL,
|
|
+ remote ? "remote" : NULL);
|
|
|
|
} else if (pcmk__str_eq(start_state, "online", pcmk__str_casei)) {
|
|
crm_notice("Forcing node %s to join in %s state per configured "
|
|
@@ -212,7 +214,8 @@ set_join_state(const char *start_state, const char *node_name, const char *node_
|
|
cib__update_node_attr(controld_globals.logger_out,
|
|
controld_globals.cib_conn, cib_sync_call,
|
|
XML_CIB_TAG_NODES, node_uuid,
|
|
- NULL, NULL, NULL, "standby", "off", NULL, NULL);
|
|
+ NULL, NULL, NULL, "standby", "off", NULL,
|
|
+ remote ? "remote" : NULL);
|
|
|
|
} else if (pcmk__str_eq(start_state, "default", pcmk__str_casei)) {
|
|
crm_debug("Not forcing a starting state on node %s", node_name);
|
|
@@ -335,7 +338,7 @@ do_cl_join_finalize_respond(long long action,
|
|
first_join = FALSE;
|
|
if (start_state) {
|
|
set_join_state(start_state, controld_globals.our_nodename,
|
|
- controld_globals.our_uuid);
|
|
+ controld_globals.our_uuid, false);
|
|
}
|
|
}
|
|
|
|
diff --git a/daemons/controld/pacemaker-controld.h b/daemons/controld/pacemaker-controld.h
|
|
index d8c2ddd..2334cce 100644
|
|
--- a/daemons/controld/pacemaker-controld.h
|
|
+++ b/daemons/controld/pacemaker-controld.h
|
|
@@ -37,6 +37,6 @@ void controld_election_fini(void);
|
|
void controld_stop_current_election_timeout(void);
|
|
|
|
void set_join_state(const char *start_state, const char *node_name,
|
|
- const char *node_uuid);
|
|
+ const char *node_uuid, bool remote);
|
|
|
|
#endif
|
|
--
|
|
2.31.1
|
|
|
|
From 67274787898355065315f8c06d62458e2c2b0afe Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 10:09:02 -0400
|
|
Subject: [PATCH 6/7] Feature: controller: When a remote node starts, apply any
|
|
start state.
|
|
|
|
If we were given a start state in the handshake XML, that is now stored
|
|
in the remote node cache's private data. Extract it and set the state
|
|
on the node with set_join_state.
|
|
|
|
Fixes T183
|
|
---
|
|
daemons/controld/controld_remote_ra.c | 15 +++++++++++++++
|
|
1 file changed, 15 insertions(+)
|
|
|
|
diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c
|
|
index f24b755..8ab1e46 100644
|
|
--- a/daemons/controld/controld_remote_ra.c
|
|
+++ b/daemons/controld/controld_remote_ra.c
|
|
@@ -280,6 +280,7 @@ remote_node_up(const char *node_name)
|
|
int call_opt;
|
|
xmlNode *update, *state;
|
|
crm_node_t *node;
|
|
+ lrm_state_t *connection_rsc = NULL;
|
|
|
|
CRM_CHECK(node_name != NULL, return);
|
|
crm_info("Announcing Pacemaker Remote node %s", node_name);
|
|
@@ -301,6 +302,20 @@ remote_node_up(const char *node_name)
|
|
purge_remote_node_attrs(call_opt, node);
|
|
pcmk__update_peer_state(__func__, node, CRM_NODE_MEMBER, 0);
|
|
|
|
+ /* Apply any start state that we were given from the environment on the
|
|
+ * remote node.
|
|
+ */
|
|
+ connection_rsc = lrm_state_find(node->uname);
|
|
+
|
|
+ if (connection_rsc != NULL) {
|
|
+ lrmd_t *lrm = connection_rsc->conn;
|
|
+ const char *start_state = lrmd__node_start_state(lrm);
|
|
+
|
|
+ if (start_state) {
|
|
+ set_join_state(start_state, node->uname, node->uuid, true);
|
|
+ }
|
|
+ }
|
|
+
|
|
/* pacemaker_remote nodes don't participate in the membership layer,
|
|
* so cluster nodes don't automatically get notified when they come and go.
|
|
* We send a cluster message to the DC, and update the CIB node state entry,
|
|
--
|
|
2.31.1
|
|
|
|
From 91cdda7056c9b9254a0d7e7a016b30f788e3e3ff Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Tue, 2 May 2023 10:16:30 -0400
|
|
Subject: [PATCH 7/7] Doc: sysconfig: Remote nodes now respect start state.
|
|
|
|
Ref T183
|
|
---
|
|
etc/sysconfig/pacemaker.in | 3 +--
|
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
|
|
|
diff --git a/etc/sysconfig/pacemaker.in b/etc/sysconfig/pacemaker.in
|
|
index 3b03ad6..041da71 100644
|
|
--- a/etc/sysconfig/pacemaker.in
|
|
+++ b/etc/sysconfig/pacemaker.in
|
|
@@ -144,8 +144,7 @@
|
|
# By default, the local host will join the cluster in an online or standby
|
|
# state when Pacemaker first starts depending on whether it was previously put
|
|
# into standby mode. If this variable is set to "standby" or "online", it will
|
|
-# force the local host to join in the specified state. This has no effect on
|
|
-# Pacemaker Remote nodes.
|
|
+# force the local host to join in the specified state.
|
|
#
|
|
# Default: PCMK_node_start_state="default"
|
|
|
|
--
|
|
2.31.1
|
|
|