pacemaker/SOURCES/014-sbd.patch

1188 lines
39 KiB
Diff

From 17d5ceac78f610aabf6a3678813706faf252c2fb Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 17:56:29 +0200
Subject: [PATCH 1/6] Fix: ipc-api: allow calling connect after disconnection
---
lib/common/crmcommon_private.h | 1 +
lib/common/ipc_client.c | 22 ++++++++++++++++------
2 files changed, 17 insertions(+), 6 deletions(-)
diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
index 49dae6c..d55df99 100644
--- a/lib/common/crmcommon_private.h
+++ b/lib/common/crmcommon_private.h
@@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s {
struct pcmk_ipc_api_s {
enum pcmk_ipc_server server; // Daemon this IPC API instance is for
enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched
+ size_t ipc_size_max; // maximum IPC buffer size
crm_ipc_t *ipc; // IPC connection
mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC
bool free_on_disconnect; // Whether disconnect should free object
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 4077d61..df687da 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -46,8 +46,6 @@
int
pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
{
- size_t max_size = 0;
-
if (api == NULL) {
return EINVAL;
}
@@ -64,13 +62,15 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
return EOPNOTSUPP;
}
+ (*api)->ipc_size_max = 0;
+
// Set server methods and max_size (if not default)
switch (server) {
case pcmk_ipc_attrd:
break;
case pcmk_ipc_based:
- max_size = 512 * 1024; // 512KB
+ (*api)->ipc_size_max = 512 * 1024; // 512KB
break;
case pcmk_ipc_controld:
@@ -88,7 +88,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
case pcmk_ipc_schedulerd:
// @TODO max_size could vary by client, maybe take as argument?
- max_size = 5 * 1024 * 1024; // 5MB
+ (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB
break;
}
if ((*api)->cmds == NULL) {
@@ -97,7 +97,8 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
return ENOMEM;
}
- (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size);
+ (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false),
+ (*api)->ipc_size_max);
if ((*api)->ipc == NULL) {
pcmk_free_ipc_api(*api);
*api = NULL;
@@ -451,11 +452,20 @@ pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type)
{
int rc = pcmk_rc_ok;
- if ((api == NULL) || (api->ipc == NULL)) {
+ if (api == NULL) {
crm_err("Cannot connect to uninitialized API object");
return EINVAL;
}
+ if (api->ipc == NULL) {
+ api->ipc = crm_ipc_new(pcmk_ipc_name(api, false),
+ api->ipc_size_max);
+ if (api->ipc == NULL) {
+ crm_err("Failed to re-create IPC API");
+ return ENOMEM;
+ }
+ }
+
if (crm_ipc_connected(api->ipc)) {
crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true));
return pcmk_rc_ok;
--
1.8.3.1
From e5ad1a6c54da48c86c8ab262abd4921cb37e998d Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 18:18:01 +0200
Subject: [PATCH 2/6] Fix: ipc-api: avoid infinite loop when disconnected
Happens when pcmk_dispatch_ipc() is used to dispatch without a
mainloop.
---
lib/common/ipc_client.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index df687da..aa032fe 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -392,7 +392,7 @@ pcmk_dispatch_ipc(pcmk_ipc_api_t *api)
if (api == NULL) {
return;
}
- while (crm_ipc_ready(api->ipc)) {
+ while (crm_ipc_ready(api->ipc) > 0) {
if (crm_ipc_read(api->ipc) > 0) {
dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api);
}
--
1.8.3.1
From 927b43a57d5e8256fbce8fe0792f8ea228c57687 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 9 Dec 2019 15:13:11 +0100
Subject: [PATCH 3/6] Fix: sbd-integration: sync pacemakerd with sbd
Make pacemakerd wait to be pinged by sbd before starting
sub-daemons. Ping replies additionally carry the health-state and
the timestamp of the last successful check. On shutdown, bring down
all the sub-daemons and wait to be polled for state by sbd before
finally exiting pacemakerd.
Add a new API so as not to make the XML structure an external interface.
---
daemons/pacemakerd/pacemakerd.c | 100 ++++++++++++++--
include/crm/common/Makefile.am | 2 +-
include/crm/common/ipc_pacemakerd.h | 71 +++++++++++
include/crm/msg_xml.h | 7 ++
lib/common/Makefile.am | 1 +
lib/common/crmcommon_private.h | 3 +
lib/common/ipc_client.c | 5 +-
lib/common/ipc_pacemakerd.c | 232 ++++++++++++++++++++++++++++++++++++
8 files changed, 410 insertions(+), 11 deletions(-)
create mode 100644 include/crm/common/ipc_pacemakerd.h
create mode 100644 lib/common/ipc_pacemakerd.c
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index 652d6ca..ccfae66 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -40,8 +40,25 @@ static bool global_keep_tracking = false;
#define PCMK_PROCESS_CHECK_INTERVAL 5
static crm_trigger_t *shutdown_trigger = NULL;
+static crm_trigger_t *startup_trigger = NULL;
static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid";
+/* state we report when asked via pacemakerd-api status-ping */
+static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT;
+static gboolean running_with_sbd = FALSE; /* local copy */
+/* When contacted via pacemakerd-api by a client having sbd in
+ * the name we assume it is the sbd-daemon which wants to know
+ * whether pacemakerd shut down gracefully.
+ * Thus when everything is shut down properly, pacemakerd
+ * waits until it has reported the graceful completion of
+ * shutdown to sbd, and only when the sbd-client closes the
+ * connection can we assume that the report has arrived
+ * properly so that pacemakerd can finally exit.
+ * The following two variables are used to track that handshake.
+ */
+static unsigned int shutdown_complete_state_reported_to = 0;
+static gboolean shutdown_complete_state_reported_client_closed = FALSE;
+
typedef struct pcmk_child_s {
pid_t pid;
long flag;
@@ -374,21 +391,20 @@ escalate_shutdown(gpointer data)
static gboolean
pcmk_shutdown_worker(gpointer user_data)
{
- static int phase = 0;
+ static int phase = SIZEOF(pcmk_children);
static time_t next_log = 0;
- static int max = SIZEOF(pcmk_children);
int lpc = 0;
- if (phase == 0) {
+ if (phase == SIZEOF(pcmk_children)) {
crm_notice("Shutting down Pacemaker");
- phase = max;
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN;
}
for (; phase > 0; phase--) {
/* Don't stop anything with start_seq < 1 */
- for (lpc = max - 1; lpc >= 0; lpc--) {
+ for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) {
pcmk_child_t *child = &(pcmk_children[lpc]);
if (phase != child->start_seq) {
@@ -436,6 +452,11 @@ pcmk_shutdown_worker(gpointer user_data)
}
crm_notice("Shutdown complete");
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
+ if (!fatal_error && running_with_sbd &&
+ !shutdown_complete_state_reported_client_closed) {
+ return TRUE;
+ }
{
const char *delay = pcmk__env_option("shutdown_delay");
@@ -489,6 +510,51 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid)
return 0;
}
+static void
+pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
+{
+ const char *value = NULL;
+ xmlNode *ping = NULL;
+ xmlNode *reply = NULL;
+ time_t pinged = time(NULL);
+ const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
+
+ /* Pinged for status */
+ crm_trace("Pinged from %s.%s",
+ crm_str(crm_element_value(msg, F_CRM_ORIGIN)),
+ from?from:"unknown");
+ ping = create_xml_node(NULL, XML_CRM_TAG_PING);
+ value = crm_element_value(msg, F_CRM_SYS_TO);
+ crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
+ crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
+ crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
+ crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
+ reply = create_reply(msg, ping);
+ free_xml(ping);
+ if (reply) {
+ if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) !=
+ pcmk_rc_ok) {
+ crm_err("Failed sending ping-reply");
+ }
+ free_xml(reply);
+ } else {
+ crm_err("Failed building ping-reply");
+ }
+ /* just proceed state on sbd pinging us */
+ if (from && strstr(from, "sbd")) {
+ if (crm_str_eq(pacemakerd_state,
+ XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
+ TRUE)) {
+ shutdown_complete_state_reported_to = c->pid;
+ } else if (crm_str_eq(pacemakerd_state,
+ XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
+ TRUE)) {
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+ mainloop_set_trigger(startup_trigger);
+ }
+ }
+}
+
/* Exit code means? */
static int32_t
pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
@@ -514,6 +580,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size)
crm_trace("Ignoring IPC request to purge node "
"because peer cache is not used");
+ } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) {
+ pcmk_handle_ping_request(c, msg, id);
+
} else {
crm_debug("Unrecognized IPC command '%s' sent to pacemakerd",
crm_str(task));
@@ -533,6 +602,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c)
return 0;
}
crm_trace("Connection %p", c);
+ if (shutdown_complete_state_reported_to == client->pid) {
+ shutdown_complete_state_reported_client_closed = TRUE;
+ if (shutdown_trigger) {
+ mainloop_set_trigger(shutdown_trigger);
+ }
+ }
pcmk__free_client(client);
return 0;
}
@@ -924,8 +999,8 @@ find_and_track_existing_processes(void)
return pcmk_rc_ok;
}
-static void
-init_children_processes(void)
+static gboolean
+init_children_processes(void *user_data)
{
int start_seq = 1, lpc = 0;
static int max = SIZEOF(pcmk_children);
@@ -951,6 +1026,8 @@ init_children_processes(void)
* This may be useful for the daemons to know
*/
setenv("PCMK_respawned", "true", 1);
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_RUNNING;
+ return TRUE;
}
static void
@@ -1154,6 +1231,7 @@ main(int argc, char **argv)
if(pcmk_locate_sbd() > 0) {
setenv("PCMK_watchdog", "true", 1);
+ running_with_sbd = TRUE;
} else {
setenv("PCMK_watchdog", "false", 1);
}
@@ -1170,7 +1248,13 @@ main(int argc, char **argv)
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
- init_children_processes();
+ if (running_with_sbd) {
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
+ startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
+ } else {
+ pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
+ init_children_processes(NULL);
+ }
crm_notice("Pacemaker daemon successfully started and accepting connections");
g_main_loop_run(mainloop);
diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am
index f29d105..1b5730a 100644
--- a/include/crm/common/Makefile.am
+++ b/include/crm/common/Makefile.am
@@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in
headerdir=$(pkgincludedir)/crm/common
header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \
- nvpair.h acl.h ipc_controld.h
+ nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h
noinst_HEADERS = internal.h alerts_internal.h \
iso8601_internal.h remote_internal.h xml_internal.h \
ipc_internal.h output.h cmdline_internal.h curses_internal.h \
diff --git a/include/crm/common/ipc_pacemakerd.h b/include/crm/common/ipc_pacemakerd.h
new file mode 100644
index 0000000..00e3edd
--- /dev/null
+++ b/include/crm/common/ipc_pacemakerd.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#ifndef PCMK__IPC_PACEMAKERD__H
+# define PCMK__IPC_PACEMAKERD__H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \file
+ * \brief IPC commands for Pacemakerd
+ *
+ * \ingroup core
+ */
+
+#include <sys/types.h> // time_t
+#include <crm/common/ipc.h> // pcmk_ipc_api_t
+
+enum pcmk_pacemakerd_state {
+ pcmk_pacemakerd_state_invalid = -1,
+ pcmk_pacemakerd_state_init = 0,
+ pcmk_pacemakerd_state_starting_daemons,
+ pcmk_pacemakerd_state_wait_for_ping,
+ pcmk_pacemakerd_state_running,
+ pcmk_pacemakerd_state_shutting_down,
+ pcmk_pacemakerd_state_shutdown_complete,
+ pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete,
+};
+
+//! Possible types of pacemakerd replies
+enum pcmk_pacemakerd_api_reply {
+ pcmk_pacemakerd_reply_unknown,
+ pcmk_pacemakerd_reply_ping,
+};
+
+/*!
+ * Pacemakerd reply passed to event callback
+ */
+typedef struct {
+ enum pcmk_pacemakerd_api_reply reply_type;
+
+ union {
+ // pcmk_pacemakerd_reply_ping
+ struct {
+ const char *sys_from;
+ enum pcmk_pacemakerd_state state;
+ time_t last_good;
+ int status;
+ } ping;
+ } data;
+} pcmk_pacemakerd_api_reply_t;
+
+int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name);
+enum pcmk_pacemakerd_state
+ pcmk_pacemakerd_api_daemon_state_text2enum(const char *state);
+const char
+ *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // PCMK__IPC_PACEMAKERD__H
diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h
index af3f33e..1fcb72d 100644
--- a/include/crm/msg_xml.h
+++ b/include/crm/msg_xml.h
@@ -123,6 +123,13 @@ extern "C" {
# define XML_PING_ATTR_STATUS "result"
# define XML_PING_ATTR_SYSFROM "crm_subsystem"
# define XML_PING_ATTR_CRMDSTATE "crmd_state"
+# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state"
+# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init"
+# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons"
+# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping"
+# define XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running"
+# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down"
+# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete"
# define XML_TAG_FRAGMENT "cib_fragment"
diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am
index db66a6e..e0249b9 100644
--- a/lib/common/Makefile.am
+++ b/lib/common/Makefile.am
@@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES += io.c
libcrmcommon_la_SOURCES += ipc_client.c
libcrmcommon_la_SOURCES += ipc_common.c
libcrmcommon_la_SOURCES += ipc_controld.c
+libcrmcommon_la_SOURCES += ipc_pacemakerd.c
libcrmcommon_la_SOURCES += ipc_server.c
libcrmcommon_la_SOURCES += iso8601.c
libcrmcommon_la_SOURCES += logging.c
diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h
index d55df99..68e3390 100644
--- a/lib/common/crmcommon_private.h
+++ b/lib/common/crmcommon_private.h
@@ -210,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header);
G_GNUC_INTERNAL
pcmk__ipc_methods_t *pcmk__controld_api_methods(void);
+G_GNUC_INTERNAL
+pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void);
+
#endif // CRMCOMMON_PRIVATE__H
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index aa032fe..033199d 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -41,7 +41,7 @@
*
* \note The caller is responsible for freeing *api using pcmk_free_ipc_api().
* \note This is intended to supersede crm_ipc_new() but currently only
- * supports the controller IPC API.
+ * supports the controller & pacemakerd IPC API.
*/
int
pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
@@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server)
break;
case pcmk_ipc_pacemakerd:
+ (*api)->cmds = pcmk__pacemakerd_api_methods();
break;
case pcmk_ipc_schedulerd:
@@ -259,7 +260,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log)
return for_log? "fencer" : NULL /* "stonith-ng" */;
case pcmk_ipc_pacemakerd:
- return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */;
+ return for_log? "launcher" : CRM_SYSTEM_MCP;
case pcmk_ipc_schedulerd:
return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */;
diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c
new file mode 100644
index 0000000..241722e
--- /dev/null
+++ b/lib/common/ipc_pacemakerd.c
@@ -0,0 +1,232 @@
+/*
+ * Copyright 2020 the Pacemaker project contributors
+ *
+ * The version control history for this file may have further details.
+ *
+ * This source code is licensed under the GNU Lesser General Public License
+ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
+ */
+
+#include <crm_internal.h>
+
+#include <stdlib.h>
+#include <time.h>
+
+#include <crm/crm.h>
+#include <crm/msg_xml.h>
+#include <crm/common/xml.h>
+#include <crm/common/ipc.h>
+#include <crm/common/ipc_internal.h>
+#include <crm/common/ipc_pacemakerd.h>
+#include "crmcommon_private.h"
+
+typedef struct pacemakerd_api_private_s {
+ enum pcmk_pacemakerd_state state;
+ char *client_uuid;
+} pacemakerd_api_private_t;
+
+static const char *pacemakerd_state_str[] = {
+ XML_PING_ATTR_PACEMAKERDSTATE_INIT,
+ XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS,
+ XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
+ XML_PING_ATTR_PACEMAKERDSTATE_RUNNING,
+ XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN,
+ XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE
+};
+
+enum pcmk_pacemakerd_state
+pcmk_pacemakerd_api_daemon_state_text2enum(const char *state)
+{
+ int i;
+
+ if (state == NULL) {
+ return pcmk_pacemakerd_state_invalid;
+ }
+ for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max;
+ i++) {
+ if (crm_str_eq(state, pacemakerd_state_str[i], TRUE)) {
+ return i;
+ }
+ }
+ return pcmk_pacemakerd_state_invalid;
+}
+
+const char *
+pcmk_pacemakerd_api_daemon_state_enum2text(
+ enum pcmk_pacemakerd_state state)
+{
+ if ((state >= pcmk_pacemakerd_state_init) &&
+ (state <= pcmk_pacemakerd_state_max)) {
+ return pacemakerd_state_str[state];
+ }
+ return "invalid";
+}
+
+// \return Standard Pacemaker return code (errno if allocation fails)
+static int
+new_data(pcmk_ipc_api_t *api)
+{
+ struct pacemakerd_api_private_s *private = NULL;
+
+ api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s));
+
+ if (api->api_data == NULL) {
+ return errno;
+ }
+
+ private = api->api_data;
+ private->state = pcmk_pacemakerd_state_invalid;
+ /* Unlike with cib, controld, ..., pacemakerd is only addressed from
+ the local node, so the pid is unique and thus sufficient as an ID
+ */
+ private->client_uuid = pcmk__getpid_s();
+
+ return pcmk_rc_ok;
+}
+
+static void
+free_data(void *data)
+{
+ free(((struct pacemakerd_api_private_s *) data)->client_uuid);
+ free(data);
+}
+
+// \return Standard Pacemaker return code
+static int
+post_connect(pcmk_ipc_api_t *api)
+{
+ struct pacemakerd_api_private_s *private = NULL;
+
+ if (api->api_data == NULL) {
+ return EINVAL;
+ }
+ private = api->api_data;
+ private->state = pcmk_pacemakerd_state_invalid;
+
+ return pcmk_rc_ok;
+}
+
+static void
+post_disconnect(pcmk_ipc_api_t *api)
+{
+ struct pacemakerd_api_private_s *private = NULL;
+
+ if (api->api_data == NULL) {
+ return;
+ }
+ private = api->api_data;
+ private->state = pcmk_pacemakerd_state_invalid;
+
+ return;
+}
+
+static bool
+reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
+{
+ const char *command = crm_element_value(request, F_CRM_TASK);
+
+ if (command == NULL) {
+ return false;
+ }
+
+ // We only need to handle commands that functions in this file can send
+ return !strcmp(command, CRM_OP_PING);
+}
+
+static void
+dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
+{
+ crm_exit_t status = CRM_EX_OK;
+ xmlNode *msg_data = NULL;
+ pcmk_pacemakerd_api_reply_t reply_data = {
+ pcmk_pacemakerd_reply_unknown
+ };
+ const char *value = NULL;
+ long long value_ll = 0;
+
+ value = crm_element_value(reply, F_CRM_MSG_TYPE);
+ if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) {
+ crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'",
+ crm_str(value));
+ status = CRM_EX_PROTOCOL;
+ goto done;
+ }
+
+ if (crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) {
+ crm_debug("Unrecognizable pacemakerd message: no reference");
+ status = CRM_EX_PROTOCOL;
+ goto done;
+ }
+
+ value = crm_element_value(reply, F_CRM_TASK);
+ if ((value == NULL) || strcmp(value, CRM_OP_PING)) {
+ crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value));
+ status = CRM_EX_PROTOCOL;
+ goto done;
+ }
+
+ // Parse useful info from reply
+
+ msg_data = get_message_xml(reply, F_CRM_DATA);
+ crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll);
+
+ reply_data.reply_type = pcmk_pacemakerd_reply_ping;
+ reply_data.data.ping.state =
+ pcmk_pacemakerd_api_daemon_state_text2enum(
+ crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE));
+ reply_data.data.ping.status =
+ crm_str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS),
+ "ok", FALSE)?pcmk_rc_ok:pcmk_rc_error;
+ reply_data.data.ping.last_good = (time_t) value_ll;
+ reply_data.data.ping.sys_from = crm_element_value(msg_data,
+ XML_PING_ATTR_SYSFROM);
+
+done:
+ pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data);
+}
+
+pcmk__ipc_methods_t *
+pcmk__pacemakerd_api_methods()
+{
+ pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t));
+
+ if (cmds != NULL) {
+ cmds->new_data = new_data;
+ cmds->free_data = free_data;
+ cmds->post_connect = post_connect;
+ cmds->reply_expected = reply_expected;
+ cmds->dispatch = dispatch;
+ cmds->post_disconnect = post_disconnect;
+ }
+ return cmds;
+}
+
+// \return Standard Pacemaker return code
+int
+pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name)
+{
+    pacemakerd_api_private_t *private;
+    xmlNode *cmd;
+    int rc;
+
+    // Positive code, consistent with ECOMM/ENOMSG below (not -errno)
+    CRM_CHECK(api != NULL, return EINVAL);
+    private = api->api_data;
+    CRM_ASSERT(private != NULL);
+
+    cmd = create_request(CRM_OP_PING, NULL, NULL, CRM_SYSTEM_MCP,
+        ipc_name?ipc_name:((crm_system_name? crm_system_name : "client")),
+        private->client_uuid);
+    if (cmd) {
+        rc = pcmk__send_ipc_request(api, cmd);
+        if (rc != pcmk_rc_ok) {
+            crm_debug("Couldn't ping pacemakerd: %s rc=%d",
+                pcmk_rc_str(rc), rc);
+            rc = ECOMM;
+        }
+        free_xml(cmd);
+    } else {
+        rc = ENOMSG;
+    }
+    return rc;
+}
--
1.8.3.1
From 06da3c3685b0bdf093a13067cc399e782115e39c Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Mon, 20 Jul 2020 23:28:32 +0200
Subject: [PATCH 4/6] Feature: tools: Add -P to crmadmin to ping via
pacemakerd-api
---
include/crm/crm.h | 2 +-
tools/crmadmin.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 152 insertions(+), 11 deletions(-)
diff --git a/include/crm/crm.h b/include/crm/crm.h
index dc2adc1..ce2074b 100644
--- a/include/crm/crm.h
+++ b/include/crm/crm.h
@@ -51,7 +51,7 @@ extern "C" {
* >=3.0.13: Fail counts include operation name and interval
* >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED
*/
-# define CRM_FEATURE_SET "3.4.0"
+# define CRM_FEATURE_SET "3.4.1"
# define EOS '\0'
# define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) )
diff --git a/tools/crmadmin.c b/tools/crmadmin.c
index 4688458..2ebdd14 100644
--- a/tools/crmadmin.c
+++ b/tools/crmadmin.c
@@ -20,7 +20,9 @@
#include <crm/cib.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
+#include <crm/common/iso8601.h>
#include <crm/common/ipc_controld.h>
+#include <crm/common/ipc_pacemakerd.h>
#include <crm/common/mainloop.h>
#define DEFAULT_MESSAGE_TIMEOUT_MS 30000
@@ -31,6 +33,8 @@ static GMainLoop *mainloop = NULL;
bool do_work(pcmk_ipc_api_t *api);
void do_find_node_list(xmlNode *xml_node);
+static char *ipc_name = NULL;
+
gboolean admin_message_timeout(gpointer data);
static enum {
@@ -40,6 +44,7 @@ static enum {
cmd_elect_dc,
cmd_whois_dc,
cmd_list_nodes,
+ cmd_pacemakerd_health,
} command = cmd_none;
static gboolean BE_VERBOSE = FALSE;
@@ -82,6 +87,15 @@ static pcmk__cli_option_t long_options[] = {
pcmk__option_default
},
{
+ "pacemakerd", no_argument, NULL, 'P',
+ "Display the status of local pacemakerd.", pcmk__option_default
+ },
+ {
+ "-spacer-", no_argument, NULL, '-',
+ "\n\tResult is the state of the sub-daemons watched by pacemakerd.\n",
+ pcmk__option_default
+ },
+ {
"dc_lookup", no_argument, NULL, 'D',
"Display the uname of the node co-ordinating the cluster.",
pcmk__option_default
@@ -122,16 +136,21 @@ static pcmk__cli_option_t long_options[] = {
{
"bash-export", no_argument, NULL, 'B',
"Display nodes as shell commands of the form 'export uname=uuid' "
- "(valid with -N/--nodes)'\n",
+ "(valid with -N/--nodes)",
+ pcmk__option_default
+ },
+ {
+ "ipc-name", required_argument, NULL, 'i',
+ "Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).",
pcmk__option_default
},
{
"-spacer-", no_argument, NULL, '-',
- "Notes:", pcmk__option_default
+ "\nNotes:", pcmk__option_default
},
{
"-spacer-", no_argument, NULL, '-',
- "The -K and -E commands do not work and may be removed in a future "
+ "\nThe -K and -E commands do not work and may be removed in a future "
"version.",
pcmk__option_default
},
@@ -223,6 +242,88 @@ done:
quit_main_loop(exit_code);
}
+// Handle pacemakerd IPC events: report ping reply, then end the main loop
+static void
+pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
+                    enum pcmk_ipc_event event_type, crm_exit_t status,
+                    void *event_data, void *user_data)
+{
+    pcmk_pacemakerd_api_reply_t *reply = event_data;
+
+    switch (event_type) {
+        case pcmk_ipc_event_disconnect:
+            if (exit_code == CRM_EX_DISCONNECT) { // Unexpected
+                fprintf(stderr, "error: Lost connection to pacemakerd\n");
+            }
+            goto done;
+
+        case pcmk_ipc_event_reply:
+            break;
+
+        default:
+            return;
+    }
+
+    if (message_timer_id != 0) {
+        g_source_remove(message_timer_id);
+        message_timer_id = 0;
+    }
+
+    if (status != CRM_EX_OK) {
+        fprintf(stderr, "error: Bad reply from pacemakerd: %s\n",
+                crm_exit_str(status));
+        exit_code = status;
+        goto done;
+    }
+
+    if (reply->reply_type != pcmk_pacemakerd_reply_ping) {
+        fprintf(stderr, "error: Unknown reply type %d from pacemakerd\n",
+                reply->reply_type);
+        goto done;
+    }
+
+    // Parse desired information from reply
+    switch (command) {
+        case cmd_pacemakerd_health:
+            {
+                crm_time_t *crm_when = crm_time_new(NULL);
+                char *pinged_buf = NULL;
+
+                crm_time_set_timet(crm_when, &reply->data.ping.last_good);
+                pinged_buf = crm_time_as_string(crm_when,
+                    crm_time_log_date | crm_time_log_timeofday |
+                        crm_time_log_with_timezone);
+                crm_time_free(crm_when); // not used again once formatted
+                printf("Status of %s: '%s' %s %s\n",
+                    reply->data.ping.sys_from,
+                    (reply->data.ping.status == pcmk_rc_ok)?
+                        pcmk_pacemakerd_api_daemon_state_enum2text(
+                            reply->data.ping.state):"query failed",
+                    (reply->data.ping.status == pcmk_rc_ok)?"last updated":"",
+                    (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:"");
+                if (BE_SILENT &&
+                    (reply->data.ping.state != pcmk_pacemakerd_state_invalid)) {
+                    fprintf(stderr, "%s\n",
+                        (reply->data.ping.status == pcmk_rc_ok)?
+                        pcmk_pacemakerd_api_daemon_state_enum2text(
+                            reply->data.ping.state):
+                        "query failed");
+                }
+                exit_code = CRM_EX_OK;
+                free(pinged_buf);
+            }
+            break;
+
+        default: // Not really possible here
+            exit_code = CRM_EX_SOFTWARE;
+            break;
+    }
+
+done:
+    pcmk_disconnect_ipc(pacemakerd_api);
+    quit_main_loop(exit_code);
+}
+
// \return Standard Pacemaker return code
static int
list_nodes()
@@ -257,7 +358,9 @@ main(int argc, char **argv)
int flag;
int rc;
pcmk_ipc_api_t *controld_api = NULL;
+ pcmk_ipc_api_t *pacemakerd_api = NULL;
bool need_controld_api = true;
+ bool need_pacemakerd_api = false;
crm_log_cli_init("crmadmin");
pcmk__set_cli_options(NULL, "<command> [options]", long_options,
@@ -282,7 +385,9 @@ main(int argc, char **argv)
message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS;
}
break;
-
+ case 'i':
+ ipc_name = strdup(optarg);
+ break;
case '$':
case '?':
pcmk__cli_help(flag, CRM_EX_OK);
@@ -304,6 +409,11 @@ main(int argc, char **argv)
case 'q':
BE_SILENT = TRUE;
break;
+ case 'P':
+ command = cmd_pacemakerd_health;
+ need_pacemakerd_api = true;
+ need_controld_api = false;
+ break;
case 'S':
command = cmd_health;
crm_trace("Option %c => %s", flag, optarg);
@@ -369,7 +479,26 @@ main(int argc, char **argv)
}
}
- if (do_work(controld_api)) {
+ // Connect to pacemakerd if needed
+ if (need_pacemakerd_api) {
+ rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
+ if (pacemakerd_api == NULL) {
+ fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
+ pcmk_rc_str(rc));
+ exit_code = pcmk_rc2exitc(rc);
+ goto done;
+ }
+ pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL);
+ rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main);
+ if (rc != pcmk_rc_ok) {
+ fprintf(stderr, "error: Could not connect to pacemakerd: %s\n",
+ pcmk_rc_str(rc));
+ exit_code = pcmk_rc2exitc(rc);
+ goto done;
+ }
+ }
+
+ if (do_work(controld_api?controld_api:pacemakerd_api)) {
// A reply is needed from controller, so run main loop to get it
exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects
mainloop = g_main_loop_new(NULL, FALSE);
@@ -379,12 +508,19 @@ main(int argc, char **argv)
}
done:
+
if (controld_api != NULL) {
pcmk_ipc_api_t *capi = controld_api;
-
controld_api = NULL; // Ensure we can't free this twice
pcmk_free_ipc_api(capi);
}
+
+ if (pacemakerd_api != NULL) {
+ pcmk_ipc_api_t *capi = pacemakerd_api;
+ pacemakerd_api = NULL; // Ensure we can't free this twice
+ pcmk_free_ipc_api(capi);
+ }
+
if (mainloop != NULL) {
g_main_loop_unref(mainloop);
mainloop = NULL;
@@ -394,30 +530,35 @@ done:
// \return True if reply from controller is needed
bool
-do_work(pcmk_ipc_api_t *controld_api)
+do_work(pcmk_ipc_api_t *api)
{
bool need_reply = false;
int rc = pcmk_rc_ok;
switch (command) {
case cmd_shutdown:
- rc = pcmk_controld_api_shutdown(controld_api, dest_node);
+ rc = pcmk_controld_api_shutdown(api, dest_node);
break;
case cmd_health: // dest_node != NULL
case cmd_whois_dc: // dest_node == NULL
- rc = pcmk_controld_api_ping(controld_api, dest_node);
+ rc = pcmk_controld_api_ping(api, dest_node);
need_reply = true;
break;
case cmd_elect_dc:
- rc = pcmk_controld_api_start_election(controld_api);
+ rc = pcmk_controld_api_start_election(api);
break;
case cmd_list_nodes:
rc = list_nodes();
break;
+ case cmd_pacemakerd_health:
+ rc = pcmk_pacemakerd_api_ping(api, ipc_name);
+ need_reply = true;
+ break;
+
case cmd_none: // not actually possible here
break;
}
--
1.8.3.1
From 6ce5bb0d6fd30a204468ea245209d34f2682d7c9 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 21 Jul 2020 18:12:53 +0200
Subject: [PATCH 5/6] Fix: pacemakerd: interworking with sbd not using
pacemakerd-api
---
daemons/pacemakerd/pacemakerd.c | 8 +++++++-
include/crm/common/options_internal.h | 1 +
lib/common/watchdog.c | 15 +++++++++++++++
3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index ccfae66..e91982a 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -454,6 +454,7 @@ pcmk_shutdown_worker(gpointer user_data)
crm_notice("Shutdown complete");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE;
if (!fatal_error && running_with_sbd &&
+ pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
return TRUE;
}
@@ -1248,10 +1249,15 @@ main(int argc, char **argv)
mainloop_add_signal(SIGTERM, pcmk_shutdown);
mainloop_add_signal(SIGINT, pcmk_shutdown);
- if (running_with_sbd) {
+ if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
+ if (running_with_sbd) {
+ crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
+ "by your sbd version) improve reliability of "
+ "interworking between SBD & pacemaker.");
+ }
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
init_children_processes(NULL);
}
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index db54da4..d0429c9 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value);
// from watchdog.c
long pcmk__get_sbd_timeout(void);
+bool pcmk__get_sbd_sync_resource_startup(void);
long pcmk__auto_watchdog_timeout(void);
bool pcmk__valid_sbd_timeout(const char *value);
diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c
index 9d8896b..8838be6 100644
--- a/lib/common/watchdog.c
+++ b/lib/common/watchdog.c
@@ -227,6 +227,21 @@ pcmk__get_sbd_timeout(void)
return sbd_timeout;
}
+bool
+pcmk__get_sbd_sync_resource_startup(void)
+{
+ static bool sync_resource_startup = false;
+ static bool checked_sync_resource_startup = false;
+
+ if (!checked_sync_resource_startup) {
+ sync_resource_startup =
+ crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP"));
+ checked_sync_resource_startup = true;
+ }
+
+ return sync_resource_startup;
+}
+
long
pcmk__auto_watchdog_timeout()
{
--
1.8.3.1
From 567cb6ec6f317af9e973321633950ef26f43c486 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Thu, 23 Jul 2020 23:00:23 +0200
Subject: [PATCH 6/6] Fix: pacemakerd: improve logging when synced with SBD
---
daemons/pacemakerd/pacemakerd.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
index e91982a..c888b73 100644
--- a/daemons/pacemakerd/pacemakerd.c
+++ b/daemons/pacemakerd/pacemakerd.c
@@ -456,6 +456,7 @@ pcmk_shutdown_worker(gpointer user_data)
if (!fatal_error && running_with_sbd &&
pcmk__get_sbd_sync_resource_startup() &&
!shutdown_complete_state_reported_client_closed) {
+ crm_notice("Waiting for SBD to pick up shutdown-complete-state.");
return TRUE;
}
@@ -546,10 +547,14 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE,
TRUE)) {
+ if (pcmk__get_sbd_sync_resource_startup()) {
+ crm_notice("Shutdown-complete-state passed to SBD.");
+ }
shutdown_complete_state_reported_to = c->pid;
} else if (crm_str_eq(pacemakerd_state,
XML_PING_ATTR_PACEMAKERDSTATE_WAITPING,
TRUE)) {
+ crm_notice("Received startup-trigger from SBD.");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
mainloop_set_trigger(startup_trigger);
}
@@ -1250,12 +1255,13 @@ main(int argc, char **argv)
mainloop_add_signal(SIGINT, pcmk_shutdown);
if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) {
+ crm_notice("Waiting for startup-trigger from SBD.");
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING;
startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL);
} else {
if (running_with_sbd) {
crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported "
- "by your sbd version) improve reliability of "
+ "by your SBD version) improve reliability of "
"interworking between SBD & pacemaker.");
}
pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS;
--
1.8.3.1