From 17d5ceac78f610aabf6a3678813706faf252c2fb Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Mon, 20 Jul 2020 17:56:29 +0200 Subject: [PATCH 1/6] Fix: ipc-api: allow calling connect after disconnection --- lib/common/crmcommon_private.h | 1 + lib/common/ipc_client.c | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h index 49dae6c..d55df99 100644 --- a/lib/common/crmcommon_private.h +++ b/lib/common/crmcommon_private.h @@ -175,6 +175,7 @@ typedef struct pcmk__ipc_methods_s { struct pcmk_ipc_api_s { enum pcmk_ipc_server server; // Daemon this IPC API instance is for enum pcmk_ipc_dispatch dispatch_type; // How replies should be dispatched + size_t ipc_size_max; // maximum IPC buffer size crm_ipc_t *ipc; // IPC connection mainloop_io_t *mainloop_io; // If using mainloop, I/O source for IPC bool free_on_disconnect; // Whether disconnect should free object diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index 4077d61..df687da 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -46,8 +46,6 @@ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) { - size_t max_size = 0; - if (api == NULL) { return EINVAL; } @@ -64,13 +62,15 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) return EOPNOTSUPP; } + (*api)->ipc_size_max = 0; + // Set server methods and max_size (if not default) switch (server) { case pcmk_ipc_attrd: break; case pcmk_ipc_based: - max_size = 512 * 1024; // 512KB + (*api)->ipc_size_max = 512 * 1024; // 512KB break; case pcmk_ipc_controld: @@ -88,7 +88,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) case pcmk_ipc_schedulerd: // @TODO max_size could vary by client, maybe take as argument? 
- max_size = 5 * 1024 * 1024; // 5MB + (*api)->ipc_size_max = 5 * 1024 * 1024; // 5MB break; } if ((*api)->cmds == NULL) { @@ -97,7 +97,8 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) return ENOMEM; } - (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), max_size); + (*api)->ipc = crm_ipc_new(pcmk_ipc_name(*api, false), + (*api)->ipc_size_max); if ((*api)->ipc == NULL) { pcmk_free_ipc_api(*api); *api = NULL; @@ -451,11 +452,20 @@ pcmk_connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type) { int rc = pcmk_rc_ok; - if ((api == NULL) || (api->ipc == NULL)) { + if (api == NULL) { crm_err("Cannot connect to uninitialized API object"); return EINVAL; } + if (api->ipc == NULL) { + api->ipc = crm_ipc_new(pcmk_ipc_name(api, false), + api->ipc_size_max); + if (api->ipc == NULL) { + crm_err("Failed to re-create IPC API"); + return ENOMEM; + } + } + if (crm_ipc_connected(api->ipc)) { crm_trace("Already connected to %s IPC API", pcmk_ipc_name(api, true)); return pcmk_rc_ok; -- 1.8.3.1 From e5ad1a6c54da48c86c8ab262abd4921cb37e998d Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Mon, 20 Jul 2020 18:18:01 +0200 Subject: [PATCH 2/6] Fix: ipc-api: avoid infinite loop when disconnected Happens when using pcmk_dispatch_ipc when dispatching without mainloop. 
--- lib/common/ipc_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index df687da..aa032fe 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -392,7 +392,7 @@ pcmk_dispatch_ipc(pcmk_ipc_api_t *api) if (api == NULL) { return; } - while (crm_ipc_ready(api->ipc)) { + while (crm_ipc_ready(api->ipc) > 0) { if (crm_ipc_read(api->ipc) > 0) { dispatch_ipc_data(crm_ipc_buffer(api->ipc), 0, api); } -- 1.8.3.1 From 927b43a57d5e8256fbce8fe0792f8ea228c57687 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Mon, 9 Dec 2019 15:13:11 +0100 Subject: [PATCH 3/6] Fix: sbd-integration: sync pacemakerd with sbd Make pacemakerd wait to be pinged by sbd before starting sub-daemons. Replies to pings additionally report the health-state and the timestamp of the last successful check. On shutdown, bring down all the sub-daemons and wait to be polled for state by sbd before finally exiting pacemakerd. Add a new API so as not to make the XML structure an external interface.
--- daemons/pacemakerd/pacemakerd.c | 100 ++++++++++++++-- include/crm/common/Makefile.am | 2 +- include/crm/common/ipc_pacemakerd.h | 71 +++++++++++ include/crm/msg_xml.h | 7 ++ lib/common/Makefile.am | 1 + lib/common/crmcommon_private.h | 3 + lib/common/ipc_client.c | 5 +- lib/common/ipc_pacemakerd.c | 232 ++++++++++++++++++++++++++++++++++++ 8 files changed, 410 insertions(+), 11 deletions(-) create mode 100644 include/crm/common/ipc_pacemakerd.h create mode 100644 lib/common/ipc_pacemakerd.c diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index 652d6ca..ccfae66 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -40,8 +40,25 @@ static bool global_keep_tracking = false; #define PCMK_PROCESS_CHECK_INTERVAL 5 static crm_trigger_t *shutdown_trigger = NULL; +static crm_trigger_t *startup_trigger = NULL; static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid"; +/* state we report when asked via pacemakerd-api status-ping */ +static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; +static gboolean running_with_sbd = FALSE; /* local copy */ +/* When contacted via pacemakerd-api by a client having sbd in + * the name we assume it is sbd-daemon which wants to know + * if pacemakerd shutdown gracefully. + * Thus when everything is shutdown properly pacemakerd + * waits till it has reported the graceful completion of + * shutdown to sbd and just when sbd-client closes the + * connection we can assume that the report has arrived + * properly so that pacemakerd can finally exit. + * Following two variables are used to track that handshake. 
+ */ +static unsigned int shutdown_complete_state_reported_to = 0; +static gboolean shutdown_complete_state_reported_client_closed = FALSE; + typedef struct pcmk_child_s { pid_t pid; long flag; @@ -374,21 +391,20 @@ escalate_shutdown(gpointer data) static gboolean pcmk_shutdown_worker(gpointer user_data) { - static int phase = 0; + static int phase = SIZEOF(pcmk_children); static time_t next_log = 0; - static int max = SIZEOF(pcmk_children); int lpc = 0; - if (phase == 0) { + if (phase == SIZEOF(pcmk_children)) { crm_notice("Shutting down Pacemaker"); - phase = max; + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN; } for (; phase > 0; phase--) { /* Don't stop anything with start_seq < 1 */ - for (lpc = max - 1; lpc >= 0; lpc--) { + for (lpc = SIZEOF(pcmk_children) - 1; lpc >= 0; lpc--) { pcmk_child_t *child = &(pcmk_children[lpc]); if (phase != child->start_seq) { @@ -436,6 +452,11 @@ pcmk_shutdown_worker(gpointer user_data) } crm_notice("Shutdown complete"); + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; + if (!fatal_error && running_with_sbd && + !shutdown_complete_state_reported_client_closed) { + return TRUE; + } { const char *delay = pcmk__env_option("shutdown_delay"); @@ -489,6 +510,51 @@ pcmk_ipc_accept(qb_ipcs_connection_t * c, uid_t uid, gid_t gid) return 0; } +static void +pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id) +{ + const char *value = NULL; + xmlNode *ping = NULL; + xmlNode *reply = NULL; + time_t pinged = time(NULL); + const char *from = crm_element_value(msg, F_CRM_SYS_FROM); + + /* Pinged for status */ + crm_trace("Pinged from %s.%s", + crm_str(crm_element_value(msg, F_CRM_ORIGIN)), + from?from:"unknown"); + ping = create_xml_node(NULL, XML_CRM_TAG_PING); + value = crm_element_value(msg, F_CRM_SYS_TO); + crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value); + crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state); + crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged); + 
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok"); + reply = create_reply(msg, ping); + free_xml(ping); + if (reply) { + if (pcmk__ipc_send_xml(c, id, reply, crm_ipc_server_event) != + pcmk_rc_ok) { + crm_err("Failed sending ping-reply"); + } + free_xml(reply); + } else { + crm_err("Failed building ping-reply"); + } + /* just proceed state on sbd pinging us */ + if (from && strstr(from, "sbd")) { + if (crm_str_eq(pacemakerd_state, + XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, + TRUE)) { + shutdown_complete_state_reported_to = c->pid; + } else if (crm_str_eq(pacemakerd_state, + XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, + TRUE)) { + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + mainloop_set_trigger(startup_trigger); + } + } +} + /* Exit code means? */ static int32_t pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) @@ -514,6 +580,9 @@ pcmk_ipc_dispatch(qb_ipcs_connection_t * qbc, void *data, size_t size) crm_trace("Ignoring IPC request to purge node " "because peer cache is not used"); + } else if (crm_str_eq(task, CRM_OP_PING, TRUE)) { + pcmk_handle_ping_request(c, msg, id); + } else { crm_debug("Unrecognized IPC command '%s' sent to pacemakerd", crm_str(task)); @@ -533,6 +602,12 @@ pcmk_ipc_closed(qb_ipcs_connection_t * c) return 0; } crm_trace("Connection %p", c); + if (shutdown_complete_state_reported_to == client->pid) { + shutdown_complete_state_reported_client_closed = TRUE; + if (shutdown_trigger) { + mainloop_set_trigger(shutdown_trigger); + } + } pcmk__free_client(client); return 0; } @@ -924,8 +999,8 @@ find_and_track_existing_processes(void) return pcmk_rc_ok; } -static void -init_children_processes(void) +static gboolean +init_children_processes(void *user_data) { int start_seq = 1, lpc = 0; static int max = SIZEOF(pcmk_children); @@ -951,6 +1026,8 @@ init_children_processes(void) * This may be useful for the daemons to know */ setenv("PCMK_respawned", "true", 1); + pacemakerd_state = 
XML_PING_ATTR_PACEMAKERDSTATE_RUNNING; + return TRUE; } static void @@ -1154,6 +1231,7 @@ main(int argc, char **argv) if(pcmk_locate_sbd() > 0) { setenv("PCMK_watchdog", "true", 1); + running_with_sbd = TRUE; } else { setenv("PCMK_watchdog", "false", 1); } @@ -1170,7 +1248,13 @@ main(int argc, char **argv) mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); - init_children_processes(); + if (running_with_sbd) { + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; + startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); + } else { + pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; + init_children_processes(NULL); + } crm_notice("Pacemaker daemon successfully started and accepting connections"); g_main_loop_run(mainloop); diff --git a/include/crm/common/Makefile.am b/include/crm/common/Makefile.am index f29d105..1b5730a 100644 --- a/include/crm/common/Makefile.am +++ b/include/crm/common/Makefile.am @@ -12,7 +12,7 @@ MAINTAINERCLEANFILES = Makefile.in headerdir=$(pkgincludedir)/crm/common header_HEADERS = xml.h ipc.h util.h iso8601.h mainloop.h logging.h results.h \ - nvpair.h acl.h ipc_controld.h + nvpair.h acl.h ipc_controld.h ipc_pacemakerd.h noinst_HEADERS = internal.h alerts_internal.h \ iso8601_internal.h remote_internal.h xml_internal.h \ ipc_internal.h output.h cmdline_internal.h curses_internal.h \ diff --git a/include/crm/common/ipc_pacemakerd.h b/include/crm/common/ipc_pacemakerd.h new file mode 100644 index 0000000..00e3edd --- /dev/null +++ b/include/crm/common/ipc_pacemakerd.h @@ -0,0 +1,71 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. 
+ */ + +#ifndef PCMK__IPC_PACEMAKERD__H +# define PCMK__IPC_PACEMAKERD__H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * \file + * \brief IPC commands for Pacemakerd + * + * \ingroup core + */ + +#include // time_t +#include // pcmk_ipc_api_t + +enum pcmk_pacemakerd_state { + pcmk_pacemakerd_state_invalid = -1, + pcmk_pacemakerd_state_init = 0, + pcmk_pacemakerd_state_starting_daemons, + pcmk_pacemakerd_state_wait_for_ping, + pcmk_pacemakerd_state_running, + pcmk_pacemakerd_state_shutting_down, + pcmk_pacemakerd_state_shutdown_complete, + pcmk_pacemakerd_state_max = pcmk_pacemakerd_state_shutdown_complete, +}; + +//! Possible types of pacemakerd replies +enum pcmk_pacemakerd_api_reply { + pcmk_pacemakerd_reply_unknown, + pcmk_pacemakerd_reply_ping, +}; + +/*! + * Pacemakerd reply passed to event callback + */ +typedef struct { + enum pcmk_pacemakerd_api_reply reply_type; + + union { + // pcmk_pacemakerd_reply_ping + struct { + const char *sys_from; + enum pcmk_pacemakerd_state state; + time_t last_good; + int status; + } ping; + } data; +} pcmk_pacemakerd_api_reply_t; + +int pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name); +enum pcmk_pacemakerd_state + pcmk_pacemakerd_api_daemon_state_text2enum(const char *state); +const char + *pcmk_pacemakerd_api_daemon_state_enum2text(enum pcmk_pacemakerd_state state); + +#ifdef __cplusplus +} +#endif + +#endif // PCMK__IPC_PACEMAKERD__H diff --git a/include/crm/msg_xml.h b/include/crm/msg_xml.h index af3f33e..1fcb72d 100644 --- a/include/crm/msg_xml.h +++ b/include/crm/msg_xml.h @@ -123,6 +123,13 @@ extern "C" { # define XML_PING_ATTR_STATUS "result" # define XML_PING_ATTR_SYSFROM "crm_subsystem" # define XML_PING_ATTR_CRMDSTATE "crmd_state" +# define XML_PING_ATTR_PACEMAKERDSTATE "pacemakerd_state" +# define XML_PING_ATTR_PACEMAKERDSTATE_INIT "init" +# define XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS "starting_daemons" +# define XML_PING_ATTR_PACEMAKERDSTATE_WAITPING "wait_for_ping" +# define 
XML_PING_ATTR_PACEMAKERDSTATE_RUNNING "running" +# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN "shutting_down" +# define XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE "shutdown_complete" # define XML_TAG_FRAGMENT "cib_fragment" diff --git a/lib/common/Makefile.am b/lib/common/Makefile.am index db66a6e..e0249b9 100644 --- a/lib/common/Makefile.am +++ b/lib/common/Makefile.am @@ -50,6 +50,7 @@ libcrmcommon_la_SOURCES += io.c libcrmcommon_la_SOURCES += ipc_client.c libcrmcommon_la_SOURCES += ipc_common.c libcrmcommon_la_SOURCES += ipc_controld.c +libcrmcommon_la_SOURCES += ipc_pacemakerd.c libcrmcommon_la_SOURCES += ipc_server.c libcrmcommon_la_SOURCES += iso8601.c libcrmcommon_la_SOURCES += logging.c diff --git a/lib/common/crmcommon_private.h b/lib/common/crmcommon_private.h index d55df99..68e3390 100644 --- a/lib/common/crmcommon_private.h +++ b/lib/common/crmcommon_private.h @@ -210,4 +210,7 @@ bool pcmk__valid_ipc_header(const pcmk__ipc_header_t *header); G_GNUC_INTERNAL pcmk__ipc_methods_t *pcmk__controld_api_methods(void); +G_GNUC_INTERNAL +pcmk__ipc_methods_t *pcmk__pacemakerd_api_methods(void); + #endif // CRMCOMMON_PRIVATE__H diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c index aa032fe..033199d 100644 --- a/lib/common/ipc_client.c +++ b/lib/common/ipc_client.c @@ -41,7 +41,7 @@ * * \note The caller is responsible for freeing *api using pcmk_free_ipc_api(). * \note This is intended to supersede crm_ipc_new() but currently only - * supports the controller IPC API. + * supports the controller & pacemakerd IPC API. */ int pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) @@ -84,6 +84,7 @@ pcmk_new_ipc_api(pcmk_ipc_api_t **api, enum pcmk_ipc_server server) break; case pcmk_ipc_pacemakerd: + (*api)->cmds = pcmk__pacemakerd_api_methods(); break; case pcmk_ipc_schedulerd: @@ -259,7 +260,7 @@ pcmk_ipc_name(pcmk_ipc_api_t *api, bool for_log) return for_log? 
"fencer" : NULL /* "stonith-ng" */; case pcmk_ipc_pacemakerd: - return for_log? "launcher" : NULL /* CRM_SYSTEM_MCP */; + return for_log? "launcher" : CRM_SYSTEM_MCP; case pcmk_ipc_schedulerd: return for_log? "scheduler" : NULL /* CRM_SYSTEM_PENGINE */; diff --git a/lib/common/ipc_pacemakerd.c b/lib/common/ipc_pacemakerd.c new file mode 100644 index 0000000..241722e --- /dev/null +++ b/lib/common/ipc_pacemakerd.c @@ -0,0 +1,232 @@ +/* + * Copyright 2020 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * + * This source code is licensed under the GNU Lesser General Public License + * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. + */ + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "crmcommon_private.h" + +typedef struct pacemakerd_api_private_s { + enum pcmk_pacemakerd_state state; + char *client_uuid; +} pacemakerd_api_private_t; + +static const char *pacemakerd_state_str[] = { + XML_PING_ATTR_PACEMAKERDSTATE_INIT, + XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS, + XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, + XML_PING_ATTR_PACEMAKERDSTATE_RUNNING, + XML_PING_ATTR_PACEMAKERDSTATE_SHUTTINGDOWN, + XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE +}; + +enum pcmk_pacemakerd_state +pcmk_pacemakerd_api_daemon_state_text2enum(const char *state) +{ + int i; + + if (state == NULL) { + return pcmk_pacemakerd_state_invalid; + } + for (i=pcmk_pacemakerd_state_init; i <= pcmk_pacemakerd_state_max; + i++) { + if (crm_str_eq(state, pacemakerd_state_str[i], TRUE)) { + return i; + } + } + return pcmk_pacemakerd_state_invalid; +} + +const char * +pcmk_pacemakerd_api_daemon_state_enum2text( + enum pcmk_pacemakerd_state state) +{ + if ((state >= pcmk_pacemakerd_state_init) && + (state <= pcmk_pacemakerd_state_max)) { + return pacemakerd_state_str[state]; + } + return "invalid"; +} + +// \return Standard Pacemaker return code +static int +new_data(pcmk_ipc_api_t 
*api) +{ + struct pacemakerd_api_private_s *private = NULL; + + api->api_data = calloc(1, sizeof(struct pacemakerd_api_private_s)); + + if (api->api_data == NULL) { + return errno; + } + + private = api->api_data; + private->state = pcmk_pacemakerd_state_invalid; + /* other as with cib, controld, ... we are addressing pacemakerd just + from the local node -> pid is unique and thus sufficient as an ID + */ + private->client_uuid = pcmk__getpid_s(); + + return pcmk_rc_ok; +} + +static void +free_data(void *data) +{ + free(((struct pacemakerd_api_private_s *) data)->client_uuid); + free(data); +} + +// \return Standard Pacemaker return code +static int +post_connect(pcmk_ipc_api_t *api) +{ + struct pacemakerd_api_private_s *private = NULL; + + if (api->api_data == NULL) { + return EINVAL; + } + private = api->api_data; + private->state = pcmk_pacemakerd_state_invalid; + + return pcmk_rc_ok; +} + +static void +post_disconnect(pcmk_ipc_api_t *api) +{ + struct pacemakerd_api_private_s *private = NULL; + + if (api->api_data == NULL) { + return; + } + private = api->api_data; + private->state = pcmk_pacemakerd_state_invalid; + + return; +} + +static bool +reply_expected(pcmk_ipc_api_t *api, xmlNode *request) +{ + const char *command = crm_element_value(request, F_CRM_TASK); + + if (command == NULL) { + return false; + } + + // We only need to handle commands that functions in this file can send + return !strcmp(command, CRM_OP_PING); +} + +static void +dispatch(pcmk_ipc_api_t *api, xmlNode *reply) +{ + crm_exit_t status = CRM_EX_OK; + xmlNode *msg_data = NULL; + pcmk_pacemakerd_api_reply_t reply_data = { + pcmk_pacemakerd_reply_unknown + }; + const char *value = NULL; + long long value_ll = 0; + + value = crm_element_value(reply, F_CRM_MSG_TYPE); + if ((value == NULL) || (strcmp(value, XML_ATTR_RESPONSE))) { + crm_debug("Unrecognizable pacemakerd message: invalid message type '%s'", + crm_str(value)); + status = CRM_EX_PROTOCOL; + goto done; + } + + if 
(crm_element_value(reply, XML_ATTR_REFERENCE) == NULL) { + crm_debug("Unrecognizable pacemakerd message: no reference"); + status = CRM_EX_PROTOCOL; + goto done; + } + + value = crm_element_value(reply, F_CRM_TASK); + if ((value == NULL) || strcmp(value, CRM_OP_PING)) { + crm_debug("Unrecognizable pacemakerd message: '%s'", crm_str(value)); + status = CRM_EX_PROTOCOL; + goto done; + } + + // Parse useful info from reply + + msg_data = get_message_xml(reply, F_CRM_DATA); + crm_element_value_ll(msg_data, XML_ATTR_TSTAMP, &value_ll); + + reply_data.reply_type = pcmk_pacemakerd_reply_ping; + reply_data.data.ping.state = + pcmk_pacemakerd_api_daemon_state_text2enum( + crm_element_value(msg_data, XML_PING_ATTR_PACEMAKERDSTATE)); + reply_data.data.ping.status = + crm_str_eq(crm_element_value(msg_data, XML_PING_ATTR_STATUS), + "ok", FALSE)?pcmk_rc_ok:pcmk_rc_error; + reply_data.data.ping.last_good = (time_t) value_ll; + reply_data.data.ping.sys_from = crm_element_value(msg_data, + XML_PING_ATTR_SYSFROM); + +done: + pcmk__call_ipc_callback(api, pcmk_ipc_event_reply, status, &reply_data); +} + +pcmk__ipc_methods_t * +pcmk__pacemakerd_api_methods() +{ + pcmk__ipc_methods_t *cmds = calloc(1, sizeof(pcmk__ipc_methods_t)); + + if (cmds != NULL) { + cmds->new_data = new_data; + cmds->free_data = free_data; + cmds->post_connect = post_connect; + cmds->reply_expected = reply_expected; + cmds->dispatch = dispatch; + cmds->post_disconnect = post_disconnect; + } + return cmds; +} + +int +pcmk_pacemakerd_api_ping(pcmk_ipc_api_t *api, const char *ipc_name) +{ + pacemakerd_api_private_t *private; + xmlNode *cmd; + int rc; + + CRM_CHECK(api != NULL, return -EINVAL); + private = api->api_data; + CRM_ASSERT(private != NULL); + + cmd = create_request(CRM_OP_PING, NULL, NULL, CRM_SYSTEM_MCP, + ipc_name?ipc_name:((crm_system_name? 
crm_system_name : "client")), + private->client_uuid); + + if (cmd) { + rc = pcmk__send_ipc_request(api, cmd); + if (rc != pcmk_rc_ok) { + crm_debug("Couldn't ping pacemakerd: %s rc=%d", + pcmk_rc_str(rc), rc); + rc = ECOMM; + } + free_xml(cmd); + } else { + rc = ENOMSG; + } + + return rc; +} -- 1.8.3.1 From 06da3c3685b0bdf093a13067cc399e782115e39c Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Mon, 20 Jul 2020 23:28:32 +0200 Subject: [PATCH 4/6] Feature: tools: Add -P to crmadmin to ping via pacemakerd-api --- include/crm/crm.h | 2 +- tools/crmadmin.c | 161 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 152 insertions(+), 11 deletions(-) diff --git a/include/crm/crm.h b/include/crm/crm.h index dc2adc1..ce2074b 100644 --- a/include/crm/crm.h +++ b/include/crm/crm.h @@ -51,7 +51,7 @@ extern "C" { * >=3.0.13: Fail counts include operation name and interval * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED */ -# define CRM_FEATURE_SET "3.4.0" +# define CRM_FEATURE_SET "3.4.1" # define EOS '\0' # define DIMOF(a) ((int) (sizeof(a)/sizeof(a[0])) ) diff --git a/tools/crmadmin.c b/tools/crmadmin.c index 4688458..2ebdd14 100644 --- a/tools/crmadmin.c +++ b/tools/crmadmin.c @@ -20,7 +20,9 @@ #include #include #include +#include #include +#include #include #define DEFAULT_MESSAGE_TIMEOUT_MS 30000 @@ -31,6 +33,8 @@ static GMainLoop *mainloop = NULL; bool do_work(pcmk_ipc_api_t *api); void do_find_node_list(xmlNode *xml_node); +static char *ipc_name = NULL; + gboolean admin_message_timeout(gpointer data); static enum { @@ -40,6 +44,7 @@ static enum { cmd_elect_dc, cmd_whois_dc, cmd_list_nodes, + cmd_pacemakerd_health, } command = cmd_none; static gboolean BE_VERBOSE = FALSE; @@ -82,6 +87,15 @@ static pcmk__cli_option_t long_options[] = { pcmk__option_default }, { + "pacemakerd", no_argument, NULL, 'P', + "Display the status of local pacemakerd.", pcmk__option_default + }, + { + "-spacer-", no_argument, NULL, '-', + 
"\n\tResult is the state of the sub-daemons watched by pacemakerd.\n", + pcmk__option_default + }, + { "dc_lookup", no_argument, NULL, 'D', "Display the uname of the node co-ordinating the cluster.", pcmk__option_default @@ -122,16 +136,21 @@ static pcmk__cli_option_t long_options[] = { { "bash-export", no_argument, NULL, 'B', "Display nodes as shell commands of the form 'export uname=uuid' " - "(valid with -N/--nodes)'\n", + "(valid with -N/--nodes)", + pcmk__option_default + }, + { + "ipc-name", required_argument, NULL, 'i', + "Name to use for ipc instead of 'crmadmin' (with -P/--pacemakerd).", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', - "Notes:", pcmk__option_default + "\nNotes:", pcmk__option_default }, { "-spacer-", no_argument, NULL, '-', - "The -K and -E commands do not work and may be removed in a future " + "\nThe -K and -E commands do not work and may be removed in a future " "version.", pcmk__option_default }, @@ -223,6 +242,88 @@ done: quit_main_loop(exit_code); } +static void +pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, + enum pcmk_ipc_event event_type, crm_exit_t status, + void *event_data, void *user_data) +{ + pcmk_pacemakerd_api_reply_t *reply = event_data; + + switch (event_type) { + case pcmk_ipc_event_disconnect: + if (exit_code == CRM_EX_DISCONNECT) { // Unexpected + fprintf(stderr, "error: Lost connection to pacemakerd\n"); + } + goto done; + break; + + case pcmk_ipc_event_reply: + break; + + default: + return; + } + + if (message_timer_id != 0) { + g_source_remove(message_timer_id); + message_timer_id = 0; + } + + if (status != CRM_EX_OK) { + fprintf(stderr, "error: Bad reply from pacemakerd: %s", + crm_exit_str(status)); + exit_code = status; + goto done; + } + + if (reply->reply_type != pcmk_pacemakerd_reply_ping) { + fprintf(stderr, "error: Unknown reply type %d from pacemakerd\n", + reply->reply_type); + goto done; + } + + // Parse desired information from reply + switch (command) { + case 
cmd_pacemakerd_health: + { + crm_time_t *crm_when = crm_time_new(NULL); + char *pinged_buf = NULL; + + crm_time_set_timet(crm_when, &reply->data.ping.last_good); + pinged_buf = crm_time_as_string(crm_when, + crm_time_log_date | crm_time_log_timeofday | + crm_time_log_with_timezone); + + printf("Status of %s: '%s' %s %s\n", + reply->data.ping.sys_from, + (reply->data.ping.status == pcmk_rc_ok)? + pcmk_pacemakerd_api_daemon_state_enum2text( + reply->data.ping.state):"query failed", + (reply->data.ping.status == pcmk_rc_ok)?"last updated":"", + (reply->data.ping.status == pcmk_rc_ok)?pinged_buf:""); + if (BE_SILENT && + (reply->data.ping.state != pcmk_pacemakerd_state_invalid)) { + fprintf(stderr, "%s\n", + (reply->data.ping.status == pcmk_rc_ok)? + pcmk_pacemakerd_api_daemon_state_enum2text( + reply->data.ping.state): + "query failed"); + } + exit_code = CRM_EX_OK; + free(pinged_buf); + } + break; + + default: // Not really possible here + exit_code = CRM_EX_SOFTWARE; + break; + } + +done: + pcmk_disconnect_ipc(pacemakerd_api); + quit_main_loop(exit_code); +} + // \return Standard Pacemaker return code static int list_nodes() @@ -257,7 +358,9 @@ main(int argc, char **argv) int flag; int rc; pcmk_ipc_api_t *controld_api = NULL; + pcmk_ipc_api_t *pacemakerd_api = NULL; bool need_controld_api = true; + bool need_pacemakerd_api = false; crm_log_cli_init("crmadmin"); pcmk__set_cli_options(NULL, " [options]", long_options, @@ -282,7 +385,9 @@ main(int argc, char **argv) message_timeout_ms = DEFAULT_MESSAGE_TIMEOUT_MS; } break; - + case 'i': + ipc_name = strdup(optarg); + break; case '$': case '?': pcmk__cli_help(flag, CRM_EX_OK); @@ -304,6 +409,11 @@ main(int argc, char **argv) case 'q': BE_SILENT = TRUE; break; + case 'P': + command = cmd_pacemakerd_health; + need_pacemakerd_api = true; + need_controld_api = false; + break; case 'S': command = cmd_health; crm_trace("Option %c => %s", flag, optarg); @@ -369,7 +479,26 @@ main(int argc, char **argv) } } - if 
(do_work(controld_api)) { + // Connect to pacemakerd if needed + if (need_pacemakerd_api) { + rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd); + if (pacemakerd_api == NULL) { + fprintf(stderr, "error: Could not connect to pacemakerd: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; + } + pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL); + rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main); + if (rc != pcmk_rc_ok) { + fprintf(stderr, "error: Could not connect to pacemakerd: %s\n", + pcmk_rc_str(rc)); + exit_code = pcmk_rc2exitc(rc); + goto done; + } + } + + if (do_work(controld_api?controld_api:pacemakerd_api)) { // A reply is needed from controller, so run main loop to get it exit_code = CRM_EX_DISCONNECT; // For unexpected disconnects mainloop = g_main_loop_new(NULL, FALSE); @@ -379,12 +508,19 @@ main(int argc, char **argv) } done: + if (controld_api != NULL) { pcmk_ipc_api_t *capi = controld_api; - controld_api = NULL; // Ensure we can't free this twice pcmk_free_ipc_api(capi); } + + if (pacemakerd_api != NULL) { + pcmk_ipc_api_t *capi = pacemakerd_api; + pacemakerd_api = NULL; // Ensure we can't free this twice + pcmk_free_ipc_api(capi); + } + if (mainloop != NULL) { g_main_loop_unref(mainloop); mainloop = NULL; @@ -394,30 +530,35 @@ done: // \return True if reply from controller is needed bool -do_work(pcmk_ipc_api_t *controld_api) +do_work(pcmk_ipc_api_t *api) { bool need_reply = false; int rc = pcmk_rc_ok; switch (command) { case cmd_shutdown: - rc = pcmk_controld_api_shutdown(controld_api, dest_node); + rc = pcmk_controld_api_shutdown(api, dest_node); break; case cmd_health: // dest_node != NULL case cmd_whois_dc: // dest_node == NULL - rc = pcmk_controld_api_ping(controld_api, dest_node); + rc = pcmk_controld_api_ping(api, dest_node); need_reply = true; break; case cmd_elect_dc: - rc = pcmk_controld_api_start_election(controld_api); + rc = pcmk_controld_api_start_election(api); break; case 
cmd_list_nodes: rc = list_nodes(); break; + case cmd_pacemakerd_health: + rc = pcmk_pacemakerd_api_ping(api, ipc_name); + need_reply = true; + break; + case cmd_none: // not actually possible here break; } -- 1.8.3.1 From 6ce5bb0d6fd30a204468ea245209d34f2682d7c9 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Tue, 21 Jul 2020 18:12:53 +0200 Subject: [PATCH 5/6] Fix: pacemakerd: interworking with sbd not using pacemakerd-api --- daemons/pacemakerd/pacemakerd.c | 8 +++++++- include/crm/common/options_internal.h | 1 + lib/common/watchdog.c | 15 +++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index ccfae66..e91982a 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -454,6 +454,7 @@ pcmk_shutdown_worker(gpointer user_data) crm_notice("Shutdown complete"); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE; if (!fatal_error && running_with_sbd && + pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { return TRUE; } @@ -1248,10 +1249,15 @@ main(int argc, char **argv) mainloop_add_signal(SIGTERM, pcmk_shutdown); mainloop_add_signal(SIGINT, pcmk_shutdown); - if (running_with_sbd) { + if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); } else { + if (running_with_sbd) { + crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " + "by your sbd version) improve reliability of " + "interworking between SBD & pacemaker."); + } pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; init_children_processes(NULL); } diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h index db54da4..d0429c9 100644 --- a/include/crm/common/options_internal.h +++ 
b/include/crm/common/options_internal.h @@ -111,6 +111,7 @@ bool pcmk__valid_utilization(const char *value); // from watchdog.c long pcmk__get_sbd_timeout(void); +bool pcmk__get_sbd_sync_resource_startup(void); long pcmk__auto_watchdog_timeout(void); bool pcmk__valid_sbd_timeout(const char *value); diff --git a/lib/common/watchdog.c b/lib/common/watchdog.c index 9d8896b..8838be6 100644 --- a/lib/common/watchdog.c +++ b/lib/common/watchdog.c @@ -227,6 +227,21 @@ pcmk__get_sbd_timeout(void) return sbd_timeout; } +bool +pcmk__get_sbd_sync_resource_startup(void) +{ + static bool sync_resource_startup = false; + static bool checked_sync_resource_startup = false; + + if (!checked_sync_resource_startup) { + sync_resource_startup = + crm_is_true(getenv("SBD_SYNC_RESOURCE_STARTUP")); + checked_sync_resource_startup = true; + } + + return sync_resource_startup; +} + long pcmk__auto_watchdog_timeout() { -- 1.8.3.1 From 567cb6ec6f317af9e973321633950ef26f43c486 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Thu, 23 Jul 2020 23:00:23 +0200 Subject: [PATCH 6/6] Fix: pacemakerd: improve logging when synced with SBD --- daemons/pacemakerd/pacemakerd.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c index e91982a..c888b73 100644 --- a/daemons/pacemakerd/pacemakerd.c +++ b/daemons/pacemakerd/pacemakerd.c @@ -456,6 +456,7 @@ pcmk_shutdown_worker(gpointer user_data) if (!fatal_error && running_with_sbd && pcmk__get_sbd_sync_resource_startup() && !shutdown_complete_state_reported_client_closed) { + crm_notice("Waiting for SBD to pick up shutdown-complete-state."); return TRUE; } @@ -546,10 +547,14 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id) if (crm_str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_SHUTDOWNCOMPLETE, TRUE)) { + if (pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Shutdown-complete-state passed to SBD."); + } 
shutdown_complete_state_reported_to = c->pid; } else if (crm_str_eq(pacemakerd_state, XML_PING_ATTR_PACEMAKERDSTATE_WAITPING, TRUE)) { + crm_notice("Received startup-trigger from SBD."); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; mainloop_set_trigger(startup_trigger); } @@ -1250,12 +1255,13 @@ main(int argc, char **argv) mainloop_add_signal(SIGINT, pcmk_shutdown); if ((running_with_sbd) && pcmk__get_sbd_sync_resource_startup()) { + crm_notice("Waiting for startup-trigger from SBD."); pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_WAITPING; startup_trigger = mainloop_add_trigger(G_PRIORITY_HIGH, init_children_processes, NULL); } else { if (running_with_sbd) { crm_warn("Enabling SBD_SYNC_RESOURCE_STARTUP would (if supported " - "by your sbd version) improve reliability of " + "by your SBD version) improve reliability of " "interworking between SBD & pacemaker."); } pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_STARTINGDAEMONS; -- 1.8.3.1