diff --git a/SOURCES/004-systemd-metadata.patch b/SOURCES/004-systemd-metadata.patch new file mode 100644 index 0000000..142ef6a --- /dev/null +++ b/SOURCES/004-systemd-metadata.patch @@ -0,0 +1,73 @@ +From 09ef95a2eed48b4eb7488788a1b655d67eafe783 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 30 Nov 2021 14:47:12 -0500 +Subject: [PATCH] Low: libcrmservice: Handle systemd service templates. + +These unit files (which have an @ sign at the end) expect to be +parameterized by an instance name. Not providing an instance name +causes the dbus lookup to fail, and we fall back to assume this is an +LSB service. If the user doesn't provide an instance name, just add a +fake one. It doesn't seem to matter what name is given for the lookup. + +See: rhbz#2003151 +--- + lib/services/systemd.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index 8e9fff484..27a3b376d 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -206,17 +206,27 @@ systemd_unit_extension(const char *name) + } + + static char * +-systemd_service_name(const char *name) ++systemd_service_name(const char *name, bool add_instance_name) + { +- if (name == NULL) { ++ if (pcmk__str_empty(name)) { + return NULL; + } + + if (systemd_unit_extension(name)) { + return strdup(name); +- } + +- return crm_strdup_printf("%s.service", name); ++ /* Services that end with an @ sign are systemd templates. They expect an ++ * instance name to follow the service name. If no instance name was ++ * provided, just add "x" to the string as the instance name. It doesn't ++ * seem to matter for purposes of looking up whether a service exists or ++ * not. ++ */ ++ } else if (add_instance_name && *(name+strlen(name)-1) == '@') { ++ return crm_strdup_printf("%sx.service", name); ++ ++ } else { ++ return crm_strdup_printf("%s.service", name); ++ } + } + + static void +@@ -427,7 +437,7 @@ invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path) + CRM_ASSERT(msg != NULL); + + // Add the (expanded) unit name as the argument +- name = systemd_service_name(arg_name); ++ name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, + DBUS_TYPE_INVALID)); + free(name); +@@ -944,7 +954,7 @@ invoke_unit_by_path(svc_action_t *op, const char *unit) + /* (ss) */ + { + const char *replace_s = "replace"; +- char *name = systemd_service_name(op->agent); ++ char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); + + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); +-- +2.27.0 + diff --git a/SOURCES/005-fencing-reasons.patch b/SOURCES/005-fencing-reasons.patch new file mode 100644 index 0000000..e0772c6 --- /dev/null +++ b/SOURCES/005-fencing-reasons.patch @@ -0,0 +1,2200 @@ +From 3d10dad9a555aae040d8473edfe31a4e4279c066 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:34:03 -0600 +Subject: [PATCH 01/19] Refactor: libcrmcommon: add internal API for checking + for fencing action + +The naming is a little awkward -- "fencing action" has multiple meanings +depending on the context. It can refer to fencer API requests, fence device +actions, fence agent actions, or just those actions that fence a node (off and +reboot). 
+ +This new function pcmk__is_fencing_action() uses the last meaning, so it does +*not* return true for unfencing ("on" actions). +--- + include/crm/common/internal.h | 1 + + lib/common/operations.c | 14 ++++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index a35c5769a..694fc6cd4 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -218,6 +218,7 @@ char *pcmk__notify_key(const char *rsc_id, const char *notify_type, + char *pcmk__transition_key(int transition_id, int action_id, int target_rc, + const char *node); + void pcmk__filter_op_for_digest(xmlNode *param_set); ++bool pcmk__is_fencing_action(const char *action); + + + // bitwise arithmetic utilities +diff --git a/lib/common/operations.c b/lib/common/operations.c +index aa7106ce6..366c18970 100644 +--- a/lib/common/operations.c ++++ b/lib/common/operations.c +@@ -523,3 +523,17 @@ crm_op_needs_metadata(const char *rsc_class, const char *op) + CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, + CRMD_ACTION_NOTIFY, NULL); + } ++ ++/*! ++ * \internal ++ * \brief Check whether an action name is for a fencing action ++ * ++ * \param[in] action Action name to check ++ * ++ * \return true if \p action is "off", "reboot", or "poweroff", otherwise false ++ */ ++bool ++pcmk__is_fencing_action(const char *action) ++{ ++ return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL); ++} +-- +2.27.0 + + +From 86ac00fb3e99d79ca2c442ae1670fe850146f734 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:38:58 -0600 +Subject: [PATCH 02/19] Low: fencer,scheduler: compare fence action names + case-sensitively + +Use the new convenience function pcmk__is_fencing_action() to check whether +an action name is a fencing action ("off", "reboot", or "poweroff"). This +changes the behavior from case-insensitive to case-sensitive, which is more +appropriate (the case-insensitivity was inherited from lazy use of the old +safe_str_eq() function which was always case-insensitive). 
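As a standalone illustration of the semantics, the sketch below reimplements the check with plain strcmp() in place of the pcmk__str_any_of() helper; it is not Pacemaker code, just a minimal stand-in:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Standalone, case-sensitive equivalent of the new check: true only for
     * the action names that actually fence a node. */
    static bool
    is_fencing_action(const char *action)
    {
        return (action != NULL)
               && ((strcmp(action, "off") == 0)
                   || (strcmp(action, "reboot") == 0)
                   || (strcmp(action, "poweroff") == 0));
    }

    int
    main(void)
    {
        /* "Off" no longer matches once the comparison is case-sensitive */
        printf("off=%d Off=%d on=%d\n",
               is_fencing_action("off"),
               is_fencing_action("Off"),
               is_fencing_action("on"));
        return 0;
    }

Compiled and run, this reports 1 for "off" but 0 for "Off", which is exactly the behavioral change the patch makes deliberate.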
+--- + daemons/fenced/fenced_commands.c | 6 +++--- + daemons/fenced/fenced_remote.c | 2 +- + lib/pacemaker/pcmk_graph_producer.c | 2 +- + lib/pengine/common.c | 8 +------- + 4 files changed, 6 insertions(+), 12 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 63bfad3a9..46c840f2a 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -128,7 +128,7 @@ get_action_delay_max(stonith_device_t * device, const char * action) + const char *value = NULL; + int delay_max = 0; + +- if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { ++ if (!pcmk__is_fencing_action(action)) { + return 0; + } + +@@ -146,7 +146,7 @@ get_action_delay_base(stonith_device_t *device, const char *action, const char * + char *hash_value = NULL; + int delay_base = 0; + +- if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { ++ if (!pcmk__is_fencing_action(action)) { + return 0; + } + +@@ -448,7 +448,7 @@ stonith_device_execute(stonith_device_t * device) + + if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, + STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { +- if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { ++ if (pcmk__is_fencing_action(cmd->action)) { + if (node_does_watchdog_fencing(stonith_our_uname)) { + pcmk__panic(__func__); + goto done; +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 963433bf3..358ea3aa7 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1758,7 +1758,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + if (!tp) { + return FALSE; + } +- if (pcmk__strcase_any_of(op->action, "off", "reboot", NULL)) { ++ if (pcmk__is_fencing_action(op->action)) { + /* Don't count the devices on the target node if we are killing + * the target node. 
*/ + skip_target = TRUE; +diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c +index ffcbd1274..5bec9d8ce 100644 +--- a/lib/pacemaker/pcmk_graph_producer.c ++++ b/lib/pacemaker/pcmk_graph_producer.c +@@ -721,7 +721,7 @@ add_downed_nodes(xmlNode *xml, const pe_action_t *action, + /* Fencing makes the action's node and any hosted guest nodes down */ + const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); + +- if (pcmk__strcase_any_of(fence, "off", "reboot", NULL)) { ++ if (pcmk__is_fencing_action(fence)) { + xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); + add_node_to_xml_by_id(action->node->details->id, downed); + pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 236fc26b1..fe4223816 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -27,12 +27,6 @@ check_health(const char *value) + "migrate-on-red", NULL); + } + +-static bool +-check_stonith_action(const char *value) +-{ +- return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL); +-} +- + static bool + check_placement_strategy(const char *value) + { +@@ -114,7 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = { + }, + { + "stonith-action", NULL, "select", "reboot, off, poweroff", +- "reboot", check_stonith_action, ++ "reboot", pcmk__is_fencing_action, + "Action to send to fence device when a node needs to be fenced " + "(\"poweroff\" is a deprecated alias for \"off\")", + NULL +-- +2.27.0 + + +From c8f6e8a04c4fa4271db817af0a23aa941c9d7689 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Nov 2021 17:42:21 -0600 +Subject: [PATCH 03/19] Refactor: fencing: rename type for peer query replies + +st_query_result_t contains the device information parsed from a peer's query +reply, but the name could easily be confused with the actual success/failure +result of the query action itself. Rename it to peer_device_info_t. +--- + daemons/fenced/fenced_remote.c | 103 +++++++++++++++++---------------- + 1 file changed, 52 insertions(+), 51 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 358ea3aa7..9e2f62804 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -41,7 +41,7 @@ + + /* When one fencer queries its peers for devices able to handle a fencing + * request, each peer will reply with a list of such devices available to it. +- * Each reply will be parsed into a st_query_result_t, with each device's ++ * Each reply will be parsed into a peer_device_info_t, with each device's + * information kept in a device_properties_t. 
+ */ + +@@ -72,18 +72,19 @@ typedef struct st_query_result_s { + int ndevices; + /* Devices available to this host that are capable of fencing the target */ + GHashTable *devices; +-} st_query_result_t; ++} peer_device_info_t; + + GHashTable *stonith_remote_op_list = NULL; + +-void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc); ++void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, ++ int rc); + static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); + extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, + int call_options); + + static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); + static int get_op_total_timeout(const remote_fencing_op_t *op, +- const st_query_result_t *chosen_peer); ++ const peer_device_info_t *chosen_peer); + + static gint + sort_strings(gconstpointer a, gconstpointer b) +@@ -95,7 +96,7 @@ static void + free_remote_query(gpointer data) + { + if (data) { +- st_query_result_t *query = data; ++ peer_device_info_t *query = data; + + crm_trace("Free'ing query result from %s", query->host); + g_hash_table_destroy(query->devices); +@@ -150,8 +151,8 @@ count_peer_device(gpointer key, gpointer value, gpointer user_data) + * \return Number of devices available to peer that were not already executed + */ + static int +-count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, +- gboolean verified_only) ++count_peer_devices(const remote_fencing_op_t *op, ++ const peer_device_info_t *peer, gboolean verified_only) + { + struct peer_count_data data; + +@@ -175,7 +176,7 @@ count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, + * \return Device properties if found, NULL otherwise + */ + static device_properties_t * +-find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, ++find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, + const char *device) + { + device_properties_t *props = g_hash_table_lookup(peer->devices, device); +@@ -196,7 +197,7 @@ find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, + * \return TRUE if device was found and marked, FALSE otherwise + */ + static gboolean +-grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, ++grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, + const char *device, gboolean verified_devices_only) + { + device_properties_t *props = find_peer_device(op, peer, device); +@@ -1216,7 +1217,7 @@ enum find_best_peer_options { + FIND_PEER_VERIFIED_ONLY = 0x0004, + }; + +-static st_query_result_t * ++static peer_device_info_t * + find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) + { + GList *iter = NULL; +@@ -1227,7 +1228,7 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer + } + + for (iter = op->query_results; iter != NULL; iter = iter->next) { +- st_query_result_t *peer = iter->data; ++ peer_device_info_t *peer = iter->data; + + crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", + peer->host, op->target, peer->ndevices, +@@ -1257,11 +1258,11 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer + return NULL; + } + +-static st_query_result_t * ++static peer_device_info_t * + stonith_choose_peer(remote_fencing_op_t * op) + { + const char *device = NULL; +- st_query_result_t *peer = NULL; ++ 
peer_device_info_t *peer = NULL; + uint32_t active = fencing_active_peers(); + + do { +@@ -1317,8 +1318,8 @@ stonith_choose_peer(remote_fencing_op_t * op) + } + + static int +-get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, +- const char *device) ++get_device_timeout(const remote_fencing_op_t *op, ++ const peer_device_info_t *peer, const char *device) + { + device_properties_t *props; + +@@ -1338,7 +1339,7 @@ get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, + + struct timeout_data { + const remote_fencing_op_t *op; +- const st_query_result_t *peer; ++ const peer_device_info_t *peer; + int total_timeout; + }; + +@@ -1365,7 +1366,7 @@ add_device_timeout(gpointer key, gpointer value, gpointer user_data) + } + + static int +-get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) ++get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) + { + struct timeout_data timeout; + +@@ -1380,7 +1381,7 @@ get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) + + static int + get_op_total_timeout(const remote_fencing_op_t *op, +- const st_query_result_t *chosen_peer) ++ const peer_device_info_t *chosen_peer) + { + int total_timeout = 0; + stonith_topology_t *tp = find_topology_for_host(op->target); +@@ -1403,7 +1404,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, + } + for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { + for (iter = op->query_results; iter != NULL; iter = iter->next) { +- const st_query_result_t *peer = iter->data; ++ const peer_device_info_t *peer = iter->data; + + if (find_peer_device(op, peer, device_list->data)) { + total_timeout += get_device_timeout(op, peer, +@@ -1555,7 +1556,7 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op) + } + + void +-call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) ++call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc) + { + const char *device = NULL; + int timeout = op->base_timeout; +@@ -1734,8 +1735,8 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) + static gint + sort_peers(gconstpointer a, gconstpointer b) + { +- const st_query_result_t *peer_a = a; +- const st_query_result_t *peer_b = b; ++ const peer_device_info_t *peer_a = a; ++ const peer_device_info_t *peer_b = b; + + return (peer_b->ndevices - peer_a->ndevices); + } +@@ -1768,7 +1769,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + for (device = tp->levels[i]; device; device = device->next) { + match = NULL; + for (iter = op->query_results; iter && !match; iter = iter->next) { +- st_query_result_t *peer = iter->data; ++ peer_device_info_t *peer = iter->data; + + if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; +@@ -1850,31 +1851,31 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device, + * + * \param[in] xml XML node containing device properties + * \param[in,out] op Operation that query and reply relate to +- * \param[in,out] result Peer's results ++ * \param[in,out] peer Peer's device information + * \param[in] device ID of device being parsed + */ + static void + add_device_properties(xmlNode *xml, remote_fencing_op_t *op, +- st_query_result_t *result, const char *device) ++ peer_device_info_t *peer, const char *device) + { + xmlNode *child; + int verified = 0; + device_properties_t *props = calloc(1, sizeof(device_properties_t)); + +- /* Add a 
new entry to this result's devices list */ ++ /* Add a new entry to this peer's devices list */ + CRM_ASSERT(props != NULL); +- g_hash_table_insert(result->devices, strdup(device), props); ++ g_hash_table_insert(peer->devices, strdup(device), props); + + /* Peers with verified (monitored) access will be preferred */ + crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); + if (verified) { + crm_trace("Peer %s has confirmed a verified device %s", +- result->host, device); ++ peer->host, device); + props->verified = TRUE; + } + + /* Parse action-specific device properties */ +- parse_action_specific(xml, result->host, device, op_requested_action(op), ++ parse_action_specific(xml, peer->host, device, op_requested_action(op), + op, st_phase_requested, props); + for (child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { +@@ -1883,10 +1884,10 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op, + * winds up getting remapped. + */ + if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) { +- parse_action_specific(child, result->host, device, "off", ++ parse_action_specific(child, peer->host, device, "off", + op, st_phase_off, props); + } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) { +- parse_action_specific(child, result->host, device, "on", ++ parse_action_specific(child, peer->host, device, "on", + op, st_phase_on, props); + } + } +@@ -1903,17 +1904,17 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op, + * + * \return Newly allocated result structure with parsed reply + */ +-static st_query_result_t * ++static peer_device_info_t * + add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml) + { +- st_query_result_t *result = calloc(1, sizeof(st_query_result_t)); ++ peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t)); + xmlNode *child; + + // cppcheck seems not to understand the abort logic in CRM_CHECK + // cppcheck-suppress memleak +- CRM_CHECK(result != NULL, return NULL); +- result->host = strdup(host); +- result->devices = pcmk__strkey_table(free, free); ++ CRM_CHECK(peer != NULL, return NULL); ++ peer->host = strdup(host); ++ peer->devices = pcmk__strkey_table(free, free); + + /* Each child element describes one capable device available to the peer */ + for (child = pcmk__xml_first_child(xml); child != NULL; +@@ -1921,17 +1922,17 @@ add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml + const char *device = ID(child); + + if (device) { +- add_device_properties(child, op, result, device); ++ add_device_properties(child, op, peer, device); + } + } + +- result->ndevices = g_hash_table_size(result->devices); +- CRM_CHECK(ndevices == result->ndevices, ++ peer->ndevices = g_hash_table_size(peer->devices); ++ CRM_CHECK(ndevices == peer->ndevices, + crm_err("Query claimed to have %d device%s but %d found", +- ndevices, pcmk__plural_s(ndevices), result->ndevices)); ++ ndevices, pcmk__plural_s(ndevices), peer->ndevices)); + +- op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); +- return result; ++ op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); ++ return peer; + } + + /*! 
+@@ -1957,7 +1958,7 @@ process_remote_stonith_query(xmlNode * msg) + const char *id = NULL; + const char *host = NULL; + remote_fencing_op_t *op = NULL; +- st_query_result_t *result = NULL; ++ peer_device_info_t *peer = NULL; + uint32_t replies_expected; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + +@@ -1991,7 +1992,7 @@ process_remote_stonith_query(xmlNode * msg) + op->replies, replies_expected, host, + op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); + if (ndevices > 0) { +- result = add_result(op, host, ndevices, dev); ++ peer = add_result(op, host, ndevices, dev); + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { +@@ -2001,7 +2002,7 @@ process_remote_stonith_query(xmlNode * msg) + if (op->state == st_query && all_topology_devices_found(op)) { + /* All the query results are in for the topology, start the fencing ops. */ + crm_trace("All topology devices found"); +- call_remote_stonith(op, result, pcmk_ok); ++ call_remote_stonith(op, peer, pcmk_ok); + + } else if (have_all_replies) { + crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", +@@ -2010,15 +2011,15 @@ process_remote_stonith_query(xmlNode * msg) + } + + } else if (op->state == st_query) { +- int nverified = count_peer_devices(op, result, TRUE); ++ int nverified = count_peer_devices(op, peer, TRUE); + + /* We have a result for a non-topology fencing op that looks promising, + * go ahead and start fencing before query timeout */ +- if (result && (host_is_target == FALSE) && nverified) { ++ if ((peer != NULL) && !host_is_target && nverified) { + /* we have a verified device living on a peer that is not the target */ + crm_trace("Found %d verified device%s", + nverified, pcmk__plural_s(nverified)); +- call_remote_stonith(op, result, pcmk_ok); ++ call_remote_stonith(op, peer, pcmk_ok); + + } else if (have_all_replies) { + crm_info("All query replies have arrived, continuing (%d expected/%d received) ", +@@ -2029,10 +2030,10 @@ process_remote_stonith_query(xmlNode * msg) + crm_trace("Waiting for more peer results before launching fencing operation"); + } + +- } else if (result && (op->state == st_done)) { ++ } else if ((peer != NULL) && (op->state == st_done)) { + crm_info("Discarding query result from %s (%d device%s): " +- "Operation is %s", result->host, +- result->ndevices, pcmk__plural_s(result->ndevices), ++ "Operation is %s", peer->host, ++ peer->ndevices, pcmk__plural_s(peer->ndevices), + stonith_op_state_str(op->state)); + } + +-- +2.27.0 + + +From 913e0620310089d2250e9ecde383df757f8e8063 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:46:37 -0600 +Subject: [PATCH 04/19] Low: fencer: improve broadcasting replies for fenced + originators + +If the target of a fencing action was also the originator, the executioner +broadcasts the result on their behalf. + +Previously, it would check if the action was not in a list of actions that are +never broadcasted. However we really only want to broadcast off/reboot results +so just check for that instead. + +This also rearranges reply creation slightly so we don't trace-log the reply +until it is fully created. 
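As a rough sketch of the new broadcast rule (broadcast only for actions that fence a node, and only when the fence target was also the originator), the decision reduces to a predicate like the one below. The function name is hypothetical, strcasecmp() stands in for pcmk__str_eq(..., pcmk__str_casei), and the stand_alone flag is passed in explicitly:

    #include <stdbool.h>
    #include <string.h>
    #include <strings.h>

    /* Hypothetical standalone sketch of the decision made in
     * send_async_reply(): broadcast the reply only when the action fences a
     * node and the originator is also the victim. */
    bool
    should_broadcast_reply(bool stand_alone, const char *action,
                           const char *origin, const char *victim)
    {
        bool fences_node = (action != NULL)
                           && ((strcmp(action, "off") == 0)
                               || (strcmp(action, "reboot") == 0)
                               || (strcmp(action, "poweroff") == 0));

        return !stand_alone && fences_node
               && (origin != NULL) && (victim != NULL)
               && (strcasecmp(origin, victim) == 0); /* node names compare case-insensitively */
    }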
+--- + daemons/fenced/fenced_commands.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 46c840f2a..e4185f6e1 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2385,32 +2385,31 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + int pid, bool merged) + { + xmlNode *reply = NULL; +- gboolean bcast = FALSE; ++ bool bcast = false; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + + reply = construct_async_reply(cmd, result); + +- // Only replies for certain actions are broadcast +- if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status", +- NULL)) { +- crm_trace("Never broadcast '%s' replies", cmd->action); ++ // If target was also the originator, broadcast fencing results for it ++ if (!stand_alone && pcmk__is_fencing_action(cmd->action) ++ && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) { + +- } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) { +- crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); ++ crm_trace("Broadcast '%s' result for %s (target was also originator)", ++ cmd->action, cmd->victim); + crm_xml_add(reply, F_SUBTYPE, "broadcast"); +- bcast = TRUE; ++ crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); ++ bcast = true; + } + + log_async_result(cmd, result, pid, NULL, merged); +- crm_log_xml_trace(reply, "Reply"); + + if (merged) { + crm_xml_add(reply, F_STONITH_MERGED, "true"); + } ++ crm_log_xml_trace(reply, "Reply"); + + if (bcast) { +- crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); + + } else if (cmd->origin) { +-- +2.27.0 + + +From 8b8f94fd9ca5e61922cb81e32c8a3d0f1d75fb0b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 14:40:49 -0600 +Subject: [PATCH 05/19] Refactor: fencer: avoid code duplication when sending + async reply + +... and clean up reply function +--- + daemons/fenced/fenced_commands.c | 33 ++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index e4185f6e1..4ea0a337a 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2411,15 +2411,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + + if (bcast) { + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); +- +- } else if (cmd->origin) { +- crm_trace("Directed reply to %s", cmd->origin); +- send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); +- + } else { +- crm_trace("Directed local %ssync reply to %s", +- (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); +- do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); ++ stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client); + } + + if (stand_alone) { +@@ -2814,16 +2807,28 @@ check_alternate_host(const char *target) + return alternate_host; + } + ++/*! 
++ * \internal ++ * \brief Send a reply to a CPG peer or IPC client ++ * ++ * \param[in] reply XML reply to send ++ * \param[in] call_options Send synchronously if st_opt_sync_call is set here ++ * \param[in] remote_peer If not NULL, name of peer node to send CPG reply ++ * \param[in] client_id If not NULL, name of client to send IPC reply ++ */ + static void +-stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, ++stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, + const char *client_id) + { +- if (remote_peer) { +- send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); +- } else { ++ CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client_id != NULL)), ++ return); ++ ++ if (remote_peer == NULL) { + do_local_reply(reply, client_id, +- pcmk_is_set(call_options, st_opt_sync_call), +- (remote_peer != NULL)); ++ pcmk_is_set(call_options, st_opt_sync_call), FALSE); ++ } else { ++ send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, ++ reply, FALSE); + } + } + +-- +2.27.0 + + +From 2cdbda58f0e9f38a0e302506107fd933cb415144 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 17:24:09 -0600 +Subject: [PATCH 06/19] Refactor: fencer: ensure all requests get clean-up + +handle_request() has if-else blocks for each type of request. Previously, if a +request didn't need a reply, the function would do any clean-up needed and +return immediately. Now, we track whether a reply is needed, and all request +types flow to the end of the function for consistent clean-up. + +This doesn't change any behavior at this point, but allows us to do more at the +end of request handling. +--- + daemons/fenced/fenced_commands.c | 46 ++++++++++++++++++-------------- + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 4ea0a337a..19477b49b 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2892,6 +2892,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + xmlNode *data = NULL; + xmlNode *reply = NULL; ++ bool need_reply = true; + + char *output = NULL; + const char *op = crm_element_value(request, F_STONITH_OPERATION); +@@ -2921,10 +2922,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + pcmk__ipc_send_xml(client, id, reply, flags); + client->request_id = 0; + free_xml(reply); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) { + rc = stonith_device_action(request, &output); ++ need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) { + const char *call_id = crm_element_value(request, F_STONITH_CALLID); +@@ -2933,7 +2936,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); + do_stonith_async_timeout_update(client_id, call_id, op_timeout); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + if (remote_peer) { +@@ -2944,7 +2948,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + remove_relay_op(request); + + stonith_query(request, remote_peer, client_id, call_options); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { + const char *flag_name = NULL; +@@ 
-2965,7 +2970,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } + + pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) { + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); +@@ -2977,8 +2983,11 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value(dev, F_STONITH_ACTION), + crm_element_value(dev, F_STONITH_TARGET)); + +- if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { ++ if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) { ++ rc = -EPROTO; ++ } else { + rc = -EINPROGRESS; ++ need_reply = false; + } + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { +@@ -3012,7 +3021,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); + + if (stonith_check_fence_tolerance(tolerance, target, action)) { +- rc = 0; ++ rc = pcmk_ok; + goto done; + } + +@@ -3047,10 +3056,13 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + FALSE); + rc = -EINPROGRESS; + +- } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { ++ } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) { ++ rc = -EPROTO; ++ } else { + rc = -EINPROGRESS; + } + } ++ need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { + rc = stonith_fence_history(request, &data, remote_peer, call_options); +@@ -3058,8 +3070,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + /* we don't expect answers to the broadcast + * we might have sent out + */ +- free_xml(data); +- return pcmk_ok; ++ rc = pcmk_ok; ++ need_reply = false; + } + + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { +@@ -3111,8 +3123,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value_int(request, XML_ATTR_ID, &node_id); + name = crm_element_value(request, XML_ATTR_UNAME); + reap_crm_member(node_id, name); +- +- return pcmk_ok; ++ rc = pcmk_ok; ++ need_reply = false; + + } else { + crm_err("Unknown IPC request %s from %s %s", op, +@@ -3120,20 +3132,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + ((client == NULL)? remote_peer : pcmk__client_name(client))); + } + +- done: +- ++done: + if (rc == -EACCES) { + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + crm_str(op), pcmk__client_name(client)); + } + +- /* Always reply unless the request is in process still. 
+- * If in progress, a reply will happen async after the request +- * processing is finished */ +- if (rc != -EINPROGRESS) { +- crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, +- id, pcmk_is_set(call_options, st_opt_sync_call), call_options, +- crm_element_value(request, F_STONITH_CALLOPTS)); ++ // Reply if result is known ++ if (need_reply) { + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); +-- +2.27.0 + + +From 067d655ebd3fbb0ed27f4e7426db4c3b661ba777 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 17:26:32 -0600 +Subject: [PATCH 07/19] Log: fencer: improve debug logs when processing CPG/IPC + messages + +By moving the result log messages from stonith_command() to handle_reply() and +handle_request(), we can simplify stonith_command() and give slightly better +messages. +--- + daemons/fenced/fenced_commands.c | 80 +++++++++++++++----------------- + 1 file changed, 38 insertions(+), 42 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 19477b49b..98af0e04f 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2883,7 +2883,7 @@ remove_relay_op(xmlNode * request) + } + } + +-static int ++static void + handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *request, const char *remote_peer) + { +@@ -3152,73 +3152,69 @@ done: + free_xml(data); + free_xml(reply); + +- return rc; ++ crm_debug("Processed %s request from %s %s: %s (rc=%d)", ++ op, ((client == NULL)? "peer" : "client"), ++ ((client == NULL)? remote_peer : pcmk__client_name(client)), ++ ((rc > 0)? "" : pcmk_strerror(rc)), rc); + } + + static void + handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) + { +- const char *op = crm_element_value(request, F_STONITH_OPERATION); ++ // Copy, because request might be freed before we want to log this ++ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); + + if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + process_remote_stonith_query(request); +- } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { +- process_remote_stonith_exec(request); +- } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { +- /* Reply to a complex fencing op */ ++ } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { + process_remote_stonith_exec(request); + } else { +- crm_err("Unknown %s reply from %s %s", op, +- ((client == NULL)? "peer" : "client"), ++ crm_err("Ignoring unknown %s reply from %s %s", ++ crm_str(op), ((client == NULL)? "peer" : "client"), + ((client == NULL)? remote_peer : pcmk__client_name(client))); + crm_log_xml_warn(request, "UnknownOp"); ++ free(op); ++ return; + } ++ crm_debug("Processed %s reply from %s %s", ++ op, ((client == NULL)? "peer" : "client"), ++ ((client == NULL)? remote_peer : pcmk__client_name(client))); ++ free(op); + } + ++/*! 
++ * \internal ++ * \brief Handle a message from an IPC client or CPG peer ++ * ++ * \param[in] client If not NULL, IPC client that sent message ++ * \param[in] id If from IPC client, IPC message ID ++ * \param[in] flags Message flags ++ * \param[in] message Message XML ++ * \param[in] remote_peer If not NULL, CPG peer that sent message ++ */ + void + stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, +- xmlNode *request, const char *remote_peer) ++ xmlNode *message, const char *remote_peer) + { +- int call_options = 0; +- int rc = 0; +- gboolean is_reply = FALSE; +- +- /* Copy op for reporting. The original might get freed by handle_reply() +- * before we use it in crm_debug(): +- * handle_reply() +- * |- process_remote_stonith_exec() +- * |-- remote_op_done() +- * |--- handle_local_reply_and_notify() +- * |---- crm_xml_add(...F_STONITH_OPERATION...) +- * |--- free_xml(op->request) +- */ +- char *op = crm_element_value_copy(request, F_STONITH_OPERATION); +- +- if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { +- is_reply = TRUE; +- } ++ int call_options = st_opt_none; ++ bool is_reply = get_xpath_object("//" T_STONITH_REPLY, message, ++ LOG_NEVER) != NULL; + +- crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); +- crm_debug("Processing %s%s %u from %s %s with call options 0x%08x", +- op, (is_reply? " reply" : ""), id, ++ crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options); ++ crm_debug("Processing %ssynchronous %s %s %u from %s %s", ++ pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", ++ crm_element_value(message, F_STONITH_OPERATION), ++ (is_reply? "reply" : "request"), id, + ((client == NULL)? "peer" : "client"), +- ((client == NULL)? remote_peer : pcmk__client_name(client)), +- call_options); ++ ((client == NULL)? remote_peer : pcmk__client_name(client))); + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); + } + + if (is_reply) { +- handle_reply(client, request, remote_peer); ++ handle_reply(client, message, remote_peer); + } else { +- rc = handle_request(client, id, flags, request, remote_peer); ++ handle_request(client, id, flags, message, remote_peer); + } +- +- crm_debug("Processed %s%s from %s %s: %s (rc=%d)", +- op, (is_reply? " reply" : ""), +- ((client == NULL)? "peer" : "client"), +- ((client == NULL)? remote_peer : pcmk__client_name(client)), +- ((rc > 0)? 
"" : pcmk_strerror(rc)), rc); +- free(op); + } +-- +2.27.0 + + +From 44cb340c11b4652f452a47eb2b0050b4a459382b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 15 Nov 2021 16:29:09 -0600 +Subject: [PATCH 08/19] Refactor: fencer: drop unused argument from + notification functions + +--- + daemons/fenced/fenced_commands.c | 12 ++++++------ + daemons/fenced/fenced_history.c | 6 +++--- + daemons/fenced/fenced_remote.c | 6 +++--- + daemons/fenced/pacemaker-fenced.c | 18 +++++++++--------- + daemons/fenced/pacemaker-fenced.h | 6 +++--- + 5 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 98af0e04f..946ce4042 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2428,8 +2428,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); + crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); + +- do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + free_xml(reply); +@@ -3082,7 +3082,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_device(call_options, op, rc, device_id); ++ do_stonith_notify_device(op, rc, device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); +@@ -3093,7 +3093,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_device(call_options, op, rc, device_id); ++ do_stonith_notify_device(op, rc, device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { + char *device_id = NULL; +@@ -3103,7 +3103,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_level(call_options, op, rc, device_id); ++ do_stonith_notify_level(op, rc, device_id); + free(device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { +@@ -3114,7 +3114,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_level(call_options, op, rc, device_id); ++ do_stonith_notify_level(op, rc, device_id); + + } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { + int node_id = 0; +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 1ba034ba9..7127593b6 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target, + g_hash_table_foreach_remove(stonith_remote_op_list, + stonith_remove_history_entry, + (gpointer) target); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + } + +@@ -396,7 +396,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history, + + if (updated) { + stonith_fence_history_trim(); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + if (cnt == 0) { +@@ -470,7 +470,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + is done so send a 
notification for anything + that smells like history-sync + */ +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL); + if (crm_element_value(msg, F_STONITH_CALLID)) { + /* this is coming from the stonith-API + * +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 9e2f62804..c907cd120 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -423,8 +423,8 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) + do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); + + /* bcast to all local clients that the fencing operation happend */ +- do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + + /* mark this op as having notify's already sent */ + op->notify_sent = TRUE; +@@ -1119,7 +1119,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) + + if (op->state != st_duplicate) { + /* kick history readers */ +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + /* safe to trim as long as that doesn't touch pending ops */ +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index a64004ce1..a290e1670 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -357,7 +357,7 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int + } + + void +-do_stonith_notify(int options, const char *type, int result, xmlNode * data) ++do_stonith_notify(const char *type, int result, xmlNode *data) + { + /* TODO: Standardize the contents of data */ + xmlNode *update_msg = create_xml_node(NULL, "notify"); +@@ -380,7 +380,7 @@ do_stonith_notify(int options, const char *type, int result, xmlNode * data) + } + + static void +-do_stonith_notify_config(int options, const char *op, int rc, ++do_stonith_notify_config(const char *op, int rc, + const char *desc, int active) + { + xmlNode *notify_data = create_xml_node(NULL, op); +@@ -390,20 +390,20 @@ do_stonith_notify_config(int options, const char *op, int rc, + crm_xml_add(notify_data, F_STONITH_DEVICE, desc); + crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); + +- do_stonith_notify(options, op, rc, notify_data); ++ do_stonith_notify(op, rc, notify_data); + free_xml(notify_data); + } + + void +-do_stonith_notify_device(int options, const char *op, int rc, const char *desc) ++do_stonith_notify_device(const char *op, int rc, const char *desc) + { +- do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); ++ do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list)); + } + + void +-do_stonith_notify_level(int options, const char *op, int rc, const char *desc) ++do_stonith_notify_level(const char *op, int rc, const char *desc) + { +- do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); ++ do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology)); + } + + static void +@@ -418,7 +418,7 @@ topology_remove_helper(const char *node, int level) + crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); + + rc = stonith_level_remove(data, &desc); +- do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); ++ 
do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc); + + free_xml(data); + free(desc); +@@ -468,7 +468,7 @@ handle_topology_change(xmlNode *match, bool remove) + } + + rc = stonith_level_register(match, &desc); +- do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); ++ do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc); + + free(desc); + } +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index a64b57693..3e41d867e 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -233,9 +233,9 @@ xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode + void + do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); + +-void do_stonith_notify(int options, const char *type, int result, xmlNode * data); +-void do_stonith_notify_device(int options, const char *op, int rc, const char *desc); +-void do_stonith_notify_level(int options, const char *op, int rc, const char *desc); ++void do_stonith_notify(const char *type, int result, xmlNode *data); ++void do_stonith_notify_device(const char *op, int rc, const char *desc); ++void do_stonith_notify_level(const char *op, int rc, const char *desc); + + remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client, + xmlNode *request, +-- +2.27.0 + + +From a49df4901b663b3366634c1d58f04625ecba4005 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 11:57:14 -0600 +Subject: [PATCH 09/19] Refactor: fencer: functionize checking for privileged + client + +... for readability and to make planned changes easier +--- + daemons/fenced/fenced_commands.c | 49 +++++++++++++++++++------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 946ce4042..34c956f5c 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2883,6 +2883,32 @@ remove_relay_op(xmlNode * request) + } + } + ++/*! ++ * \internal ++ * \brief Check whether an API request was sent by a privileged user ++ * ++ * API commands related to fencing configuration may be done only by privileged ++ * IPC users (i.e. root or hacluster), because all other users should go through ++ * the CIB to have ACLs applied. If no client was given, this is a peer request, ++ * which is always allowed. ++ * ++ * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) ++ * \param[in] op Requested API operation (for logging only) ++ * ++ * \return true if sender is peer or privileged client, otherwise false ++ */ ++static inline bool ++is_privileged(pcmk__client_t *c, const char *op) ++{ ++ if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { ++ return true; ++ } else { ++ crm_warn("Rejecting IPC request '%s' from unprivileged client %s", ++ crm_str(op), pcmk__client_name(c)); ++ return false; ++ } ++} ++ + static void + handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *request, const char *remote_peer) +@@ -2898,15 +2924,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + const char *op = crm_element_value(request, F_STONITH_OPERATION); + const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); + +- /* IPC commands related to fencing configuration may be done only by +- * privileged users (i.e. root or hacluster), because all other users should +- * go through the CIB to have ACLs applied. 
+- * +- * If no client was given, this is a peer request, which is always allowed. +- */ +- bool allowed = (client == NULL) +- || pcmk_is_set(client->flags, pcmk__client_privileged); +- + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + + if (pcmk_is_set(call_options, st_opt_sync_call)) { +@@ -3077,7 +3094,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { + const char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_device_register(request, &device_id, FALSE); + } else { + rc = -EACCES; +@@ -3088,7 +3105,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); + const char *device_id = crm_element_value(dev, XML_ATTR_ID); + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_device_remove(device_id, FALSE); + } else { + rc = -EACCES; +@@ -3098,7 +3115,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { + char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_level_register(request, &device_id); + } else { + rc = -EACCES; +@@ -3109,7 +3126,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { + char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_level_remove(request, &device_id); + } else { + rc = -EACCES; +@@ -3133,14 +3150,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } + + done: +- if (rc == -EACCES) { +- crm_warn("Rejecting IPC request '%s' from unprivileged client %s", +- crm_str(op), pcmk__client_name(client)); +- } +- + // Reply if result is known + if (need_reply) { +- + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); + } +-- +2.27.0 + + +From 10ca8a5ef5266159bc3f993802aeae6537ceeb11 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 16:59:03 -0600 +Subject: [PATCH 10/19] Low: fencer: return -ETIME for peer fencing timeouts + +94c55684 set the result as pcmk_ok, but it appears that the intent was just to +keep the delegate from being set, and -ETIME should still do that, while being +more appropriate. +--- + daemons/fenced/fenced_remote.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index c907cd120..dc7b802da 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -608,7 +608,7 @@ remote_op_timeout_one(gpointer userdata) + + crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS + " id=%.8s", op->action, op->target, op->client_name, op->id); +- call_remote_stonith(op, NULL, pcmk_ok); ++ call_remote_stonith(op, NULL, -ETIME); + return FALSE; + } + +-- +2.27.0 + + +From fb2eefeb695cc92e1a2aed6f1f1d2b900d4fb83e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 17:54:56 -0600 +Subject: [PATCH 11/19] Refactor: fencer: functionize common part of timeout + handling + +Previously, remote_op_timeout() was called from multiple places, but only one +of those places needed the full processing. 
The common part is now in a new +function finalize_timed_out_op() called from all the places, and +remote_op_timeout() now has just the additional processing needed by the one +place plus a call to the new function. + +This will allow a future change to set a different exit reason depending on +which step timed out. +--- + daemons/fenced/fenced_remote.c | 49 +++++++++++++++++++++++----------- + 1 file changed, 34 insertions(+), 15 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index dc7b802da..22c4b0772 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -612,20 +612,18 @@ remote_op_timeout_one(gpointer userdata) + return FALSE; + } + +-static gboolean +-remote_op_timeout(gpointer userdata) ++/*! ++ * \internal ++ * \brief Finalize a remote fencer operation that timed out ++ * ++ * \param[in] op Fencer operation that timed out ++ */ ++static void ++finalize_timed_out_op(remote_fencing_op_t *op) + { +- remote_fencing_op_t *op = userdata; + + op->op_timer_total = 0; + +- if (op->state == st_done) { +- crm_debug("Action '%s' targeting %s for client %s already completed " +- CRM_XS " id=%.8s", +- op->action, op->target, op->client_name, op->id); +- return FALSE; +- } +- + crm_debug("Action '%s' targeting %s for client %s timed out " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); +@@ -637,14 +635,35 @@ remote_op_timeout(gpointer userdata) + */ + op->state = st_done; + remote_op_done(op, NULL, pcmk_ok, FALSE); +- return FALSE; ++ return; + } + + op->state = st_failed; + + remote_op_done(op, NULL, -ETIME, FALSE); ++} + +- return FALSE; ++/*! ++ * \internal ++ * \brief Finalize a remote fencer operation that timed out ++ * ++ * \param[in] userdata Fencer operation that timed out ++ * ++ * \return G_SOURCE_REMOVE (which tells glib not to restart timer) ++ */ ++static gboolean ++remote_op_timeout(gpointer userdata) ++{ ++ remote_fencing_op_t *op = userdata; ++ ++ if (op->state == st_done) { ++ crm_debug("Action '%s' targeting %s for client %s already completed " ++ CRM_XS " id=%.8s", ++ op->action, op->target, op->client_name, op->id); ++ } else { ++ finalize_timed_out_op(userdata); ++ } ++ return G_SOURCE_REMOVE; + } + + static gboolean +@@ -670,7 +689,7 @@ remote_op_query_timeout(gpointer data) + g_source_remove(op->op_timer_total); + op->op_timer_total = 0; + } +- remote_op_timeout(op); ++ finalize_timed_out_op(op); + } + + return FALSE; +@@ -1675,8 +1694,8 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc) + crm_info("No remaining peers capable of fencing (%s) %s for client %s " + CRM_XS " state=%s", op->action, op->target, op->client_name, + stonith_op_state_str(op->state)); +- CRM_LOG_ASSERT(op->state < st_done); +- remote_op_timeout(op); ++ CRM_CHECK(op->state < st_done, return); ++ finalize_timed_out_op(op); + + } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { + // int rc = -EHOSTUNREACH; +-- +2.27.0 + + +From c047005a112ac7da5ba62084e39c79db739f0923 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 10:05:18 -0600 +Subject: [PATCH 12/19] Low: fencer: handle malformed manual confirmation + requests better + +Rename stonith_manual_ack() to fenced_handle_manual_confirmation(), and move +more of the manual confirmation handling in handle_request() into it, for +better code isolation. This will also make planned changes easier. 
+ +The one behavioral difference is that a failure of initiate_remote_stonith_op() +will now be ignored rather than segmentation fault trying to dereference NULL. +--- + daemons/fenced/fenced_commands.c | 20 ++++++++++++-------- + daemons/fenced/fenced_remote.c | 29 ++++++++++++++++++++++++----- + daemons/fenced/pacemaker-fenced.h | 2 +- + 3 files changed, 37 insertions(+), 14 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 34c956f5c..6f325b9e8 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -3012,14 +3012,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + if (remote_peer || stand_alone) { + rc = stonith_fence(request); + +- } else if (call_options & st_opt_manual_ack) { +- remote_fencing_op_t *rop = NULL; +- xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); +- const char *target = crm_element_value(dev, F_STONITH_TARGET); +- +- crm_notice("Received manual confirmation that %s is fenced", target); +- rop = initiate_remote_stonith_op(client, request, TRUE); +- rc = stonith_manual_ack(request, rop); ++ } else if (pcmk_is_set(call_options, st_opt_manual_ack)) { ++ switch (fenced_handle_manual_confirmation(client, request)) { ++ case pcmk_rc_ok: ++ rc = pcmk_ok; ++ break; ++ case EINPROGRESS: ++ rc = -EINPROGRESS; ++ break; ++ default: ++ rc = -EPROTO; ++ break; ++ } + + } else { + const char *alternate_host = NULL; +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 22c4b0772..60ee5e32e 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1003,22 +1003,41 @@ static uint32_t fencing_active_peers(void) + return count; + } + ++/*! ++ * \internal ++ * \brief Process a manual confirmation of a pending fence action ++ * ++ * \param[in] client IPC client that sent confirmation ++ * \param[in] msg Request XML with manual confirmation ++ * ++ * \return Standard Pacemaker return code ++ */ + int +-stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) ++fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg) + { ++ remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); + ++ CRM_CHECK(dev != NULL, return EPROTO); ++ ++ crm_notice("Received manual confirmation that %s has been fenced", ++ crm_str(crm_element_value(dev, F_STONITH_TARGET))); ++ op = initiate_remote_stonith_op(client, msg, TRUE); ++ if (op == NULL) { ++ return EPROTO; ++ } + op->state = st_done; + set_fencing_completed(op); + op->delegate = strdup("a human"); + +- crm_notice("Injecting manual confirmation that %s is safely off/down", +- crm_element_value(dev, F_STONITH_TARGET)); ++ // For the fencer's purposes, the fencing operation is done + + remote_op_done(op, msg, pcmk_ok, FALSE); + +- // Replies are sent via done_cb -> send_async_reply() -> do_local_reply() +- return -EINPROGRESS; ++ /* For the requester's purposes, the operation is still pending. The ++ * actual result will be sent asynchronously via the operation's done_cb(). ++ */ ++ return EINPROGRESS; + } + + /*! 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index 3e41d867e..cf88644f1 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -256,7 +256,7 @@ bool fencing_peer_active(crm_node_t *peer); + + void set_fencing_completed(remote_fencing_op_t * op); + +-int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); ++int fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg); + + gboolean node_has_attr(const char *node, const char *name, const char *value); + +-- +2.27.0 + + +From ec60f014b5a8f774aa57a26e40a2b1b94a7e3d3a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 10:35:31 -0600 +Subject: [PATCH 13/19] Low: fencer: handle malformed topology level removal + requests better + +Log the malformed request, and return -EPROTO instead of -EINVAL. If a request +is missing a level number, treat it as malformed instead of as a request to +remove all. +--- + daemons/fenced/fenced_commands.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 6f325b9e8..358844203 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1678,27 +1678,27 @@ stonith_level_register(xmlNode *msg, char **desc) + int + stonith_level_remove(xmlNode *msg, char **desc) + { +- int id = 0; ++ int id = -1; + stonith_topology_t *tp; + char *target; + + /* Unlike additions, removal requests should always have one level tag */ + xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); + +- CRM_CHECK(level != NULL, return -EINVAL); ++ CRM_CHECK(level != NULL, return -EPROTO); + + target = stonith_level_key(level, -1); + crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); ++ ++ CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX), ++ crm_log_xml_warn(msg, "invalid level"); ++ free(target); ++ return -EPROTO); ++ + if (desc) { + *desc = crm_strdup_printf("%s[%d]", target, id); + } + +- /* Sanity-check arguments */ +- if (id >= ST_LEVEL_MAX) { +- free(target); +- return -EINVAL; +- } +- + tp = g_hash_table_lookup(topology, target); + if (tp == NULL) { + guint nentries = g_hash_table_size(topology); +@@ -1714,7 +1714,7 @@ stonith_level_remove(xmlNode *msg, char **desc) + "(%d active %s remaining)", target, nentries, + pcmk__plural_alt(nentries, "entry", "entries")); + +- } else if (id > 0 && tp->levels[id] != NULL) { ++ } else if (tp->levels[id] != NULL) { + guint nlevels; + + g_list_free_full(tp->levels[id], free); +-- +2.27.0 + + +From ee0cfb6b284c2d6d21f8e77bf6ff286b1364235d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 12:33:05 -0600 +Subject: [PATCH 14/19] Refactor: fencer: avoid obscuring a variable + +handle_request() declared a xmlNode *reply variable, and then one of its "if" +blocks defined another one, obscuring the first. Drop the first declaration, +and instead move it to the one other place that needed it. + +Also remove a redundant assertion. 
+--- + daemons/fenced/fenced_commands.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 358844203..af0a92450 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2917,7 +2917,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + int rc = -EOPNOTSUPP; + + xmlNode *data = NULL; +- xmlNode *reply = NULL; + bool need_reply = true; + + char *output = NULL; +@@ -2926,8 +2925,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + +- if (pcmk_is_set(call_options, st_opt_sync_call)) { +- CRM_ASSERT(client == NULL || client->request_id == id); ++ if (pcmk_is_set(call_options, st_opt_sync_call) && (client != NULL)) { ++ CRM_ASSERT(client->request_id == id); + } + + if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { +@@ -3156,16 +3155,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + done: + // Reply if result is known + if (need_reply) { +- if (pcmk_is_set(call_options, st_opt_sync_call)) { +- CRM_ASSERT(client == NULL || client->request_id == id); +- } +- reply = stonith_construct_reply(request, output, data, rc); ++ xmlNode *reply = stonith_construct_reply(request, output, data, rc); ++ + stonith_send_reply(reply, call_options, remote_peer, client_id); ++ free_xml(reply); + } + + free(output); + free_xml(data); +- free_xml(reply); + + crm_debug("Processed %s request from %s %s: %s (rc=%d)", + op, ((client == NULL)? "peer" : "client"), +-- +2.27.0 + + +From a5fef7b95b7541860e29c1ff33be38db327208fb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 12:37:10 -0600 +Subject: [PATCH 15/19] Refactor: fencer: add convenience function for setting + protocol error result + +The fencer will soon track and return the full result (rather than just a +legacy return code) for fencing actions, for callbacks and notifications. +To simplify that process as well as move away from the legacy codes in general, +all fencer API operations will be modified to return a full result. + +This convenience function will come in handy for that. +--- + daemons/fenced/pacemaker-fenced.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index cf88644f1..3bc5dc3d1 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -262,6 +262,13 @@ gboolean node_has_attr(const char *node, const char *name, const char *value); + + gboolean node_does_watchdog_fencing(const char *node); + ++static inline void ++fenced_set_protocol_error(pcmk__action_result_t *result) ++{ ++ pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, ++ "Fencer API request missing required information (bug?)"); ++} ++ + extern char *stonith_our_uname; + extern gboolean stand_alone; + extern GHashTable *device_list; +-- +2.27.0 + + +From ed770d36fb34dc7b3344cd326830a6c06cc789ce Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 19 Nov 2021 09:59:51 -0600 +Subject: [PATCH 16/19] Refactor: fencer: make a few functions return void + +... to make planned changes easier. The return values were previously ignored. 
+--- + daemons/fenced/fenced_commands.c | 17 ++++++++------- + daemons/fenced/fenced_history.c | 6 +----- + daemons/fenced/fenced_remote.c | 35 ++++++++++++++----------------- + daemons/fenced/pacemaker-fenced.c | 6 +++--- + daemons/fenced/pacemaker-fenced.h | 8 +++---- + 5 files changed, 33 insertions(+), 39 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index af0a92450..ea7d281ce 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1411,8 +1411,8 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) + return pcmk_ok; + } + +-int +-stonith_device_remove(const char *id, gboolean from_cib) ++void ++stonith_device_remove(const char *id, bool from_cib) + { + stonith_device_t *device = g_hash_table_lookup(device_list, id); + guint ndevices = 0; +@@ -1421,7 +1421,7 @@ stonith_device_remove(const char *id, gboolean from_cib) + ndevices = g_hash_table_size(device_list); + crm_info("Device '%s' not found (%d active device%s)", + id, ndevices, pcmk__plural_s(ndevices)); +- return pcmk_ok; ++ return; + } + + if (from_cib) { +@@ -1443,7 +1443,6 @@ stonith_device_remove(const char *id, gboolean from_cib) + (device->cib_registered? " cib" : ""), + (device->api_registered? " api" : "")); + } +- return pcmk_ok; + } + + /*! +@@ -3085,8 +3084,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { +- rc = stonith_fence_history(request, &data, remote_peer, call_options); +- if (call_options & st_opt_discard_reply) { ++ stonith_fence_history(request, &data, remote_peer, call_options); ++ rc = pcmk_ok; ++ if (pcmk_is_set(call_options, st_opt_discard_reply)) { + /* we don't expect answers to the broadcast + * we might have sent out + */ +@@ -3109,7 +3109,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + const char *device_id = crm_element_value(dev, XML_ATTR_ID); + + if (is_privileged(client, op)) { +- rc = stonith_device_remove(device_id, FALSE); ++ stonith_device_remove(device_id, false); ++ rc = pcmk_ok; + } else { + rc = -EACCES; + } +@@ -3179,7 +3180,7 @@ handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) + if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + process_remote_stonith_query(request); + } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { +- process_remote_stonith_exec(request); ++ fenced_process_fencing_reply(request); + } else { + crm_err("Ignoring unknown %s reply from %s %s", + crm_str(op), ((client == NULL)? 
"peer" : "client"), +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 7127593b6..bc159383c 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -433,14 +433,11 @@ stonith_local_history(gboolean add_id, const char *target) + * a reply from + * \param[in] remote_peer + * \param[in] options call-options from the request +- * +- * \return always success as there is actully nothing that can go really wrong + */ +-int ++void + stonith_fence_history(xmlNode *msg, xmlNode **output, + const char *remote_peer, int options) + { +- int rc = 0; + const char *target = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER); + xmlNode *out_history = NULL; +@@ -525,5 +522,4 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + *output = stonith_local_history(FALSE, target); + } + free_xml(out_history); +- return rc; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 60ee5e32e..6338aebde 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -2086,11 +2086,9 @@ process_remote_stonith_query(xmlNode * msg) + * or attempt another device as appropriate. + * + * \param[in] msg XML reply received +- * +- * \return pcmk_ok on success, -errno on error + */ +-int +-process_remote_stonith_exec(xmlNode * msg) ++void ++fenced_process_fencing_reply(xmlNode *msg) + { + int rc = 0; + const char *id = NULL; +@@ -2098,13 +2096,13 @@ process_remote_stonith_exec(xmlNode * msg) + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + +- CRM_CHECK(dev != NULL, return -EPROTO); ++ CRM_CHECK(dev != NULL, return); + + id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); +- CRM_CHECK(id != NULL, return -EPROTO); ++ CRM_CHECK(id != NULL, return); + + dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); +- CRM_CHECK(dev != NULL, return -EPROTO); ++ CRM_CHECK(dev != NULL, return); + + crm_element_value_int(dev, F_STONITH_RC, &rc); + +@@ -2125,35 +2123,35 @@ process_remote_stonith_exec(xmlNode * msg) + /* Could be for an event that began before we started */ + /* TODO: Record the op for later querying */ + crm_info("Received peer result of unknown or expired operation %s", id); +- return -EOPNOTSUPP; ++ return; + } + + if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { + crm_err("Received outdated reply for device %s (instead of %s) to " + "fence (%s) %s. Operation already timed out at peer level.", + device, (const char *) op->devices->data, op->action, op->target); +- return rc; ++ return; + } + + if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) { + crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s " +- CRM_XS " rc=%d id=%.8s", ++ CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->originator, +- pcmk_strerror(rc), rc, op->id); ++ pcmk_strerror(rc), op->id); + if (rc == pcmk_ok) { + op->state = st_done; + } else { + op->state = st_failed; + } + remote_op_done(op, msg, rc, FALSE); +- return pcmk_ok; ++ return; + } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + /* If this isn't a remote level broadcast, and we are not the + * originator of the operation, we should not be receiving this msg. 
*/ + crm_err("Received non-broadcast fencing result for operation %.8s " + "we do not own (device %s targeting %s)", + op->id, device, op->target); +- return rc; ++ return; + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { +@@ -2168,7 +2166,7 @@ process_remote_stonith_exec(xmlNode * msg) + * and notify our local clients. */ + if (op->state == st_done) { + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } + + if ((op->phase == 2) && (rc != pcmk_ok)) { +@@ -2184,14 +2182,14 @@ process_remote_stonith_exec(xmlNode * msg) + /* An operation completed successfully. Try another device if + * necessary, otherwise mark the operation as done. */ + advance_topology_device_in_level(op, device, msg, rc); +- return rc; ++ return; + } else { + /* This device failed, time to try another topology level. If no other + * levels are available, mark this operation as failed and report results. */ + if (advance_topology_level(op, false) != pcmk_rc_ok) { + op->state = st_failed; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } + } + } else if (rc == pcmk_ok && op->devices == NULL) { +@@ -2199,12 +2197,12 @@ process_remote_stonith_exec(xmlNode * msg) + + op->state = st_done; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } else if (rc == -ETIME && op->devices == NULL) { + /* If the operation timed out don't bother retrying other peers. */ + op->state = st_failed; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } else { + /* fall-through and attempt other fencing action using another peer */ + } +@@ -2213,7 +2211,6 @@ process_remote_stonith_exec(xmlNode * msg) + crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, + op->client_name, rc); + call_remote_stonith(op, NULL, rc); +- return rc; + } + + gboolean +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index a290e1670..0a8b3bf6f 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -445,7 +445,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj) + + rsc_id = crm_element_value(match, XML_ATTR_ID); + +- stonith_device_remove(rsc_id, TRUE); ++ stonith_device_remove(rsc_id, true); + } + } + +@@ -610,7 +610,7 @@ watchdog_device_update(void) + } else { + /* be silent if no device - todo parameter to stonith_device_remove */ + if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { +- stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); ++ stonith_device_remove(STONITH_WATCHDOG_ID, true); + } + } + } +@@ -847,7 +847,7 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg) + } + if (search != NULL) { + *search = 0; +- stonith_device_remove(rsc_id, TRUE); ++ stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index 3bc5dc3d1..5162ada75 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -214,7 +214,7 @@ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, + + int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib); + +-int stonith_device_remove(const char *id, gboolean from_cib); ++void stonith_device_remove(const char *id, bool from_cib); + + char *stonith_level_key(xmlNode * msg, int mode); + int stonith_level_kind(xmlNode * msg); +@@ -241,14 +241,14 @@ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t 
*client, + xmlNode *request, + gboolean manual_ack); + +-int process_remote_stonith_exec(xmlNode * msg); ++void fenced_process_fencing_reply(xmlNode *msg); + + int process_remote_stonith_query(xmlNode * msg); + + void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); + +-int stonith_fence_history(xmlNode *msg, xmlNode **output, +- const char *remote_peer, int options); ++void stonith_fence_history(xmlNode *msg, xmlNode **output, ++ const char *remote_peer, int options); + + void stonith_fence_history_trim(void); + +-- +2.27.0 + + +From 27df49460930738e77f5ca42536aff1d3bdfcae7 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 19 Nov 2021 10:06:43 -0600 +Subject: [PATCH 17/19] Refactor: fencer: drop unnecessary argument when + advancing topology device + +If we're advancing to the next device in a topology level, by necessity that +means any previous device succeeded. +--- + daemons/fenced/fenced_remote.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 6338aebde..d54e6a4ef 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1519,14 +1519,13 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + * \internal + * \brief Advance an operation to the next device in its topology + * +- * \param[in,out] op Operation to advance +- * \param[in] device ID of device just completed +- * \param[in] msg XML reply that contained device result (if available) +- * \param[in] rc Return code of device's execution ++ * \param[in] op Fencer operation to advance ++ * \param[in] device ID of device that just completed ++ * \param[in] msg If not NULL, XML reply of last delegated fencing operation + */ + static void + advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, +- xmlNode *msg, int rc) ++ xmlNode *msg) + { + /* Advance to the next device at this topology level, if any */ + if (op->devices) { +@@ -1556,8 +1555,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + + if (op->devices) { + /* Necessary devices remain, so execute the next one */ +- crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)", +- op->target, op->client_name, op->originator, rc); ++ crm_trace("Next targeting %s on behalf of %s@%s", ++ op->target, op->client_name, op->originator); + + // The requested delay has been applied for the first device + if (op->delay > 0) { +@@ -1570,7 +1569,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + crm_trace("Marking complex fencing op targeting %s as complete", + op->target); + op->state = st_done; +- remote_op_done(op, msg, rc, FALSE); ++ remote_op_done(op, msg, pcmk_ok, FALSE); + } + } + +@@ -1701,7 +1700,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc) + */ + crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " + "after successful 'off'", device, op->target); +- advance_topology_device_in_level(op, device, NULL, pcmk_ok); ++ advance_topology_device_in_level(op, device, NULL); + return; + + } else if (op->owner == FALSE) { +@@ -2181,7 +2180,7 @@ fenced_process_fencing_reply(xmlNode *msg) + if (rc == pcmk_ok) { + /* An operation completed successfully. Try another device if + * necessary, otherwise mark the operation as done. 
*/ +- advance_topology_device_in_level(op, device, msg, rc); ++ advance_topology_device_in_level(op, device, msg); + return; + } else { + /* This device failed, time to try another topology level. If no other +-- +2.27.0 + + +From 05437e1339bc1f9071b43e97d5846a939687951d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 29 Nov 2021 11:59:17 -0600 +Subject: [PATCH 18/19] Refactor: fencer: minor renames for consistency + +... per review +--- + daemons/fenced/fenced_remote.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index d54e6a4ef..8feb40147 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -63,7 +63,7 @@ typedef struct device_properties_s { + int delay_base[st_phase_max]; + } device_properties_t; + +-typedef struct st_query_result_s { ++typedef struct { + /* Name of peer that sent this result */ + char *host; + /* Only try peers for non-topology based operations once */ +@@ -95,13 +95,12 @@ sort_strings(gconstpointer a, gconstpointer b) + static void + free_remote_query(gpointer data) + { +- if (data) { +- peer_device_info_t *query = data; ++ if (data != NULL) { ++ peer_device_info_t *peer = data; + +- crm_trace("Free'ing query result from %s", query->host); +- g_hash_table_destroy(query->devices); +- free(query->host); +- free(query); ++ g_hash_table_destroy(peer->devices); ++ free(peer->host); ++ free(peer); + } + } + +-- +2.27.0 + + +From 86974d7cef05bafbed540d02e59514292581ae65 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 08:33:41 -0600 +Subject: [PATCH 19/19] Refactor: fencer: simplify send_async_reply() + +... as suggested in review +--- + daemons/fenced/fenced_commands.c | 28 ++++++++++++---------------- + 1 file changed, 12 insertions(+), 16 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index ea7d281ce..f34cb4f13 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2384,36 +2384,34 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + int pid, bool merged) + { + xmlNode *reply = NULL; +- bool bcast = false; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + ++ log_async_result(cmd, result, pid, NULL, merged); ++ + reply = construct_async_reply(cmd, result); ++ if (merged) { ++ crm_xml_add(reply, F_STONITH_MERGED, "true"); ++ } + +- // If target was also the originator, broadcast fencing results for it + if (!stand_alone && pcmk__is_fencing_action(cmd->action) + && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) { +- ++ /* The target was also the originator, so broadcast the result on its ++ * behalf (since it will be unable to). 
++ */ + crm_trace("Broadcast '%s' result for %s (target was also originator)", + cmd->action, cmd->victim); + crm_xml_add(reply, F_SUBTYPE, "broadcast"); + crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); +- bcast = true; +- } +- +- log_async_result(cmd, result, pid, NULL, merged); +- +- if (merged) { +- crm_xml_add(reply, F_STONITH_MERGED, "true"); +- } +- crm_log_xml_trace(reply, "Reply"); +- +- if (bcast) { + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); + } else { ++ // Reply only to the originator + stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client); + } + ++ crm_log_xml_trace(reply, "Reply"); ++ free_xml(reply); ++ + if (stand_alone) { + /* Do notification with a clean data object */ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); +@@ -2430,8 +2428,6 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); + do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } +- +- free_xml(reply); + } + + static void +-- +2.27.0 + diff --git a/SOURCES/006-stateful-metadata.patch b/SOURCES/006-stateful-metadata.patch new file mode 100644 index 0000000..a9ea6f4 --- /dev/null +++ b/SOURCES/006-stateful-metadata.patch @@ -0,0 +1,143 @@ +From b52fe799c89637e2a761a5725c2376db5c05f2d1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 15:51:54 -0600 +Subject: [PATCH 1/2] Low: resources: remove DOCTYPE from OCF 1.1-compliant + agents + +OCF 1.1 replaced the DTD schema with RNG, but DOCTYPE still refers to the DTD. +There's no DOCTYPE for RNG, and DOCTYPE is optional, so just remove it. +--- + extra/resources/Dummy | 3 +-- + extra/resources/HealthIOWait | 3 +-- + extra/resources/Stateful | 3 +-- + extra/resources/attribute | 3 +-- + extra/resources/ping | 3 +-- + extra/resources/remote | 3 +-- + 6 files changed, 6 insertions(+), 12 deletions(-) + +diff --git a/extra/resources/Dummy b/extra/resources/Dummy +index a344deac0..56584e564 100755 +--- a/extra/resources/Dummy ++++ b/extra/resources/Dummy +@@ -58,8 +58,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/HealthIOWait b/extra/resources/HealthIOWait +index 43a8b70c4..5f1483ef7 100755 +--- a/extra/resources/HealthIOWait ++++ b/extra/resources/HealthIOWait +@@ -25,8 +25,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/Stateful b/extra/resources/Stateful +index ae3424bbf..0d2062d51 100755 +--- a/extra/resources/Stateful ++++ b/extra/resources/Stateful +@@ -39,8 +39,7 @@ SCORE_PROMOTED=10 + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/attribute b/extra/resources/attribute +index 1800dff8f..a2bd353e0 100755 +--- a/extra/resources/attribute ++++ b/extra/resources/attribute +@@ -57,8 +57,7 @@ END + meta_data() { + cat < +- +- ++ + 1.1 + Manages a node attribute + +diff --git a/extra/resources/ping b/extra/resources/ping +index 6e296979f..7cc6b802d 100755 +--- a/extra/resources/ping ++++ b/extra/resources/ping +@@ -36,8 +36,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/remote b/extra/resources/remote +index a53262bb6..f7e40dc81 100755 +--- a/extra/resources/remote ++++ b/extra/resources/remote +@@ -24,8 +24,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + Pacemaker Remote connection + +-- +2.27.0 + + +From 70f469120f8db6a024c786466ee74a6c7fbd1f43 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 15:53:39 -0600 +Subject: [PATCH 2/2] Fix: resources: use correct 
syntax in Stateful meta-data + +The OCF standard only allows "0" or "1" for booleans. + +This fixes incorrect ocf:pacemaker:Stateful meta-data syntax introduced by +7024398 as a regression in the 2.1.0 release. +--- + extra/resources/Stateful | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extra/resources/Stateful b/extra/resources/Stateful +index 0d2062d51..2ebe6725f 100755 +--- a/extra/resources/Stateful ++++ b/extra/resources/Stateful +@@ -57,7 +57,7 @@ Location to store the resource state in + + + +- ++ + + If this is set, the environment will be dumped to this file for every call. + +@@ -65,7 +65,7 @@ If this is set, the environment will be dumped to this file for every call. + + + +- ++ + + The notify action will sleep for this many seconds before returning, + to simulate a long-running notify. +-- +2.27.0 + diff --git a/SOURCES/007-memory-leak.patch b/SOURCES/007-memory-leak.patch new file mode 100644 index 0000000..38ad3a2 --- /dev/null +++ b/SOURCES/007-memory-leak.patch @@ -0,0 +1,39 @@ +From f491d9d5a7ed554fed985de356bb085fdec3421c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 7 Dec 2021 09:01:00 -0600 +Subject: [PATCH] Fix: fencer: avoid memory leak when broadcasting history + differences + +Regression introduced in 2.1.0 by dbc27b2 +--- + daemons/fenced/fenced_history.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index bc159383c..a9c57dc86 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -484,8 +484,6 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) { + xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST, + msg, LOG_NEVER); +- GHashTable *received_history = +- history?stonith_xml_history_to_list(history):NULL; + + /* either a broadcast created directly upon stonith-API request + * or a diff as response to such a thing +@@ -497,6 +495,11 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + if (!history || + !crm_is_true(crm_element_value(history, + F_STONITH_DIFFERENTIAL))) { ++ GHashTable *received_history = NULL; ++ ++ if (history != NULL) { ++ received_history = stonith_xml_history_to_list(history); ++ } + out_history = + stonith_local_history_diff_and_merge(received_history, TRUE, NULL); + if (out_history) { +-- +2.27.0 + diff --git a/SOURCES/008-fencing-history.patch b/SOURCES/008-fencing-history.patch new file mode 100644 index 0000000..1ea9ac7 --- /dev/null +++ b/SOURCES/008-fencing-history.patch @@ -0,0 +1,43 @@ +From 0339e89f3238b31df78b864dae8684b82c370741 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 13 Dec 2021 15:22:40 -0600 +Subject: [PATCH] Fix: fencer: get current time correctly + +f52bc8e1ce (2.1.2) introduced a regression by using clock_gettime() with +CLOCK_MONOTONIC to get the current time. Use qb_util_timespec_from_epoch_get() +instead (which as of this writing uses clock_gettime() with CLOCK_REALTIME if +available, and falls back to gettimeofday() if not). 
+--- + daemons/fenced/fenced_commands.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index f34cb4f13..7685cb8c3 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2746,19 +2746,14 @@ bool fencing_peer_active(crm_node_t *peer) + return FALSE; + } + +-void set_fencing_completed(remote_fencing_op_t * op) ++void ++set_fencing_completed(remote_fencing_op_t *op) + { +-#ifdef CLOCK_MONOTONIC + struct timespec tv; + +- clock_gettime(CLOCK_MONOTONIC, &tv); +- ++ qb_util_timespec_from_epoch_get(&tv); + op->completed = tv.tv_sec; + op->completed_nsec = tv.tv_nsec; +-#else +- op->completed = time(NULL); +- op->completed_nsec = 0L; +-#endif + } + + /*! +-- +2.27.0 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 82993ae..459a81a 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -36,7 +36,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.2 -%global specversion 1 +%global specversion 2 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175 @@ -266,6 +266,11 @@ Source1: nagios-agents-metadata-%{nagios_hash}.tar.gz Patch1: 001-acl-group-schema.patch Patch2: 002-fencing-reasons.patch Patch3: 003-fencing-reasons.patch +Patch4: 004-systemd-metadata.patch +Patch5: 005-fencing-reasons.patch +Patch6: 006-stateful-metadata.patch +Patch7: 007-memory-leak.patch +Patch8: 008-fencing-history.patch # downstream-only commits #Patch1xx: 1xx-xxxx.patch @@ -963,6 +968,14 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Thu Dec 16 2021 Ken Gaillot - 2.1.2-2 +- Correctly get metadata for systemd agent names that end in '@' +- Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data +- Fix regression in displayed times in crm_mon's fence history +- Resolves: rhbz2003151 +- Resolves: rhbz2027370 +- Resolves: rhbz2032027 + * Tue Nov 30 2021 Ken Gaillot - 2.1.2-1 - Allow per-host fence delays for a single fence device - Use OCF 1.1 enum type in cluster option metadata for better validation
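Note on the spec change above: the new Patch4 through Patch8 tags only declare the patch files; they take effect when the spec's prep stage applies them, and that section is not part of this diff. The lines below are only a hypothetical sketch of the conventional numbered-patch form, for orientation -- the real pacemaker.spec may instead use autosetup or a git-am based application with different options.

    # Hypothetical sketch only; the actual prep stage is not shown in this diff.
    %prep
    %setup -q
    %patch1 -p1
    %patch2 -p1
    %patch3 -p1
    %patch4 -p1
    %patch5 -p1
    %patch6 -p1
    %patch7 -p1
    %patch8 -p1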