diff --git a/.gitignore b/.gitignore index 4f288db..5d39cf7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -SOURCES/pacemaker-7c3f66070.tar.gz +SOURCES/pacemaker-ada5c3b36.tar.gz diff --git a/.pacemaker.metadata b/.pacemaker.metadata index e991752..56942a1 100644 --- a/.pacemaker.metadata +++ b/.pacemaker.metadata @@ -1,2 +1,2 @@ 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz -55917d293ae0f9b79ac196fae061ba95647382e1 SOURCES/pacemaker-7c3f66070.tar.gz +1dec5b062ad8e9a89b4953e17a59e4597797a1e6 SOURCES/pacemaker-ada5c3b36.tar.gz diff --git a/SOURCES/001-acl-group-schema.patch b/SOURCES/001-acl-group-schema.patch new file mode 100644 index 0000000..4835e3e --- /dev/null +++ b/SOURCES/001-acl-group-schema.patch @@ -0,0 +1,230 @@ +From f5ffbaf1f537d3d5b00e594211cd322f97df51ac Mon Sep 17 00:00:00 2001 +From: Grace Chin +Date: Fri, 5 Nov 2021 11:39:39 -0400 +Subject: [PATCH 1/3] Low: xml: clone acls schema in preparation for changes + +--- + xml/acls-3.8.rng | 80 ++++++++++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 80 insertions(+) + create mode 100644 xml/acls-3.8.rng + +diff --git a/xml/acls-3.8.rng b/xml/acls-3.8.rng +new file mode 100644 +index 000000000..0fe6eed96 +--- /dev/null ++++ b/xml/acls-3.8.rng +@@ -0,0 +1,80 @@ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ read ++ write ++ deny ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ +-- +2.27.0 + + +From 7838213fc639236bdedf5f15320152d973f1bdad Mon Sep 17 00:00:00 2001 +From: Grace Chin +Date: Fri, 5 Nov 2021 11:40:48 -0400 +Subject: [PATCH 2/3] Add a 'name' attribute to acl_target and acl_group + elements + +--- + xml/acls-3.8.rng | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/xml/acls-3.8.rng 
b/xml/acls-3.8.rng +index 0fe6eed96..48bcdffe3 100644 +--- a/xml/acls-3.8.rng ++++ b/xml/acls-3.8.rng +@@ -13,6 +13,9 @@ + + + ++ ++ ++ + + + +@@ -22,6 +25,9 @@ + + + ++ ++ ++ + + + +-- +2.27.0 + + +From c3c498f4636f57e29670f8e385b625024ed222d7 Mon Sep 17 00:00:00 2001 +From: Grace Chin +Date: Fri, 5 Nov 2021 11:42:48 -0400 +Subject: [PATCH 3/3] Changes made by run of 'cts/cts-cli -s' + +--- + cts/cli/regression.upgrade.exp | 7 +++++-- + cts/cli/regression.validity.exp | 22 ++++++++++++++++++---- + 2 files changed, 23 insertions(+), 6 deletions(-) + +diff --git a/cts/cli/regression.upgrade.exp b/cts/cli/regression.upgrade.exp +index e38adebdd..7ce7ec13b 100644 +--- a/cts/cli/regression.upgrade.exp ++++ b/cts/cli/regression.upgrade.exp +@@ -91,8 +91,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6 + update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7 + update_validation debug: Testing 'pacemaker-3.7' validation (21 of X) + update_validation debug: Configuration valid for schema: pacemaker-3.7 +-update_validation trace: Stopping at pacemaker-3.7 +-update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.7 ++update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8 ++update_validation debug: Testing 'pacemaker-3.8' validation (22 of X) ++update_validation debug: Configuration valid for schema: pacemaker-3.8 ++update_validation trace: Stopping at pacemaker-3.8 ++update_validation info: Transformed the configuration from pacemaker-2.10 to pacemaker-3.8 + =#=#=#= Current cib after: Upgrade to latest CIB schema (trigger 2.10.xsl + the wrapping) =#=#=#= + + +diff --git a/cts/cli/regression.validity.exp b/cts/cli/regression.validity.exp +index 5ace430e7..125035a47 100644 +--- a/cts/cli/regression.validity.exp ++++ b/cts/cli/regression.validity.exp +@@ -121,7 +121,11 @@ update_validation debug: Testing 'pacemaker-3.7' validation (21 of X) + 
element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + update_validation trace: pacemaker-3.7 validation failed +-Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.7 ++update_validation debug: Testing 'pacemaker-3.8' validation (22 of X) ++element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order ++element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++update_validation trace: pacemaker-3.8 validation failed ++Cannot upgrade configuration (claiming schema pacemaker-1.2) to at least pacemaker-3.0 because it does not validate with any schema from pacemaker-1.2 to pacemaker-3.8 + =#=#=#= End test: Run crm_simulate with invalid CIB (enum violation) - Invalid configuration (78) =#=#=#= + * Passed: crm_simulate - Run crm_simulate with invalid CIB (enum violation) + =#=#=#= Begin test: Try to make resulting CIB invalid (unrecognized validate-with) =#=#=#= +@@ -226,7 +230,10 @@ update_validation trace: pacemaker-3.6 validation failed + update_validation debug: Testing 'pacemaker-3.7' validation (21 of X) + element cib: Relax-NG validity error : Invalid attribute validate-with for element cib + update_validation trace: pacemaker-3.7 validation failed +-Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not validate with any schema from unknown to pacemaker-3.7 ++update_validation debug: Testing 'pacemaker-3.8' validation (22 of X) ++element cib: Relax-NG validity error : Invalid attribute validate-with for element cib ++update_validation trace: pacemaker-3.8 validation failed ++Cannot upgrade configuration (claiming schema pacemaker-9999.0) to at least pacemaker-3.0 because it does not 
validate with any schema from unknown to pacemaker-3.8 + =#=#=#= End test: Run crm_simulate with invalid CIB (unrecognized validate-with) - Invalid configuration (78) =#=#=#= + * Passed: crm_simulate - Run crm_simulate with invalid CIB (unrecognized validate-with) + =#=#=#= Begin test: Try to make resulting CIB invalid, but possibly recoverable (valid with X.Y+1) =#=#=#= +@@ -326,8 +333,11 @@ update_validation debug: Configuration valid for schema: pacemaker-3.6 + update_validation debug: pacemaker-3.6-style configuration is also valid for pacemaker-3.7 + update_validation debug: Testing 'pacemaker-3.7' validation (21 of X) + update_validation debug: Configuration valid for schema: pacemaker-3.7 +-update_validation trace: Stopping at pacemaker-3.7 +-update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.7 ++update_validation debug: pacemaker-3.7-style configuration is also valid for pacemaker-3.8 ++update_validation debug: Testing 'pacemaker-3.8' validation (22 of X) ++update_validation debug: Configuration valid for schema: pacemaker-3.8 ++update_validation trace: Stopping at pacemaker-3.8 ++update_validation info: Transformed the configuration from pacemaker-1.2 to pacemaker-3.8 + unpack_resources error: Resource start-up disabled since no STONITH resources have been defined + unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option + unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity +@@ -437,6 +447,8 @@ element rsc_order: Relax-NG validity error : Invalid attribute first-action for + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++element rsc_order: Relax-NG validity error : Invalid attribute first-action 
for element rsc_order ++element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + =#=#=#= Current cib after: Make resulting CIB invalid, and without validate-with attribute =#=#=#= + + +@@ -502,6 +514,8 @@ validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attrib + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order + validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order ++validity.bad.xml:10: element rsc_order: Relax-NG validity error : Invalid attribute first-action for element rsc_order ++validity.bad.xml:10: element rsc_order: Relax-NG validity error : Element constraints has extra content: rsc_order + unpack_resources error: Resource start-up disabled since no STONITH resources have been defined + unpack_resources error: Either configure some or disable STONITH with the stonith-enabled option + unpack_resources error: NOTE: Clusters with shared data need STONITH to ensure data integrity +-- +2.27.0 + diff --git a/SOURCES/001-ping-agent.patch b/SOURCES/001-ping-agent.patch deleted file mode 100644 index 89fe41a..0000000 --- a/SOURCES/001-ping-agent.patch +++ /dev/null @@ -1,225 +0,0 @@ -From c6ee0973522268ed7b3241cf0ec2e06398444114 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Tue, 4 May 2021 12:02:17 -0400 -Subject: [PATCH 1/4] Remove deprecated attrd_options - ---- - extra/resources/ping | 11 +++-------- - 1 file changed, 3 insertions(+), 8 deletions(-) - -diff --git a/extra/resources/ping b/extra/resources/ping -index 3cf8dfe..2e93f22 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -178,7 +178,7 @@ ping_stop() { - - rm -f "${OCF_RESKEY_pidfile}" - -- attrd_updater -D -n "$OCF_RESKEY_name" -d "$OCF_RESKEY_dampen" $attrd_options -+ 
attrd_updater -D -n "$OCF_RESKEY_name" -d "$OCF_RESKEY_dampen" - - return $OCF_SUCCESS - } -@@ -302,9 +302,9 @@ ping_update() { - - score=$(expr $active \* $OCF_RESKEY_multiplier) - if [ "$__OCF_ACTION" = "start" ] ; then -- attrd_updater -n "$OCF_RESKEY_name" -B "$score" -d "$OCF_RESKEY_dampen" $attrd_options -+ attrd_updater -n "$OCF_RESKEY_name" -B "$score" -d "$OCF_RESKEY_dampen" - else -- attrd_updater -n "$OCF_RESKEY_name" -v "$score" -d "$OCF_RESKEY_dampen" $attrd_options -+ attrd_updater -n "$OCF_RESKEY_name" -v "$score" -d "$OCF_RESKEY_dampen" - fi - rc=$? - case $rc in -@@ -396,11 +396,6 @@ case "${OCF_RESKEY_debug}" in - ;; - esac - --attrd_options='-q' --if [ "${OCF_RESKEY_debug}" = "true" ]; then -- attrd_options='' --fi -- - case "$__OCF_ACTION" in - meta-data) meta_data - exit $OCF_SUCCESS --- -1.8.3.1 - - -From 6d6c4691cf0970059689856c354daf9e098b4451 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Tue, 4 May 2021 14:50:37 -0400 -Subject: [PATCH 2/4] Replace debug values, true and false, with 0 and 1 - ---- - extra/resources/ping | 8 ++++---- - 1 file changed, 4 insertions(+), 4 deletions(-) - -diff --git a/extra/resources/ping b/extra/resources/ping -index 2e93f22..fee019b 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -24,7 +24,7 @@ - : ${OCF_RESKEY_dampen:="5s"} - : ${OCF_RESKEY_attempts:="3"} - : ${OCF_RESKEY_multiplier:="1"} --: ${OCF_RESKEY_debug:="false"} -+: ${OCF_RESKEY_debug:="0"} - : ${OCF_RESKEY_failure_score:="0"} - : ${OCF_RESKEY_use_fping:="1"} - : ${OCF_RESKEY_host_list:=""} -@@ -152,7 +152,7 @@ END - - ping_conditional_log() { - level="$1"; shift -- if [ "${OCF_RESKEY_debug}" = "true" ]; then -+ if [ $OCF_RESKEY_debug -gt 0 ]; then - ocf_log "$level" "$*" - fi - } -@@ -388,8 +388,8 @@ fi - - # Check the debug option - case "${OCF_RESKEY_debug}" in -- true|True|TRUE|1) OCF_RESKEY_debug=true;; -- false|False|FALSE|0) OCF_RESKEY_debug=false;; -+ true|True|TRUE|1) OCF_RESKEY_debug=0;; -+ false|False|FALSE|0) 
OCF_RESKEY_debug=1;; - *) - ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" - OCF_RESKEY_debug=false --- -1.8.3.1 - - -From a886a31056b6aca764c6911f5432af2c5ebf51df Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Tue, 11 May 2021 11:04:50 -0400 -Subject: [PATCH 3/4] Add verbose debug mode which logs ping and fping output - when set - ---- - extra/resources/ping | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/extra/resources/ping b/extra/resources/ping -index fee019b..cc796af 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -249,10 +249,13 @@ fping_check() { - - case $rc in - 0) -+ if [ $OCF_RESKEY_debug -gt 1 ]; then -+ ping_conditional_log info "$output" -+ fi - ;; - 1) - for h in $(echo "$output" | grep "is unreachable" | awk '{print $1}'); do -- ping_conditional_log warn "$h is inactive" -+ ping_conditional_log warn "$h is inactive: $output" - done - ;; - *) -@@ -282,7 +285,12 @@ ping_check() { - p_out=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? - - case $rc in -- 0) active=$(expr $active + 1);; -+ 0) -+ active=$(expr $active + 1) -+ if [ $OCF_RESKEY_debug -gt 1 ]; then -+ ping_conditional_log info "$p_out" -+ fi -+ ;; - 1) ping_conditional_log warn "$host is inactive: $p_out";; - *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; - esac -@@ -388,10 +396,11 @@ fi - - # Check the debug option - case "${OCF_RESKEY_debug}" in -- true|True|TRUE|1) OCF_RESKEY_debug=0;; -- false|False|FALSE|0) OCF_RESKEY_debug=1;; -+ true|True|TRUE|1) OCF_RESKEY_debug=1;; -+ false|False|FALSE|0) OCF_RESKEY_debug=0;; -+ verbose|Verbose|VERBOSE|2) OCF_RESKEY_debug=2;; - *) -- ocf_log warn "Value for 'debug' is incorrect. Please specify 'true' or 'false' not: ${OCF_RESKEY_debug}" -+ ocf_log warn "Value for 'debug' is incorrect. 
Please specify 'true', 'false', or 'verbose', not: ${OCF_RESKEY_debug}" - OCF_RESKEY_debug=false - ;; - esac --- -1.8.3.1 - - -From 460043f133ced80e923b1290af70502a72deb7f8 Mon Sep 17 00:00:00 2001 -From: Grace Chin -Date: Tue, 11 May 2021 11:07:05 -0400 -Subject: [PATCH 4/4] Improve variable names - ---- - extra/resources/ping | 20 ++++++++++---------- - 1 file changed, 10 insertions(+), 10 deletions(-) - -diff --git a/extra/resources/ping b/extra/resources/ping -index cc796af..9763b60 100755 ---- a/extra/resources/ping -+++ b/extra/resources/ping -@@ -244,22 +244,22 @@ fping_check() { - timeout=$(expr $OCF_RESKEY_timeout \* 1000 / $OCF_RESKEY_attempts) - - cmd="$p_exe -r $OCF_RESKEY_attempts -t $timeout -B 1.0 $OCF_RESKEY_options $OCF_RESKEY_host_list" -- output=$($cmd 2>&1); rc=$? -- active=$(echo "$output" | grep "is alive" | wc -l) -+ fping_output=$($cmd 2>&1); rc=$? -+ active=$(echo "$fping_output" | grep "is alive" | wc -l) - - case $rc in - 0) - if [ $OCF_RESKEY_debug -gt 1 ]; then -- ping_conditional_log info "$output" -+ ping_conditional_log info "$fping_output" - fi - ;; - 1) -- for h in $(echo "$output" | grep "is unreachable" | awk '{print $1}'); do -- ping_conditional_log warn "$h is inactive: $output" -+ for h in $(echo "$fping_output" | grep "is unreachable" | awk '{print $1}'); do -+ ping_conditional_log warn "$h is inactive: $fping_output" - done - ;; - *) -- ocf_log err "Unexpected result for '$cmd' $rc: $(echo "$output" | tr '\n' ';')" -+ ocf_log err "Unexpected result for '$cmd' $rc: $(echo "$fping_output" | tr '\n' ';')" - ;; - esac - -@@ -282,17 +282,17 @@ ping_check() { - *:*) p_exe=ping6 - esac - -- p_out=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? -+ ping_output=$($p_exe $p_args $OCF_RESKEY_options $host 2>&1); rc=$? 
- - case $rc in - 0) - active=$(expr $active + 1) - if [ $OCF_RESKEY_debug -gt 1 ]; then -- ping_conditional_log info "$p_out" -+ ping_conditional_log info "$ping_output" - fi - ;; -- 1) ping_conditional_log warn "$host is inactive: $p_out";; -- *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $p_out";; -+ 1) ping_conditional_log warn "$host is inactive: $ping_output";; -+ *) ocf_log err "Unexpected result for '$p_exe $p_args $OCF_RESKEY_options $host' $rc: $ping_output";; - esac - done - return $active --- -1.8.3.1 - diff --git a/SOURCES/002-fencing-reasons.patch b/SOURCES/002-fencing-reasons.patch new file mode 100644 index 0000000..f89cbec --- /dev/null +++ b/SOURCES/002-fencing-reasons.patch @@ -0,0 +1,2100 @@ +From 95b4f87aae5fb2cf771cf9a8f8e5420b65fb213f Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Sep 2021 10:47:51 -0500 +Subject: [PATCH 01/12] Refactor: fencing: use pcmk__action_result_t in + stonith_action_t + +stonith_action_t previously had an rc member for a legacy return code, along +with output and error members for action stdout/stderr. When setting rc based +on the svc_action_t result, it used a mapping function svc_action_to_errno(). + +This replaces those with a pcmk__action_result_t member, which means we now +track the exit status and execution status as originally set by libcrmservice, +rather than the mapped rc. The library now calls the mapping function, now +returning standard codes and called result2rc(), when calling the client +callback. + +The exit_reason member is unused as of this commit. + +The behavior should be identical, with the small exception of +services_action_async() failure leaving the exit status as set by the services +library, which means callers will get the result2rc() mapping of the actual +result instead of the former -ECONNABORTED. 
+--- + lib/fencing/st_client.c | 118 +++++++++++++++++++++++----------------- + 1 file changed, 68 insertions(+), 50 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 08adb51c6..6c607b010 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + +@@ -57,9 +58,7 @@ struct stonith_action_s { + int max_retries; + + int pid; +- int rc; +- char *output; +- char *error; ++ pcmk__action_result_t result; + }; + + typedef struct stonith_private_s { +@@ -120,6 +119,7 @@ static void stonith_connection_destroy(gpointer user_data); + static void stonith_send_notification(gpointer data, gpointer user_data); + static int internal_stonith_action_execute(stonith_action_t * action); + static void log_action(stonith_action_t *action, pid_t pid); ++static int result2rc(const pcmk__action_result_t *result); + + /*! + * \brief Get agent namespace by name +@@ -196,6 +196,23 @@ stonith_get_namespace(const char *agent, const char *namespace_s) + return st_namespace_invalid; + } + ++/*! 
++ * \internal ++ * \brief Set an action's result based on services library result ++ * ++ * \param[in] action Fence action to set result for ++ * \param[in] svc_action Service action to get result from ++ */ ++static void ++set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action) ++{ ++ pcmk__set_result(&(action->result), svc_action->rc, svc_action->status, ++ NULL); ++ pcmk__set_result_output(&(action->result), ++ services__grab_stdout(svc_action), ++ services__grab_stderr(svc_action)); ++} ++ + gboolean + stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) + { +@@ -259,19 +276,19 @@ stonith__watchdog_fencing_enabled_for_node(const char *node) + static void + log_action(stonith_action_t *action, pid_t pid) + { +- if (action->output) { ++ if (action->result.action_stdout != NULL) { + /* Logging the whole string confuses syslog when the string is xml */ + char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); + +- crm_log_output(LOG_TRACE, prefix, action->output); ++ crm_log_output(LOG_TRACE, prefix, action->result.action_stdout); + free(prefix); + } + +- if (action->error) { ++ if (action->result.action_stderr != NULL) { + /* Logging the whole string confuses syslog when the string is xml */ + char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); + +- crm_log_output(LOG_WARNING, prefix, action->error); ++ crm_log_output(LOG_WARNING, prefix, action->result.action_stderr); + free(prefix); + } + } +@@ -645,8 +662,7 @@ stonith__destroy_action(stonith_action_t *action) + if (action->svc_action) { + services_action_free(action->svc_action); + } +- free(action->output); +- free(action->error); ++ pcmk__reset_result(&(action->result)); + free(action); + } + } +@@ -678,15 +694,15 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output, + } + if (action != NULL) { + if (rc) { +- *rc = action->rc; ++ *rc = pcmk_rc2legacy(result2rc(&(action->result))); + } +- if (output && 
action->output) { +- *output = action->output; +- action->output = NULL; // hand off memory management to caller ++ if ((output != NULL) && (action->result.action_stdout != NULL)) { ++ *output = action->result.action_stdout; ++ action->result.action_stdout = NULL; // hand off ownership to caller + } +- if (error_output && action->error) { +- *error_output = action->error; +- action->error = NULL; // hand off memory management to caller ++ if ((error_output != NULL) && (action->result.action_stderr != NULL)) { ++ *error_output = action->result.action_stderr; ++ action->result.action_stderr = NULL; // hand off ownership to caller + } + } + } +@@ -715,6 +731,9 @@ stonith_action_create(const char *agent, + action->timeout = action->remaining_timeout = timeout; + action->max_retries = FAILURE_MAX_RETRIES; + ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, ++ NULL); ++ + if (device_args) { + char buffer[512]; + const char *value = NULL; +@@ -739,7 +758,8 @@ update_remaining_timeout(stonith_action_t * action) + crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", + action->agent, action->action, action->max_retries); + action->remaining_timeout = 0; +- } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) { ++ } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT) ++ && (diff < (action->timeout * 0.7))) { + /* only set remaining timeout period if there is 30% + * or greater of the original timeout period left */ + action->remaining_timeout = action->timeout - diff; +@@ -750,31 +770,31 @@ update_remaining_timeout(stonith_action_t * action) + } + + static int +-svc_action_to_errno(svc_action_t *svc_action) { +- int rv = pcmk_ok; ++result2rc(const pcmk__action_result_t *result) { ++ int rc = pcmk_rc_ok; + +- if (svc_action->status == PCMK_EXEC_TIMEOUT) { +- rv = -ETIME; ++ if (result->execution_status == PCMK_EXEC_TIMEOUT) { ++ rc = ETIME; + +- } else if (svc_action->rc != PCMK_OCF_OK) { 
++ } else if (result->exit_status != CRM_EX_OK) { + /* Try to provide a useful error code based on the fence agent's + * error output. + */ +- if (svc_action->stderr_data == NULL) { +- rv = -ENODATA; ++ if (result->action_stderr == NULL) { ++ rc = ENODATA; + +- } else if (strstr(svc_action->stderr_data, "imed out")) { ++ } else if (strstr(result->action_stderr, "imed out")) { + /* Some agents have their own internal timeouts */ +- rv = -ETIME; ++ rc = ETIME; + +- } else if (strstr(svc_action->stderr_data, "Unrecognised action")) { +- rv = -EOPNOTSUPP; ++ } else if (strstr(result->action_stderr, "Unrecognised action")) { ++ rc = EOPNOTSUPP; + + } else { +- rv = -pcmk_err_generic; ++ rc = pcmk_rc_error; + } + } +- return rv; ++ return rc; + } + + static void +@@ -782,11 +802,7 @@ stonith_action_async_done(svc_action_t *svc_action) + { + stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; + +- action->rc = svc_action_to_errno(svc_action); +- action->output = svc_action->stdout_data; +- svc_action->stdout_data = NULL; +- action->error = svc_action->stderr_data; +- svc_action->stderr_data = NULL; ++ set_result_from_svc_action(action, svc_action); + + svc_action->params = NULL; + +@@ -795,7 +811,9 @@ stonith_action_async_done(svc_action_t *svc_action) + + log_action(action, action->pid); + +- if (action->rc != pcmk_ok && update_remaining_timeout(action)) { ++ if ((action->result.exit_status != CRM_EX_OK) ++ && update_remaining_timeout(action)) { ++ + int rc = internal_stonith_action_execute(action); + if (rc == pcmk_ok) { + return; +@@ -803,7 +821,8 @@ stonith_action_async_done(svc_action_t *svc_action) + } + + if (action->done_cb) { +- action->done_cb(action->pid, action->rc, action->output, action->userdata); ++ action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))), ++ action->result.action_stdout, action->userdata); + } + + action->svc_action = NULL; // don't remove our caller +@@ -835,9 +854,13 @@ 
internal_stonith_action_execute(stonith_action_t * action) + static int stonith_sequence = 0; + char *buffer = NULL; + +- if ((action == NULL) || (action->action == NULL) || (action->args == NULL) ++ CRM_CHECK(action != NULL, return -EINVAL); ++ ++ if ((action->action == NULL) || (action->args == NULL) + || (action->agent == NULL)) { +- return -EPROTO; ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR, ++ PCMK_EXEC_ERROR_FATAL, NULL); ++ return -EINVAL; + } + + if (!action->tries) { +@@ -857,6 +880,7 @@ internal_stonith_action_execute(stonith_action_t * action) + free(buffer); + + if (svc_action->rc != PCMK_OCF_UNKNOWN) { ++ set_result_from_svc_action(action, svc_action); + services_action_free(svc_action); + return -E2BIG; + } +@@ -877,10 +901,7 @@ internal_stonith_action_execute(stonith_action_t * action) + + /* keep retries from executing out of control and free previous results */ + if (is_retry) { +- free(action->output); +- action->output = NULL; +- free(action->error); +- action->error = NULL; ++ pcmk__reset_result(&(action->result)); + sleep(1); + } + +@@ -889,22 +910,19 @@ internal_stonith_action_execute(stonith_action_t * action) + if (services_action_async_fork_notify(svc_action, + &stonith_action_async_done, + &stonith_action_async_forked)) { ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, ++ PCMK_EXEC_PENDING, NULL); + return pcmk_ok; + } + + } else if (services_action_sync(svc_action)) { // sync success + rc = pcmk_ok; +- action->rc = svc_action_to_errno(svc_action); +- action->output = svc_action->stdout_data; +- svc_action->stdout_data = NULL; +- action->error = svc_action->stderr_data; +- svc_action->stderr_data = NULL; + + } else { // sync failure +- action->rc = -ECONNABORTED; +- rc = action->rc; ++ rc = -ECONNABORTED; + } + ++ set_result_from_svc_action(action, svc_action); + svc_action->params = NULL; + services_action_free(svc_action); + return rc; +-- +2.27.0 + + +From 4c8e0b0ecc53cb3883f0da0eede20b900fff48d1 Mon Sep 17 
00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Sep 2021 11:14:31 -0500 +Subject: [PATCH 02/12] Low: fencing: improve return code given back to library + callers + +Expose result2rc() internally for future reuse, and expand it to handle more +cases. In theory, this can give us better log messages and status output for +failures. +--- + include/crm/fencing/internal.h | 1 + + lib/fencing/st_client.c | 63 +++++++++++++++++++++------------- + 2 files changed, 41 insertions(+), 23 deletions(-) + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index fa9059e6f..0d23967bb 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -60,6 +60,7 @@ stonith_action_t *stonith_action_create(const char *agent, + void stonith__destroy_action(stonith_action_t *action); + void stonith__action_result(stonith_action_t *action, int *rc, char **output, + char **error_output); ++int stonith__result2rc(const pcmk__action_result_t *result); + + int + stonith_action_execute_async(stonith_action_t * action, +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 6c607b010..809be1640 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -119,7 +119,6 @@ static void stonith_connection_destroy(gpointer user_data); + static void stonith_send_notification(gpointer data, gpointer user_data); + static int internal_stonith_action_execute(stonith_action_t * action); + static void log_action(stonith_action_t *action, pid_t pid); +-static int result2rc(const pcmk__action_result_t *result); + + /*! 
+ * \brief Get agent namespace by name +@@ -694,7 +693,7 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output, + } + if (action != NULL) { + if (rc) { +- *rc = pcmk_rc2legacy(result2rc(&(action->result))); ++ *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result))); + } + if ((output != NULL) && (action->result.action_stdout != NULL)) { + *output = action->result.action_stdout; +@@ -769,32 +768,49 @@ update_remaining_timeout(stonith_action_t * action) + return action->remaining_timeout ? TRUE : FALSE; + } + +-static int +-result2rc(const pcmk__action_result_t *result) { +- int rc = pcmk_rc_ok; ++/*! ++ * \internal ++ * \brief Map a fencing action result to a standard return code ++ * ++ * \param[in] result Fencing action result to map ++ * ++ * \return Standard Pacemaker return code that best corresponds to \p result ++ */ ++int ++stonith__result2rc(const pcmk__action_result_t *result) ++{ ++ switch (result->execution_status) { ++ case PCMK_EXEC_CANCELLED: return ECANCELED; ++ case PCMK_EXEC_TIMEOUT: return ETIME; ++ case PCMK_EXEC_NOT_INSTALLED: return ENOENT; ++ case PCMK_EXEC_NOT_SUPPORTED: return EOPNOTSUPP; ++ case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN; ++ case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV; ++ case PCMK_EXEC_NO_SECRETS: return EACCES; ++ default: break; ++ } + +- if (result->execution_status == PCMK_EXEC_TIMEOUT) { +- rc = ETIME; ++ if (result->exit_status == CRM_EX_OK) { ++ return pcmk_rc_ok; ++ } + +- } else if (result->exit_status != CRM_EX_OK) { +- /* Try to provide a useful error code based on the fence agent's +- * error output. 
+- */ +- if (result->action_stderr == NULL) { +- rc = ENODATA; ++ // Try to provide useful error code based on result's error output + +- } else if (strstr(result->action_stderr, "imed out")) { +- /* Some agents have their own internal timeouts */ +- rc = ETIME; ++ if (result->action_stderr == NULL) { ++ return ENODATA; + +- } else if (strstr(result->action_stderr, "Unrecognised action")) { +- rc = EOPNOTSUPP; ++ } else if (strcasestr(result->action_stderr, "timed out") ++ || strcasestr(result->action_stderr, "timeout")) { ++ return ETIME; + +- } else { +- rc = pcmk_rc_error; +- } ++ } else if (strcasestr(result->action_stderr, "unrecognised action") ++ || strcasestr(result->action_stderr, "unrecognized action") ++ || strcasestr(result->action_stderr, "unsupported action")) { ++ return EOPNOTSUPP; + } +- return rc; ++ ++ // Oh well, we tried ++ return pcmk_rc_error; + } + + static void +@@ -821,7 +837,8 @@ stonith_action_async_done(svc_action_t *svc_action) + } + + if (action->done_cb) { +- action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))), ++ action->done_cb(action->pid, ++ pcmk_rc2legacy(stonith__result2rc(&(action->result))), + action->result.action_stdout, action->userdata); + } + +-- +2.27.0 + + +From 153c9b552a5bad9dd36e8635fa478ed9cad1f240 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 7 Oct 2021 11:35:44 -0500 +Subject: [PATCH 03/12] Refactor: fencing: return full result from + stonith__action_result() + +Previously, stonith__action_result() grabbed an action's legacy rc, stdout, and +stderr separately. Now, directly return a pointer to the action's result +object, and map that to a legacy rc in the callers when needed. 
+--- + include/crm/fencing/internal.h | 3 +-- + lib/fencing/st_client.c | 36 ++++--------------------- + lib/fencing/st_rhcs.c | 48 ++++++++++++++++++++++++---------- + 3 files changed, 40 insertions(+), 47 deletions(-) + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 0d23967bb..4e9f50fe8 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -58,8 +58,7 @@ stonith_action_t *stonith_action_create(const char *agent, + GHashTable * port_map, + const char * host_arg); + void stonith__destroy_action(stonith_action_t *action); +-void stonith__action_result(stonith_action_t *action, int *rc, char **output, +- char **error_output); ++pcmk__action_result_t *stonith__action_result(stonith_action_t *action); + int stonith__result2rc(const pcmk__action_result_t *result); + + int +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 809be1640..b9df18465 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -670,40 +670,14 @@ stonith__destroy_action(stonith_action_t *action) + * \internal + * \brief Get the result of an executed stonith action + * +- * \param[in,out] action Executed action +- * \param[out] rc Where to store result code (or NULL) +- * \param[out] output Where to store standard output (or NULL) +- * \param[out] error_output Where to store standard error output (or NULL) ++ * \param[in] action Executed action + * +- * \note If output or error_output is not NULL, the caller is responsible for +- * freeing the memory. 
++ * \return Pointer to action's result (or NULL if \p action is NULL) + */ +-void +-stonith__action_result(stonith_action_t *action, int *rc, char **output, +- char **error_output) ++pcmk__action_result_t * ++stonith__action_result(stonith_action_t *action) + { +- if (rc) { +- *rc = pcmk_ok; +- } +- if (output) { +- *output = NULL; +- } +- if (error_output) { +- *error_output = NULL; +- } +- if (action != NULL) { +- if (rc) { +- *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result))); +- } +- if ((output != NULL) && (action->result.action_stdout != NULL)) { +- *output = action->result.action_stdout; +- action->result.action_stdout = NULL; // hand off ownership to caller +- } +- if ((error_output != NULL) && (action->result.action_stderr != NULL)) { +- *error_output = action->result.action_stderr; +- action->result.action_stderr = NULL; // hand off ownership to caller +- } +- } ++ return (action == NULL)? NULL : &(action->result); + } + + #define FAILURE_MAX_RETRIES 2 +diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c +index 89a2625bd..23e694975 100644 +--- a/lib/fencing/st_rhcs.c ++++ b/lib/fencing/st_rhcs.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2020 the Pacemaker project contributors ++ * Copyright 2004-2021 the Pacemaker project contributors + * + * The version control history for this file may have further details. 
+ * +@@ -123,10 +123,10 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter) + static int + stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) + { +- char *buffer = NULL; + xmlNode *xml = NULL; + xmlNode *actions = NULL; + xmlXPathObject *xpathObj = NULL; ++ pcmk__action_result_t *result = NULL; + stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0, + 5, NULL, NULL, NULL); + int rc = stonith__execute(action); +@@ -138,23 +138,31 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) + return rc; + } + +- stonith__action_result(action, &rc, &buffer, NULL); +- stonith__destroy_action(action); +- if (rc < 0) { +- crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d", +- agent, pcmk_strerror(rc), rc); +- free(buffer); +- return rc; ++ result = stonith__action_result(action); ++ ++ if (result->execution_status != PCMK_EXEC_DONE) { ++ crm_warn("Could not execute metadata action for %s: %s", ++ agent, pcmk_exec_status_str(result->execution_status)); ++ stonith__destroy_action(action); ++ return pcmk_rc2legacy(stonith__result2rc(result)); + } + +- if (buffer == NULL) { ++ if (result->exit_status != CRM_EX_OK) { ++ crm_warn("Metadata action for %s returned error code %d", ++ agent, result->exit_status); ++ stonith__destroy_action(action); ++ return pcmk_rc2legacy(stonith__result2rc(result)); ++ } ++ ++ if (result->action_stdout == NULL) { + crm_warn("Metadata action for %s returned no data", agent); ++ stonith__destroy_action(action); + return -ENODATA; + } + +- xml = string2xml(buffer); +- free(buffer); +- buffer = NULL; ++ xml = string2xml(result->action_stdout); ++ stonith__destroy_action(action); ++ + if (xml == NULL) { + crm_warn("Metadata for %s is invalid", agent); + return -pcmk_err_schema_validation; +@@ -289,7 +297,19 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target, + + rc = stonith__execute(action); + if (rc == pcmk_ok) { 
+- stonith__action_result(action, &rc, output, error_output); ++ pcmk__action_result_t *result = stonith__action_result(action); ++ ++ rc = pcmk_rc2legacy(stonith__result2rc(result)); ++ ++ // Take ownership of output so stonith__destroy_action() doesn't free it ++ if (output != NULL) { ++ *output = result->action_stdout; ++ result->action_stdout = NULL; ++ } ++ if (error_output != NULL) { ++ *error_output = result->action_stderr; ++ result->action_stderr = NULL; ++ } + } + stonith__destroy_action(action); + return rc; +-- +2.27.0 + + +From 7f7067014357cccb229a0bef091e234eb3765f7a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Sep 2021 13:05:54 -0500 +Subject: [PATCH 04/12] Refactor: fencing: pass full result to async action + callback + +When executing an asynchronous fence agent command, the fencing library gets +the full result (exit status, execution status, and exit reason) from the +services library, then maps that to a legacy return code. + +Now, pass the full result object to the fencing async callback, rather than +separate arguments for legacy code and stdout. The mapping to a legacy code now +happens in the fencer rather than the fencing library. + +The goal of this and following commits is to push the full result object +further down the code path, so that ultimately the full result is always +available internally, and the legacy code mapping is only done for backward +compatibility when sending the result back to a client. + +This commit focuses on the async callback (done_cb() in both the fencer's +async_command_t and the fencing library's stonith_action_t). 
Later commits will +follow the chain: + + st_child_done() and stonith_fence_get_devices_cb() + -> stonith_send_async_reply() + -> stonith_construct_async_reply() and log_async_result() +--- + daemons/fenced/fenced_commands.c | 78 +++++++++++++++++++++----------- + include/crm/fencing/internal.h | 3 +- + lib/fencing/st_client.c | 10 ++-- + 3 files changed, 58 insertions(+), 33 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index b5ae28d90..d5d04ae69 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -62,7 +62,8 @@ struct device_search_s { + }; + + static gboolean stonith_device_dispatch(gpointer user_data); +-static void st_child_done(int pid, int rc, const char *output, void *user_data); ++static void st_child_done(int pid, const pcmk__action_result_t *result, ++ void *user_data); + static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, + const char *client_id); + +@@ -99,7 +100,8 @@ typedef struct async_command_s { + GList *device_next; + + void *internal_user_data; +- void (*done_cb) (int pid, int rc, const char *output, void *user_data); ++ void (*done_cb) (int pid, const pcmk__action_result_t *result, ++ void *user_data); + guint timer_sigterm; + guint timer_sigkill; + /*! 
If the operation timed out, this is the last signal +@@ -377,13 +379,25 @@ get_agent_metadata_cb(gpointer data) { + * \internal + * \brief Call a command's action callback for an internal (not library) result + * +- * \param[in] cmd Command to report result for +- * \param[in] rc Legacy return code to pass to callback ++ * \param[in] cmd Command to report result for ++ * \param[in] execution_status Execution status to use for result ++ * \param[in] exit_status Exit status to use for result ++ * \param[in] exit_reason Exit reason to use for result + */ + static void +-report_internal_result(async_command_t *cmd, int rc) ++report_internal_result(async_command_t *cmd, int exit_status, ++ int execution_status, const char *exit_reason) + { +- cmd->done_cb(0, rc, NULL, cmd); ++ pcmk__action_result_t result = { ++ // Ensure we don't pass garbage to free() ++ .exit_reason = NULL, ++ .action_stdout = NULL, ++ .action_stderr = NULL ++ }; ++ ++ pcmk__set_result(&result, exit_status, execution_status, exit_reason); ++ cmd->done_cb(0, &result, cmd); ++ pcmk__reset_result(&result); + } + + static gboolean +@@ -446,7 +460,7 @@ stonith_device_execute(stonith_device_t * device) + } + } else { + crm_info("Faking success for %s watchdog operation", cmd->action); +- report_internal_result(cmd, pcmk_ok); ++ report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL); + goto done; + } + } +@@ -462,7 +476,8 @@ stonith_device_execute(stonith_device_t * device) + crm_err("Considering %s unconfigured " + "because unable to load CIB secrets: %s", + device->id, pcmk_rc_str(exec_rc)); +- report_internal_result(cmd, -EACCES); ++ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, ++ NULL); + goto done; + } + } +@@ -501,7 +516,7 @@ stonith_device_execute(stonith_device_t * device) + cmd->done_cb, fork_cb); + if (exec_rc < 0) { + cmd->activating_on = NULL; +- report_internal_result(cmd, exec_rc); ++ cmd->done_cb(0, stonith__action_result(action), cmd); + 
stonith__destroy_action(action); + } + +@@ -625,7 +640,8 @@ free_device(gpointer data) + async_command_t *cmd = gIter->data; + + crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); +- report_internal_result(cmd, -ENODEV); ++ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, ++ NULL); + } + g_list_free(device->pending_ops); + +@@ -1079,7 +1095,8 @@ schedule_internal_command(const char *origin, + const char *victim, + int timeout, + void *internal_user_data, +- void (*done_cb) (int pid, int rc, const char *output, ++ void (*done_cb) (int pid, ++ const pcmk__action_result_t *result, + void *user_data)) + { + async_command_t *cmd = NULL; +@@ -1111,7 +1128,7 @@ enum fence_status_code { + }; + + static void +-status_search_cb(int pid, int rc, const char *output, void *user_data) ++status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) + { + async_command_t *cmd = user_data; + struct device_search_s *search = cmd->internal_user_data; +@@ -1127,7 +1144,7 @@ status_search_cb(int pid, int rc, const char *output, void *user_data) + + mainloop_set_trigger(dev->work); + +- switch (rc) { ++ switch (result->exit_status) { + case fence_status_unknown: + crm_trace("%s reported it cannot fence %s", dev->id, search->host); + break; +@@ -1141,14 +1158,15 @@ status_search_cb(int pid, int rc, const char *output, void *user_data) + default: + crm_warn("Assuming %s cannot fence %s " + "(status returned unknown code %d)", +- dev->id, search->host, rc); ++ dev->id, search->host, result->exit_status); + break; + } + search_devices_record_result(search, dev->id, can); + } + + static void +-dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data) ++dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, ++ void *user_data) + { + async_command_t *cmd = user_data; + struct device_search_s *search = cmd->internal_user_data; +@@ -1169,21 +1187,21 @@ dynamic_list_search_cb(int pid, int rc, 
const char *output, void *user_data) + + mainloop_set_trigger(dev->work); + +- if (rc == CRM_EX_OK) { ++ if (result->exit_status == CRM_EX_OK) { + crm_info("Refreshing target list for %s", dev->id); + g_list_free_full(dev->targets, free); +- dev->targets = stonith__parse_targets(output); ++ dev->targets = stonith__parse_targets(result->action_stdout); + dev->targets_age = time(NULL); + + } else if (dev->targets != NULL) { + crm_info("Reusing most recent target list for %s " + "because list returned error code %d", +- dev->id, rc); ++ dev->id, result->exit_status); + + } else { // We have never successfully executed list + crm_warn("Assuming %s cannot fence %s " + "because list returned error code %d", +- dev->id, search->host, rc); ++ dev->id, search->host, result->exit_status); + + /* Fall back to pcmk_host_check="status" if the user didn't explicitly + * specify "dynamic-list". +@@ -2407,7 +2425,7 @@ cancel_stonith_command(async_command_t * cmd) + } + + static void +-st_child_done(int pid, int rc, const char *output, void *user_data) ++st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + { + stonith_device_t *device = NULL; + stonith_device_t *next_device = NULL; +@@ -2423,7 +2441,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data) + /* The device is ready to do something else now */ + device = g_hash_table_lookup(device_list, cmd->device); + if (device) { +- if (!device->verified && (rc == pcmk_ok) && ++ if (!device->verified && (result->exit_status == CRM_EX_OK) && + (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { + + device->verified = TRUE; +@@ -2432,7 +2450,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data) + mainloop_set_trigger(device->work); + } + +- if (rc == 0) { ++ if (result->exit_status == CRM_EX_OK) { + GList *iter; + /* see if there are any required devices left to execute for this op */ + for (iter = cmd->device_next; iter != NULL; iter = iter->next) { 
+@@ -2445,7 +2463,8 @@ st_child_done(int pid, int rc, const char *output, void *user_data) + next_device = NULL; + } + +- } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) { ++ } else if ((cmd->device_next != NULL) ++ && !is_action_required(cmd->action, device)) { + /* if this device didn't work out, see if there are any others we can try. + * if the failed device was 'required', we can't pick another device. */ + next_device = g_hash_table_lookup(device_list, cmd->device_next->data); +@@ -2454,16 +2473,19 @@ st_child_done(int pid, int rc, const char *output, void *user_data) + + /* this operation requires more fencing, hooray! */ + if (next_device) { +- log_async_result(cmd, rc, pid, next_device->id, output, FALSE); ++ log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid, ++ next_device->id, result->action_stdout, FALSE); + schedule_stonith_command(cmd, next_device); + /* Prevent cmd from being freed */ + cmd = NULL; + goto done; + } + +- stonith_send_async_reply(cmd, output, rc, pid, false); ++ stonith_send_async_reply(cmd, result->action_stdout, ++ pcmk_rc2legacy(stonith__result2rc(result)), pid, ++ false); + +- if (rc != 0) { ++ if (result->exit_status != CRM_EX_OK) { + goto done; + } + +@@ -2509,7 +2531,9 @@ st_child_done(int pid, int rc, const char *output, void *user_data) + + cmd_list = g_list_remove_link(cmd_list, gIter); + +- stonith_send_async_reply(cmd_other, output, rc, pid, true); ++ stonith_send_async_reply(cmd_other, result->action_stdout, ++ pcmk_rc2legacy(stonith__result2rc(result)), ++ pid, true); + cancel_stonith_command(cmd_other); + + free_async_command(cmd_other); +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 4e9f50fe8..6a7e4232c 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -64,7 +64,8 @@ int stonith__result2rc(const pcmk__action_result_t *result); + int + stonith_action_execute_async(stonith_action_t * 
action, + void *userdata, +- void (*done) (int pid, int rc, const char *output, ++ void (*done) (int pid, ++ const pcmk__action_result_t *result, + void *user_data), + void (*fork_cb) (int pid, void *user_data)); + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index b9df18465..59dcab9a3 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -46,7 +46,8 @@ struct stonith_action_s { + int timeout; + int async; + void *userdata; +- void (*done_cb) (int pid, int status, const char *output, void *user_data); ++ void (*done_cb) (int pid, const pcmk__action_result_t *result, ++ void *user_data); + void (*fork_cb) (int pid, void *user_data); + + svc_action_t *svc_action; +@@ -811,9 +812,7 @@ stonith_action_async_done(svc_action_t *svc_action) + } + + if (action->done_cb) { +- action->done_cb(action->pid, +- pcmk_rc2legacy(stonith__result2rc(&(action->result))), +- action->result.action_stdout, action->userdata); ++ action->done_cb(action->pid, &(action->result), action->userdata); + } + + action->svc_action = NULL; // don't remove our caller +@@ -933,7 +932,8 @@ internal_stonith_action_execute(stonith_action_t * action) + int + stonith_action_execute_async(stonith_action_t * action, + void *userdata, +- void (*done) (int pid, int rc, const char *output, ++ void (*done) (int pid, ++ const pcmk__action_result_t *result, + void *user_data), + void (*fork_cb) (int pid, void *user_data)) + { +-- +2.27.0 + + +From bbd022306df7a873c0ecb2be2d33c56fbf327b8c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Sep 2021 11:51:28 -0500 +Subject: [PATCH 05/12] Feature: fencing: set exit reason for internal + execution errors + +... most importantly, copying any exit reason set by the services library. +This ensures that the stonith_action_t exit reason is set when appropriate. +However, nothing uses it as of this commit. 
+--- + daemons/fenced/fenced_commands.c | 4 ++-- + lib/fencing/st_client.c | 6 +++--- + 2 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index d5d04ae69..f55a32649 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -477,7 +477,7 @@ stonith_device_execute(stonith_device_t * device) + "because unable to load CIB secrets: %s", + device->id, pcmk_rc_str(exec_rc)); + report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS, +- NULL); ++ "Failed to get CIB secrets"); + goto done; + } + } +@@ -641,7 +641,7 @@ free_device(gpointer data) + + crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action); + report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, +- NULL); ++ "Device was removed before action could be executed"); + } + g_list_free(device->pending_ops); + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 59dcab9a3..3d4127eff 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -207,7 +207,7 @@ static void + set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action) + { + pcmk__set_result(&(action->result), svc_action->rc, svc_action->status, +- NULL); ++ services__exit_reason(svc_action)); + pcmk__set_result_output(&(action->result), + services__grab_stdout(svc_action), + services__grab_stderr(svc_action)); +@@ -706,7 +706,7 @@ stonith_action_create(const char *agent, + action->max_retries = FAILURE_MAX_RETRIES; + + pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, +- NULL); ++ "Initialization bug in fencing library"); + + if (device_args) { + char buffer[512]; +@@ -849,7 +849,7 @@ internal_stonith_action_execute(stonith_action_t * action) + if ((action->action == NULL) || (action->args == NULL) + || (action->agent == NULL)) { + pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR, +- PCMK_EXEC_ERROR_FATAL, 
NULL); ++ PCMK_EXEC_ERROR_FATAL, "Bug in fencing library"); + return -EINVAL; + } + +-- +2.27.0 + + +From ed08f600688af1d25412d2427502ba5d4a55c0d6 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 7 Oct 2021 12:06:10 -0500 +Subject: [PATCH 06/12] Fix: fencer: handle dynamic target query failures + better + +Previously, the callbacks for list and status queries checked only the result's +exit status. However, the services library will use PCMK_OCF_UNKNOWN_ERROR (1) +as the exit status for internal failures, and that value signifies a recognized +node (not an error) for fence list actions. + +Now, the callbacks check the execution status as well. +--- + daemons/fenced/fenced_commands.c | 46 +++++++++++++++++++++++++++----- + 1 file changed, 39 insertions(+), 7 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index f55a32649..7b3fb25a1 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1144,6 +1144,18 @@ status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data) + + mainloop_set_trigger(dev->work); + ++ if (result->execution_status != PCMK_EXEC_DONE) { ++ crm_warn("Assuming %s cannot fence %s " ++ "because status could not be executed: %s%s%s%s", ++ dev->id, search->host, ++ pcmk_exec_status_str(result->execution_status), ++ ((result->exit_reason == NULL)? "" : " ("), ++ ((result->exit_reason == NULL)? "" : result->exit_reason), ++ ((result->exit_reason == NULL)? 
"" : ")")); ++ search_devices_record_result(search, dev->id, FALSE); ++ return; ++ } ++ + switch (result->exit_status) { + case fence_status_unknown: + crm_trace("%s reported it cannot fence %s", dev->id, search->host); +@@ -1187,21 +1199,41 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, + + mainloop_set_trigger(dev->work); + +- if (result->exit_status == CRM_EX_OK) { ++ if ((result->execution_status == PCMK_EXEC_DONE) ++ && (result->exit_status == CRM_EX_OK)) { + crm_info("Refreshing target list for %s", dev->id); + g_list_free_full(dev->targets, free); + dev->targets = stonith__parse_targets(result->action_stdout); + dev->targets_age = time(NULL); + + } else if (dev->targets != NULL) { +- crm_info("Reusing most recent target list for %s " +- "because list returned error code %d", +- dev->id, result->exit_status); ++ if (result->execution_status == PCMK_EXEC_DONE) { ++ crm_info("Reusing most recent target list for %s " ++ "because list returned error code %d", ++ dev->id, result->exit_status); ++ } else { ++ crm_info("Reusing most recent target list for %s " ++ "because list could not be executed: %s%s%s%s", ++ dev->id, pcmk_exec_status_str(result->execution_status), ++ ((result->exit_reason == NULL)? "" : " ("), ++ ((result->exit_reason == NULL)? "" : result->exit_reason), ++ ((result->exit_reason == NULL)? "" : ")")); ++ } + + } else { // We have never successfully executed list +- crm_warn("Assuming %s cannot fence %s " +- "because list returned error code %d", +- dev->id, search->host, result->exit_status); ++ if (result->execution_status == PCMK_EXEC_DONE) { ++ crm_warn("Assuming %s cannot fence %s " ++ "because list returned error code %d", ++ dev->id, search->host, result->exit_status); ++ } else { ++ crm_warn("Assuming %s cannot fence %s " ++ "because list could not be executed: %s%s%s%s", ++ dev->id, search->host, ++ pcmk_exec_status_str(result->execution_status), ++ ((result->exit_reason == NULL)? 
"" : " ("), ++ ((result->exit_reason == NULL)? "" : result->exit_reason), ++ ((result->exit_reason == NULL)? "" : ")")); ++ } + + /* Fall back to pcmk_host_check="status" if the user didn't explicitly + * specify "dynamic-list". +-- +2.27.0 + + +From 5a30238a3b8691a5fc20f53906c0efcc50193306 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 21 Sep 2021 15:57:50 -0500 +Subject: [PATCH 07/12] Refactor: fencer: pass result object when sending an + async reply + +... via stonith_send_async_reply(), instead of sending the mapped legacy code +and action stdout separately. Also, drop the "stonith_" prefix since the +function is static. + +This moves the mapping from the stonith_send_async_reply() callers to the +function itself, so we use the result object and standard codes as long as +possible, and map to a legacy code only where needed. +--- + daemons/fenced/fenced_commands.c | 62 +++++++++++++++++++------------- + daemons/fenced/fenced_remote.c | 2 +- + 2 files changed, 39 insertions(+), 25 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 7b3fb25a1..e5f8162ce 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2376,12 +2376,28 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next, + } + } + ++/*! 
++ * \internal ++ * \brief Reply to requester after asynchronous command completion ++ * ++ * \param[in] cmd Command that completed ++ * \param[in] result Result of command ++ * \param[in] pid Process ID of command, if available ++ * \param[in] merged If true, command was merged with another, not executed ++ */ + static void +-stonith_send_async_reply(async_command_t *cmd, const char *output, int rc, +- int pid, bool merged) ++send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, ++ int pid, bool merged) + { + xmlNode *reply = NULL; + gboolean bcast = FALSE; ++ const char *output = NULL; ++ int rc = pcmk_ok; ++ ++ CRM_CHECK((cmd != NULL) && (result != NULL), return); ++ ++ output = result->action_stdout; ++ rc = pcmk_rc2legacy(stonith__result2rc(result)); + + reply = stonith_construct_async_reply(cmd, output, NULL, rc); + +@@ -2513,9 +2529,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + goto done; + } + +- stonith_send_async_reply(cmd, result->action_stdout, +- pcmk_rc2legacy(stonith__result2rc(result)), pid, +- false); ++ send_async_reply(cmd, result, pid, false); + + if (result->exit_status != CRM_EX_OK) { + goto done; +@@ -2563,9 +2577,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + + cmd_list = g_list_remove_link(cmd_list, gIter); + +- stonith_send_async_reply(cmd_other, result->action_stdout, +- pcmk_rc2legacy(stonith__result2rc(result)), +- pid, true); ++ send_async_reply(cmd_other, result, pid, true); + cancel_stonith_command(cmd_other); + + free_async_command(cmd_other); +@@ -2604,26 +2616,28 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data) + /* Order based on priority */ + devices = g_list_sort(devices, sort_device_priority); + device = g_hash_table_lookup(device_list, devices->data); +- +- if (device) { +- cmd->device_list = devices; +- cmd->device_next = devices->next; +- devices = NULL; /* list owned by cmd now */ +- } + } + +- /* we have a 
device, schedule it for fencing. */ +- if (device) { +- schedule_stonith_command(cmd, device); +- /* in progress */ +- return; +- } ++ if (device == NULL) { // No device found ++ pcmk__action_result_t result = { ++ // Ensure we don't pass garbage to free() ++ .exit_reason = NULL, ++ .action_stdout = NULL, ++ .action_stderr = NULL ++ }; + +- /* no device found! */ +- stonith_send_async_reply(cmd, NULL, -ENODEV, 0, false); ++ pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, ++ "No fence device configured for target"); ++ send_async_reply(cmd, &result, 0, false); ++ pcmk__reset_result(&result); ++ free_async_command(cmd); ++ g_list_free_full(devices, free); + +- free_async_command(cmd); +- g_list_free_full(devices, free); ++ } else { // Device found, schedule it for fencing ++ cmd->device_list = devices; ++ cmd->device_next = devices->next; ++ schedule_stonith_command(cmd, device); ++ } + } + + static int +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index ffaf60018..b09d2865e 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -996,7 +996,7 @@ stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) + + remote_op_done(op, msg, pcmk_ok, FALSE); + +- /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */ ++ // Replies are sent via done_cb -> send_async_reply() -> do_local_reply() + return -EINPROGRESS; + } + +-- +2.27.0 + + +From c67b6bfbe0baa1253058417ddfb9bc4cf0844e27 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 7 Oct 2021 17:25:38 -0500 +Subject: [PATCH 08/12] Refactor: fencer: pass result object when building + async reply + +... via stonith_construct_async_reply(), instead of passing a mapped legacy rc +and action output separately, which will be helpful when we add the exit reason +to the reply. Also, drop the "stonith_" prefix since the function is static, and +drop an unused argument. 
+--- + daemons/fenced/fenced_commands.c | 33 +++++++++++++++----------------- + 1 file changed, 15 insertions(+), 18 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index e5f8162ce..6bc12e6c4 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -112,8 +112,8 @@ typedef struct async_command_s { + stonith_device_t *activating_on; + } async_command_t; + +-static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output, +- xmlNode * data, int rc); ++static xmlNode *construct_async_reply(async_command_t *cmd, ++ const pcmk__action_result_t *result); + + static gboolean + is_action_required(const char *action, stonith_device_t *device) +@@ -2399,7 +2399,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + output = result->action_stdout; + rc = pcmk_rc2legacy(stonith__result2rc(result)); + +- reply = stonith_construct_async_reply(cmd, output, NULL, rc); ++ reply = construct_async_reply(cmd, result); + + // Only replies for certain actions are broadcast + if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status", +@@ -2732,17 +2732,20 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i + return reply; + } + ++/*! 
++ * \internal ++ * \brief Build an XML reply to an asynchronous fencing command ++ * ++ * \param[in] cmd Fencing command that reply is for ++ * \param[in] result Command result ++ */ + static xmlNode * +-stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc) ++construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result) + { +- xmlNode *reply = NULL; +- +- crm_trace("Creating a basic reply"); +- reply = create_xml_node(NULL, T_STONITH_REPLY); ++ xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY); + + crm_xml_add(reply, "st_origin", __func__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); +- + crm_xml_add(reply, F_STONITH_OPERATION, cmd->op); + crm_xml_add(reply, F_STONITH_DEVICE, cmd->device); + crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); +@@ -2753,15 +2756,9 @@ stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode + crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin); + crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id); + crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); +- +- crm_xml_add_int(reply, F_STONITH_RC, rc); +- +- crm_xml_add(reply, "st_output", output); +- +- if (data != NULL) { +- crm_info("Attaching reply output"); +- add_message_xml(reply, F_STONITH_CALLDATA, data); +- } ++ crm_xml_add_int(reply, F_STONITH_RC, ++ pcmk_rc2legacy(stonith__result2rc(result))); ++ crm_xml_add(reply, "st_output", result->action_stdout); + return reply; + } + +-- +2.27.0 + + +From 2686caeb3b74f687ddd86a4e483250ca8096ba7c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 19 Oct 2021 18:27:31 -0500 +Subject: [PATCH 09/12] Log: fencer: improve messages for asynchronous results + +Now that we have the full result object, pass it to log_async_result(). +Instead of logging a mapped legacy rc, log the execution status or exit status +as appropriate, along with the exit reason. 
+--- + daemons/fenced/fenced_commands.c | 43 +++++++++++++++++--------------- + 1 file changed, 23 insertions(+), 20 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 6bc12e6c4..9d06c68dc 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2305,15 +2305,14 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int + * \brief Log the result of an asynchronous command + * + * \param[in] cmd Command the result is for +- * \param[in] rc Legacy return code corresponding to result ++ * \param[in] result Result of command + * \param[in] pid Process ID of command, if available + * \param[in] next Alternate device that will be tried if command failed +- * \param[in] output Command output, if any + * \param[in] op_merged Whether this command was merged with an earlier one + */ + static void +-log_async_result(async_command_t *cmd, int rc, int pid, const char *next, +- const char *output, gboolean op_merged) ++log_async_result(async_command_t *cmd, const pcmk__action_result_t *result, ++ int pid, const char *next, bool op_merged) + { + int log_level = LOG_ERR; + int output_log_level = LOG_NEVER; +@@ -2321,17 +2320,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next, + + GString *msg = g_string_sized_new(80); // Reasonable starting size + +- // Choose log levels appropriately +- if (rc == 0) { // Success ++ // Choose log levels appropriately if we have a result ++ if ((result->execution_status == PCMK_EXEC_DONE) ++ && (result->exit_status == CRM_EX_OK)) { // Success + log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE; +- if ((output != NULL) ++ if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { + output_log_level = LOG_DEBUG; + } + next = NULL; + } else { // Failure + log_level = (cmd->victim == NULL)? 
LOG_NOTICE : LOG_ERR; +- if ((output != NULL) ++ if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { + output_log_level = LOG_WARNING; + } +@@ -2347,10 +2347,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next, + } + g_string_append_printf(msg, "using %s ", cmd->device); + +- // Add result +- g_string_append_printf(msg, "returned %d (%s)", rc, pcmk_strerror(rc)); ++ // Add exit status or execution status as appropriate ++ if (result->execution_status == PCMK_EXEC_DONE) { ++ g_string_append_printf(msg, "returned %d", result->exit_status); ++ } else { ++ g_string_append_printf(msg, "could not be executed: %s", ++ pcmk_exec_status_str(result->execution_status)); ++ } + +- // Add next device if appropriate ++ // Add exit reason and next device if appropriate ++ if (result->exit_reason != NULL) { ++ g_string_append_printf(msg, " (%s)", result->exit_reason); ++ } + if (next != NULL) { + g_string_append_printf(msg, ", retrying with %s", next); + } +@@ -2371,7 +2379,7 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next, + if (output_log_level != LOG_NEVER) { + char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid); + +- crm_log_output(output_log_level, prefix, output); ++ crm_log_output(output_log_level, prefix, result->action_stdout); + free(prefix); + } + } +@@ -2391,14 +2399,9 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + { + xmlNode *reply = NULL; + gboolean bcast = FALSE; +- const char *output = NULL; +- int rc = pcmk_ok; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + +- output = result->action_stdout; +- rc = pcmk_rc2legacy(stonith__result2rc(result)); +- + reply = construct_async_reply(cmd, result); + + // Only replies for certain actions are broadcast +@@ -2412,7 +2415,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + bcast = TRUE; + } + +- log_async_result(cmd, rc, pid, NULL, 
output, merged); ++ log_async_result(cmd, result, pid, NULL, merged); + crm_log_xml_trace(reply, "Reply"); + + if (merged) { +@@ -2436,6 +2439,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + if (stand_alone) { + /* Do notification with a clean data object */ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); ++ int rc = pcmk_rc2legacy(stonith__result2rc(result)); + + crm_xml_add_int(notify_data, F_STONITH_RC, rc); + crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim); +@@ -2521,8 +2525,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + + /* this operation requires more fencing, hooray! */ + if (next_device) { +- log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid, +- next_device->id, result->action_stdout, FALSE); ++ log_async_result(cmd, result, pid, next_device->id, false); + schedule_stonith_command(cmd, next_device); + /* Prevent cmd from being freed */ + cmd = NULL; +-- +2.27.0 + + +From 9f9dea518da50f629589d505ea0f330a47111d76 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 28 Oct 2021 13:29:31 -0500 +Subject: [PATCH 10/12] Test: cts-fencing: update expected log messages + +... which now log the original exit status rather than a mapped legacy rc +--- + cts/cts-fencing.in | 28 ++++++++++++++-------------- + 1 file changed, 14 insertions(+), 14 deletions(-) + +diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in +index babfb6351..5cd9f7b8f 100644 +--- a/cts/cts-fencing.in ++++ b/cts/cts-fencing.in +@@ -886,7 +886,7 @@ class Tests(object): + test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") + + test.add_stonith_log_pattern("Total timeout set to 40") +- test.add_stonith_log_pattern("targeting node3 using false returned -201") ++ test.add_stonith_log_pattern("targeting node3 using false returned 1") + test.add_stonith_log_pattern("targeting node3 using true returned 0") + + # test what happens when the first fencing level fails. 
+@@ -920,8 +920,8 @@ class Tests(object): + test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3") + + test.add_stonith_log_pattern("Total timeout set to 18") +- test.add_stonith_log_pattern("targeting node3 using false1 returned -201") +- test.add_stonith_log_pattern("targeting node3 using false2 returned -201") ++ test.add_stonith_log_pattern("targeting node3 using false1 returned 1") ++ test.add_stonith_log_pattern("targeting node3 using false2 returned 1") + test.add_stonith_log_pattern("targeting node3 using true3 returned 0") + test.add_stonith_log_pattern("targeting node3 using true4 returned 0") + +@@ -987,7 +987,7 @@ class Tests(object): + test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20") + + test.add_stonith_log_pattern("Total timeout set to 8") +- test.add_stonith_log_pattern("targeting node3 using false1 returned -201") ++ test.add_stonith_log_pattern("targeting node3 using false1 returned 1") + test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ") + test.add_stonith_log_pattern("targeting node3 using true3 returned 0") + test.add_stonith_log_pattern("targeting node3 using true4 returned 0") +@@ -1147,7 +1147,7 @@ class Tests(object): + "--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") + test.add_stonith_log_pattern("does not support reboot") +- test.add_stonith_log_pattern("using true1 returned 0 (OK)") ++ test.add_stonith_log_pattern("using true1 returned 0") + + # make sure reboot is used when reboot action is advertised + for test_type in test_types: +@@ -1158,7 +1158,7 @@ class Tests(object): + "--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"") + test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V") + test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'") +- 
test.add_stonith_log_pattern("using true1 returned 0 (OK)") ++ test.add_stonith_log_pattern("using true1 returned 0") + + # make sure requested fencing delay is applied only for the first device in the first level + # make sure static delay from pcmk_delay_base is added +@@ -1240,8 +1240,8 @@ class Tests(object): + '--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname)) + test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) + # both devices should be executed +- test.add_stonith_log_pattern("using true1 returned 0 (OK)") +- test.add_stonith_log_pattern("using true2 returned 0 (OK)") ++ test.add_stonith_log_pattern("using true1 returned 0") ++ test.add_stonith_log_pattern("using true2 returned 0") + + ### verify unfencing using automatic unfencing fails if any of the required agents fail + test = self.new_test("cpg_unfence_required_2", +@@ -1264,8 +1264,8 @@ class Tests(object): + test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname)) + test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname)) + test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("using true1 returned 0 (OK)") +- test.add_stonith_log_pattern("using true2 returned 0 (OK)") ++ test.add_stonith_log_pattern("using true1 returned 0") ++ test.add_stonith_log_pattern("using true2 returned 0") + + ### verify unfencing using automatic devices with topology + test = self.new_test("cpg_unfence_required_4", +@@ -1296,10 +1296,10 @@ class Tests(object): + test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname)) + test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname)) + test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname)) +- test.add_stonith_log_pattern("using true1 returned 0 (OK)") +- test.add_stonith_log_pattern("using true2 returned 0 (OK)") +- 
test.add_stonith_log_pattern("using true3 returned 0 (OK)") +- test.add_stonith_log_pattern("using true4 returned 0 (OK)") ++ test.add_stonith_log_pattern("using true1 returned 0") ++ test.add_stonith_log_pattern("using true2 returned 0") ++ test.add_stonith_log_pattern("using true3 returned 0") ++ test.add_stonith_log_pattern("using true4 returned 0") + + def build_unfence_on_target_tests(self): + """ Register tests that verify unfencing that runs on the target """ +-- +2.27.0 + + +From be72166ed9ccb53c218529783660503df95da719 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 16 Sep 2021 16:50:23 -0500 +Subject: [PATCH 11/12] Log: libcrmservice: downgrade failed action messages + +Previously, we would often get duplicate log messages for failed actions, +from the service library and again from its callers. + +Now that the service library tracks and provides exit reasons, callers can log +sufficient detail with better context, so downgrade the library's messages to +info level or lower. Similarly, avoid duplicate logs of process output. + +Certain messages (such as out-of-memory) remain at higher severity. 
+--- + daemons/controld/controld_execd.c | 15 +++--- + lib/fencing/st_client.c | 11 ++--- + lib/services/services.c | 14 +++--- + lib/services/services_linux.c | 80 ++++++++++++++++--------------- + lib/services/systemd.c | 20 ++++---- + lib/services/upstart.c | 19 ++++---- + 6 files changed, 80 insertions(+), 79 deletions(-) + +diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c +index bded6e6b6..3ddff6e13 100644 +--- a/daemons/controld/controld_execd.c ++++ b/daemons/controld/controld_execd.c +@@ -2684,16 +2684,15 @@ log_executor_event(lrmd_event_data_t *op, const char *op_key, + do_crm_log(log_level, "%s", str->str); + g_string_free(str, TRUE); + +- if (op->output != NULL) { +- char *prefix = crm_strdup_printf("%s-" PCMK__OP_FMT ":%d", node_name, ++ /* The services library has already logged the output at info or debug ++ * level, so just raise to notice if it looks like a failure. ++ */ ++ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) { ++ char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output", + op->rsc_id, op->op_type, +- op->interval_ms, op->call_id); ++ op->interval_ms, node_name); + +- if (op->rc) { +- crm_log_output(LOG_NOTICE, prefix, op->output); +- } else { +- crm_log_output(LOG_DEBUG, prefix, op->output); +- } ++ crm_log_output(LOG_NOTICE, prefix, op->output); + free(prefix); + } + } +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 3d4127eff..2fbff7f24 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -276,14 +276,9 @@ stonith__watchdog_fencing_enabled_for_node(const char *node) + static void + log_action(stonith_action_t *action, pid_t pid) + { +- if (action->result.action_stdout != NULL) { +- /* Logging the whole string confuses syslog when the string is xml */ +- char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid); +- +- crm_log_output(LOG_TRACE, prefix, action->result.action_stdout); +- free(prefix); +- } +- ++ /* The services library has 
already logged the output at info or debug ++ * level, so just raise to warning for stderr. ++ */ + if (action->result.action_stderr != NULL) { + /* Logging the whole string confuses syslog when the string is xml */ + char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); +diff --git a/lib/services/services.c b/lib/services/services.c +index 86a0a213c..cf8bbc70e 100644 +--- a/lib/services/services.c ++++ b/lib/services/services.c +@@ -319,13 +319,13 @@ services__create_resource_action(const char *name, const char *standard, + rc = services__nagios_prepare(op); + #endif + } else { +- crm_err("Unknown resource standard: %s", op->standard); ++ crm_info("Unknown resource standard: %s", op->standard); + rc = ENOENT; + } + + if (rc != pcmk_rc_ok) { +- crm_err("Cannot prepare %s operation for %s: %s", +- action, name, strerror(rc)); ++ crm_info("Cannot prepare %s operation for %s: %s", ++ action, name, strerror(rc)); + services__handle_exec_error(op, rc); + } + return op; +@@ -967,14 +967,14 @@ execute_metadata_action(svc_action_t *op) + const char *class = op->standard; + + if (op->agent == NULL) { +- crm_err("meta-data requested without specifying agent"); ++ crm_info("Meta-data requested without specifying agent"); + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_FATAL, "Agent not specified"); + return EINVAL; + } + + if (class == NULL) { +- crm_err("meta-data requested for agent %s without specifying class", ++ crm_info("Meta-data requested for agent %s without specifying class", + op->agent); + services__set_result(op, services__generic_error(op), + PCMK_EXEC_ERROR_FATAL, +@@ -986,8 +986,8 @@ execute_metadata_action(svc_action_t *op) + class = resources_find_service_class(op->agent); + } + if (class == NULL) { +- crm_err("meta-data requested for %s, but could not determine class", +- op->agent); ++ crm_info("Meta-data requested for %s, but could not determine class", ++ op->agent); + services__set_result(op, 
services__generic_error(op), + PCMK_EXEC_ERROR_HARD, + "Agent standard could not be determined"); +diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c +index b2ff27a0d..9a4c6cf80 100644 +--- a/lib/services/services_linux.c ++++ b/lib/services/services_linux.c +@@ -64,8 +64,8 @@ sigchld_setup(struct sigchld_data_s *data) + + // Block SIGCHLD (saving previous set of blocked signals to restore later) + if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=sigprocmask", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=sigprocmask", pcmk_strerror(errno)); + return false; + } + return true; +@@ -81,8 +81,8 @@ sigchld_open(struct sigchld_data_s *data) + + fd = signalfd(-1, &(data->mask), SFD_NONBLOCK); + if (fd < 0) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=signalfd", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=signalfd", pcmk_strerror(errno)); + } + return fd; + } +@@ -108,8 +108,8 @@ sigchld_received(int fd) + } + s = read(fd, &fdsi, sizeof(struct signalfd_siginfo)); + if (s != sizeof(struct signalfd_siginfo)) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=read", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=read", pcmk_strerror(errno)); + + } else if (fdsi.ssi_signo == SIGCHLD) { + return true; +@@ -149,8 +149,8 @@ sigchld_handler() + if ((last_sigchld_data != NULL) + && (last_sigchld_data->pipe_fd[1] >= 0) + && (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=write", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=write", pcmk_strerror(errno)); + } + } + +@@ -162,19 +162,19 @@ 
sigchld_setup(struct sigchld_data_s *data) + data->pipe_fd[0] = data->pipe_fd[1] = -1; + + if (pipe(data->pipe_fd) == -1) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=pipe", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=pipe", pcmk_strerror(errno)); + return false; + } + + rc = pcmk__set_nonblocking(data->pipe_fd[0]); + if (rc != pcmk_rc_ok) { +- crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d", ++ crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + rc = pcmk__set_nonblocking(data->pipe_fd[1]); + if (rc != pcmk_rc_ok) { +- crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d", ++ crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d", + pcmk_rc_str(rc), rc); + } + +@@ -183,8 +183,8 @@ sigchld_setup(struct sigchld_data_s *data) + data->sa.sa_flags = 0; + sigemptyset(&(data->sa.sa_mask)); + if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) { +- crm_err("Wait for child process completion failed: %s " +- CRM_XS " source=sigaction", pcmk_strerror(errno)); ++ crm_info("Wait for child process completion failed: %s " ++ CRM_XS " source=sigaction", pcmk_strerror(errno)); + } + + // Remember data for use in signal handler +@@ -585,7 +585,11 @@ log_op_output(svc_action_t *op) + { + char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid); + +- crm_log_output(LOG_NOTICE, prefix, op->stderr_data); ++ /* The library caller has better context to know how important the output ++ * is, so log it at info and debug severity here. They can log it again at ++ * higher severity if appropriate. 
++ */ ++ crm_log_output(LOG_INFO, prefix, op->stderr_data); + strcpy(prefix + strlen(prefix) - strlen("error output"), "output"); + crm_log_output(LOG_DEBUG, prefix, op->stdout_data); + free(prefix); +@@ -673,7 +677,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo, + parse_exit_reason_from_stderr(op); + + } else if (mainloop_child_timeout(p)) { +- crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); ++ crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout); + services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT, + "Process did not exit within specified timeout"); + +@@ -686,7 +690,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo, + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL); + + } else { +- crm_warn("%s[%d] terminated with signal %d (%s)", ++ crm_info("%s[%d] terminated with signal %d (%s)", + op->id, op->pid, signo, strsignal(signo)); + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "Process interrupted by signal"); +@@ -908,12 +912,12 @@ action_launch_child(svc_action_t *op) + sp.sched_priority = 0; + + if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) { +- crm_warn("Could not reset scheduling policy for %s", op->id); ++ crm_info("Could not reset scheduling policy for %s", op->id); + } + } + #endif + if (setpriority(PRIO_PROCESS, 0, 0) == -1) { +- crm_warn("Could not reset process priority for %s", op->id); ++ crm_info("Could not reset process priority for %s", op->id); + } + + /* Man: The call setpgrp() is equivalent to setpgid(0,0) +@@ -941,7 +945,7 @@ action_launch_child(svc_action_t *op) + } else { + crm_err("Considering %s unconfigured " + "because unable to load CIB secrets: %s", +- op->rsc, pcmk_rc_str(rc)); ++ op->rsc, pcmk_rc_str(rc)); + exit_child(op, services__configuration_error(op, false), + "Unable to load CIB secrets"); + } +@@ -1043,7 +1047,7 @@ wait_for_sync_result(svc_action_t *op, struct 
sigchld_data_s *data) + + } else if (wait_rc < 0) { + wait_reason = pcmk_rc_str(errno); +- crm_warn("Wait for completion of %s[%d] failed: %s " ++ crm_info("Wait for completion of %s[%d] failed: %s " + CRM_XS " source=waitpid", + op->id, op->pid, wait_reason); + wait_rc = 0; // Act as if process is still running +@@ -1057,8 +1061,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) + + } else if ((poll_rc < 0) && (errno != EINTR)) { + wait_reason = pcmk_rc_str(errno); +- crm_err("Wait for completion of %s[%d] failed: %s " +- CRM_XS " source=poll", op->id, op->pid, wait_reason); ++ crm_info("Wait for completion of %s[%d] failed: %s " ++ CRM_XS " source=poll", op->id, op->pid, wait_reason); + break; + } + +@@ -1078,7 +1082,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) + services__set_result(op, services__generic_error(op), + PCMK_EXEC_TIMEOUT, + "Process did not exit within specified timeout"); +- crm_warn("%s[%d] timed out after %dms", ++ crm_info("%s[%d] timed out after %dms", + op->id, op->pid, op->timeout); + + } else { +@@ -1110,8 +1114,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data) + + services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR, + "Process interrupted by signal"); +- crm_err("%s[%d] terminated with signal %d (%s)", +- op->id, op->pid, signo, strsignal(signo)); ++ crm_info("%s[%d] terminated with signal %d (%s)", ++ op->id, op->pid, signo, strsignal(signo)); + + #ifdef WCOREDUMP + if (WCOREDUMP(status)) { +@@ -1155,7 +1159,7 @@ services__execute_file(svc_action_t *op) + // Catch common failure conditions early + if (stat(op->opaque->exec, &st) != 0) { + rc = errno; +- crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d", ++ crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d", + op->opaque->exec, pcmk_strerror(rc), rc); + services__handle_exec_error(op, rc); + goto done; +@@ -1163,8 +1167,8 @@ services__execute_file(svc_action_t *op) + + if 
(pipe(stdout_fd) < 0) { + rc = errno; +- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d", +- op->opaque->exec, pcmk_strerror(rc), rc); ++ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d", ++ op->opaque->exec, pcmk_strerror(rc), rc); + services__handle_exec_error(op, rc); + goto done; + } +@@ -1174,8 +1178,8 @@ services__execute_file(svc_action_t *op) + + close_pipe(stdout_fd); + +- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d", +- op->opaque->exec, pcmk_strerror(rc), rc); ++ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d", ++ op->opaque->exec, pcmk_strerror(rc), rc); + services__handle_exec_error(op, rc); + goto done; + } +@@ -1187,8 +1191,8 @@ services__execute_file(svc_action_t *op) + close_pipe(stdout_fd); + close_pipe(stderr_fd); + +- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d", +- op->opaque->exec, pcmk_strerror(rc), rc); ++ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d", ++ op->opaque->exec, pcmk_strerror(rc), rc); + services__handle_exec_error(op, rc); + goto done; + } +@@ -1212,8 +1216,8 @@ services__execute_file(svc_action_t *op) + close_pipe(stdout_fd); + close_pipe(stderr_fd); + +- crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d", +- op->opaque->exec, pcmk_strerror(rc), rc); ++ crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d", ++ op->opaque->exec, pcmk_strerror(rc), rc); + services__handle_exec_error(op, rc); + if (op->synchronous) { + sigchld_cleanup(&data); +@@ -1271,7 +1275,7 @@ services__execute_file(svc_action_t *op) + op->opaque->stdout_fd = stdout_fd[0]; + rc = pcmk__set_nonblocking(op->opaque->stdout_fd); + if (rc != pcmk_rc_ok) { +- crm_warn("Could not set '%s' output non-blocking: %s " ++ crm_info("Could not set '%s' output non-blocking: %s " + CRM_XS " rc=%d", + op->opaque->exec, pcmk_rc_str(rc), rc); + } +@@ -1279,7 +1283,7 @@ services__execute_file(svc_action_t *op) + op->opaque->stderr_fd = stderr_fd[0]; + rc = 
pcmk__set_nonblocking(op->opaque->stderr_fd); + if (rc != pcmk_rc_ok) { +- crm_warn("Could not set '%s' error output non-blocking: %s " ++ crm_info("Could not set '%s' error output non-blocking: %s " + CRM_XS " rc=%d", + op->opaque->exec, pcmk_rc_str(rc), rc); + } +@@ -1290,7 +1294,7 @@ services__execute_file(svc_action_t *op) + // as long as no other standard uses stdin_fd assume stonith + rc = pcmk__set_nonblocking(op->opaque->stdin_fd); + if (rc != pcmk_rc_ok) { +- crm_warn("Could not set '%s' input non-blocking: %s " ++ crm_info("Could not set '%s' input non-blocking: %s " + CRM_XS " fd=%d,rc=%d", op->opaque->exec, + pcmk_rc_str(rc), op->opaque->stdin_fd, rc); + } +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index 6f5bef960..8e9fff484 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -232,7 +232,8 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data) + } + + if (pcmk_dbus_find_error(pending, reply, &error)) { +- crm_err("Could not issue systemd reload %d: %s", reload_count, error.message); ++ crm_warn("Could not issue systemd reload %d: %s", ++ reload_count, error.message); + dbus_error_free(&error); + + } else { +@@ -291,8 +292,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error) + PCMK_EXEC_NOT_INSTALLED, "systemd unit not found"); + } + +- crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s", +- op->action, op->agent, crm_str(op->rsc), error->message); ++ crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s", ++ op->action, op->agent, crm_str(op->rsc), error->message); + } + + /*! 
+@@ -325,11 +326,11 @@ execute_after_loadunit(DBusMessage *reply, svc_action_t *op) + if (op != NULL) { + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR, + "systemd DBus method had unexpected reply"); +- crm_err("Could not load systemd unit %s for %s: " +- "DBus reply has unexpected type", op->agent, op->id); ++ crm_info("Could not load systemd unit %s for %s: " ++ "DBus reply has unexpected type", op->agent, op->id); + } else { +- crm_err("Could not load systemd unit: " +- "DBus reply has unexpected type"); ++ crm_info("Could not load systemd unit: " ++ "DBus reply has unexpected type"); + } + + } else { +@@ -688,7 +689,7 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op) + + } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, + __func__, __LINE__)) { +- crm_warn("DBus request for %s of %s succeeded but " ++ crm_info("DBus request for %s of %s succeeded but " + "return type was unexpected", op->action, crm_str(op->rsc)); + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, + "systemd DBus method had unexpected reply"); +@@ -981,7 +982,8 @@ systemd_timeout_callback(gpointer p) + svc_action_t * op = p; + + op->opaque->timerid = 0; +- crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc); ++ crm_info("%s action for systemd unit %s named '%s' timed out", ++ op->action, op->agent, op->rsc); + services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT, + "Systemd action did not complete within specified timeout"); + services__finalize_async_op(op); +diff --git a/lib/services/upstart.c b/lib/services/upstart.c +index 2fdc229ad..2ece803e1 100644 +--- a/lib/services/upstart.c ++++ b/lib/services/upstart.c +@@ -308,21 +308,21 @@ get_first_instance(const gchar * job, int timeout) + dbus_message_unref(msg); + + if (dbus_error_is_set(&error)) { +- crm_err("Call to %s failed: %s", method, error.message); ++ crm_info("Call to %s failed: %s", method, error.message); + 
dbus_error_free(&error); + goto done; + + } else if(reply == NULL) { +- crm_err("Call to %s failed: no reply", method); ++ crm_info("Call to %s failed: no reply", method); + goto done; + + } else if (!dbus_message_iter_init(reply, &args)) { +- crm_err("Call to %s failed: Message has no arguments", method); ++ crm_info("Call to %s failed: Message has no arguments", method); + goto done; + } + + if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) { +- crm_err("Call to %s failed: Message has invalid arguments", method); ++ crm_info("Call to %s failed: Message has invalid arguments", method); + goto done; + } + +@@ -432,8 +432,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error) + return; + } + +- crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s", +- op->action, op->agent, crm_str(op->rsc), error->message); ++ crm_info("DBus request for %s of Upstart job %s for resource %s failed: %s", ++ op->action, op->agent, crm_str(op->rsc), error->message); + } + + /*! 
+@@ -468,7 +468,7 @@ job_method_complete(DBusPendingCall *pending, void *user_data) + + } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, + __func__, __LINE__)) { +- crm_warn("DBus request for %s of %s succeeded but " ++ crm_info("DBus request for %s of %s succeeded but " + "return type was unexpected", op->action, crm_str(op->rsc)); + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + +@@ -667,7 +667,8 @@ services__execute_upstart(svc_action_t *op) + + } else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH, + __func__, __LINE__)) { +- crm_warn("Call to %s passed but return type was unexpected", op->action); ++ crm_info("Call to %s passed but return type was unexpected", ++ op->action); + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + + } else { +@@ -675,7 +676,7 @@ services__execute_upstart(svc_action_t *op) + + dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path, + DBUS_TYPE_INVALID); +- crm_info("Call to %s passed: %s", op->action, path); ++ crm_debug("Call to %s passed: %s", op->action, path); + services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL); + } + +-- +2.27.0 + + +From 39f6861c72eb9dd76d2cf3da287fe7485615631b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 8 Nov 2021 09:43:38 -0600 +Subject: [PATCH 12/12] Low: fencing: avoid use-after-free with new result + object + +introduced by 153c9b552 (not released) +--- + lib/fencing/st_rhcs.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c +index 23e694975..6c8cbedc7 100644 +--- a/lib/fencing/st_rhcs.c ++++ b/lib/fencing/st_rhcs.c +@@ -143,15 +143,17 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) + if (result->execution_status != PCMK_EXEC_DONE) { + crm_warn("Could not execute metadata action for %s: %s", + agent, pcmk_exec_status_str(result->execution_status)); ++ rc = pcmk_rc2legacy(stonith__result2rc(result)); +
stonith__destroy_action(action); +- return pcmk_rc2legacy(stonith__result2rc(result)); ++ return rc; + } + + if (result->exit_status != CRM_EX_OK) { + crm_warn("Metadata action for %s returned error code %d", + agent, result->exit_status); ++ rc = pcmk_rc2legacy(stonith__result2rc(result)); + stonith__destroy_action(action); +- return pcmk_rc2legacy(stonith__result2rc(result)); ++ return rc; + } + + if (result->action_stdout == NULL) { +-- +2.27.0 + diff --git a/SOURCES/002-pacemakerd-options.patch b/SOURCES/002-pacemakerd-options.patch deleted file mode 100644 index 56941ec..0000000 --- a/SOURCES/002-pacemakerd-options.patch +++ /dev/null @@ -1,451 +0,0 @@ -From 0d40ebf10b1794ece2c5c9768ea7222d3834d3b3 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 13 May 2021 11:42:18 -0400 -Subject: [PATCH 1/4] Build: Use a different variable to find man page - includes. - -With other programs outside of the tools directory being converted to -use glib for command line handling, their includes are not going to be -in tools/. So we need to use a different autoconf variable to find -them. ---- - mk/common.mk | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/mk/common.mk b/mk/common.mk -index b247670..aa59feb 100644 ---- a/mk/common.mk -+++ b/mk/common.mk -@@ -1,5 +1,5 @@ - # --# Copyright 2014-2020 the Pacemaker project contributors -+# Copyright 2014-2021 the Pacemaker project contributors - # - # The version control history for this file may have further details. - # -@@ -68,11 +68,11 @@ HELP2MAN_ARGS = -N --section 8 --name "Part of the Pacemaker cluster resource ma - # and all wrappers to C code. 
- %.8: % $(MAN8DEPS) - $(AM_V_at)chmod a+x $(abs_builddir)/$< -- $(AM_V_MAN)if [ -f $(top_srcdir)/tools/$@.inc ]; then \ -+ $(AM_V_MAN)if [ -f $(abs_srcdir)/$@.inc ]; then \ - PATH=$(abs_builddir):$$PATH $(HELP2MAN) $(HELP2MAN_ARGS) \ - -h --help-all \ - --no-discard-stderr \ -- -i $(top_srcdir)/tools/$@.inc $(abs_builddir)/$< \ -+ -i $(abs_srcdir)/$@.inc $(abs_builddir)/$< \ - | sed -f $(top_srcdir)/tools/fix-manpages > $@ ; \ - else \ - PATH=$(abs_builddir):$$PATH $(HELP2MAN) $(HELP2MAN_ARGS) \ --- -1.8.3.1 - - -From c7ab1d901bcbbf0137277e783e072777ca2f82d9 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 13 May 2021 11:44:16 -0400 -Subject: [PATCH 2/4] Refactor: daemons: Remove the pid_file variable from - pacemakerd. - -It's never used anywhere. ---- - daemons/pacemakerd/pacemakerd.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c -index 8ec9708..03d688e 100644 ---- a/daemons/pacemakerd/pacemakerd.c -+++ b/daemons/pacemakerd/pacemakerd.c -@@ -27,8 +27,7 @@ - - static crm_trigger_t *shutdown_trigger = NULL; - static crm_trigger_t *startup_trigger = NULL; --static const char *pid_file = PCMK_RUN_DIR "/pacemaker.pid"; - - /* state we report when asked via pacemakerd-api status-ping */ - static const char *pacemakerd_state = XML_PING_ATTR_PACEMAKERDSTATE_INIT; - static gboolean running_with_sbd = FALSE; /* local copy */ -@@ -224,7 +222,6 @@ main(int argc, char **argv) - /* Legacy */ - break; - case 'p': -- pid_file = optarg; - break; - case 's': - pcmk__set_env_option("node_start_state", "standby"); --- -1.8.3.1 - - -From 98990eed9f6a5dbde7c8a5aa0783e93d5479295b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 13 May 2021 13:14:38 -0400 -Subject: [PATCH 3/4] Refactor: daemons: Use glib for command line handling in - pacemakerd. 
- ---- - daemons/pacemakerd/Makefile.am | 2 + - daemons/pacemakerd/pacemakerd.8.inc | 5 + - daemons/pacemakerd/pacemakerd.c | 195 ++++++++++++++++++------------------ - 3 files changed, 102 insertions(+), 100 deletions(-) - create mode 100644 daemons/pacemakerd/pacemakerd.8.inc - -diff --git a/daemons/pacemakerd/Makefile.am b/daemons/pacemakerd/Makefile.am -index cc657f5..84517a3 100644 ---- a/daemons/pacemakerd/Makefile.am -+++ b/daemons/pacemakerd/Makefile.am -@@ -15,6 +15,8 @@ if BUILD_SYSTEMD - systemdsystemunit_DATA = pacemaker.service - endif - -+EXTRA_DIST = pacemakerd.8.inc -+ - ## SOURCES - - noinst_HEADERS = pacemakerd.h -diff --git a/daemons/pacemakerd/pacemakerd.8.inc b/daemons/pacemakerd/pacemakerd.8.inc -new file mode 100644 -index 0000000..902af4e ---- /dev/null -+++ b/daemons/pacemakerd/pacemakerd.8.inc -@@ -0,0 +1,5 @@ -+[synopsis] -+pacemakerd [options] -+ -+/subsidiary Pacemaker daemons/ -+.SH OPTIONS -diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c -index 03d688e..ce194bf 100644 ---- a/daemons/pacemakerd/pacemakerd.c -+++ b/daemons/pacemakerd/pacemakerd.c -@@ -23,12 +23,54 @@ - #include - #include - #include -+#include - #include - #include - - #include - #include - -+#define SUMMARY "pacemakerd - primary Pacemaker daemon that launches and monitors all subsidiary Pacemaker daemons" -+ -+struct { -+ gboolean features; -+ gboolean foreground; -+ gboolean shutdown; -+ gboolean standby; -+} options; -+ -+static gboolean -+pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { -+ return TRUE; -+} -+ -+static gboolean -+standby_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { -+ options.standby = TRUE; -+ pcmk__set_env_option("node_start_state", "standby"); -+ return TRUE; -+} -+ -+static GOptionEntry entries[] = { -+ { "features", 'F', 0, G_OPTION_ARG_NONE, &options.features, -+ "Display full version and list of features Pacemaker was built with", -+ NULL }, 
-+ { "foreground", 'f', 0, G_OPTION_ARG_NONE, &options.foreground, -+ "(Ignored) Pacemaker always runs in the foreground", -+ NULL }, -+ { "pid-file", 'p', 0, G_OPTION_ARG_CALLBACK, pid_cb, -+ "(Ignored) Daemon pid file location", -+ "FILE" }, -+ { "shutdown", 'S', 0, G_OPTION_ARG_NONE, &options.shutdown, -+ "Instruct Pacemaker to shutdown on this machine", -+ NULL }, -+ { "standby", 's', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, standby_cb, -+ "Start node in standby state", -+ NULL }, -+ -+ { NULL } -+}; -+ - static gboolean fatal_error = FALSE; - static GMainLoop *mainloop = NULL; - static bool global_keep_tracking = false; -@@ -642,49 +685,6 @@ pcmk_sigquit(int nsig) - .connection_destroyed = pcmk_ipc_destroy - }; - --static pcmk__cli_option_t long_options[] = { -- // long option, argument type, storage, short option, description, flags -- { -- "help", no_argument, NULL, '?', -- "\tThis text", pcmk__option_default -- }, -- { -- "version", no_argument, NULL, '$', -- "\tVersion information", pcmk__option_default -- }, -- { -- "verbose", no_argument, NULL, 'V', -- "\tIncrease debug output", pcmk__option_default -- }, -- { -- "shutdown", no_argument, NULL, 'S', -- "\tInstruct Pacemaker to shutdown on this machine", pcmk__option_default -- }, -- { -- "features", no_argument, NULL, 'F', -- "\tDisplay full version and list of features Pacemaker was built with", -- pcmk__option_default -- }, -- { -- "-spacer-", no_argument, NULL, '-', -- "\nAdditional Options:", pcmk__option_default -- }, -- { -- "foreground", no_argument, NULL, 'f', -- "\t(Ignored) Pacemaker always runs in the foreground", -- pcmk__option_default -- }, -- { -- "pid-file", required_argument, NULL, 'p', -- "\t(Ignored) Daemon pid file location", pcmk__option_default -- }, -- { -- "standby", no_argument, NULL, 's', -- "\tStart node in standby state", pcmk__option_default -- }, -- { 0, 0, 0, 0 } --}; -- - static void - mcp_chown(const char *path, uid_t uid, gid_t gid) - { -@@ -1168,83 +1211,66 @@ 
request_shutdown(crm_ipc_t *ipc) - return status; - } - -+static GOptionContext * -+build_arg_context(pcmk__common_args_t *args) { -+ GOptionContext *context = NULL; -+ -+ context = pcmk__build_arg_context(args, NULL, NULL, NULL); -+ pcmk__add_main_args(context, entries); -+ return context; -+} -+ - int - main(int argc, char **argv) - { -- int flag; -- int argerr = 0; -+ crm_exit_t exit_code = CRM_EX_OK; -+ -+ GError *error = NULL; -+ -+ pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); -+ GOptionContext *context = build_arg_context(args); - -- int option_index = 0; - bool old_instance_connected = false; -- gboolean shutdown = FALSE; - - crm_ipc_t *old_instance = NULL; - qb_ipcs_service_t *ipcs = NULL; - - crm_log_preinit(NULL, argc, argv); -- pcmk__set_cli_options(NULL, "[options]", long_options, -- "primary Pacemaker daemon that launches and " -- "monitors all subsidiary Pacemaker daemons"); - mainloop_add_signal(SIGHUP, pcmk_ignore); - mainloop_add_signal(SIGQUIT, pcmk_sigquit); - -- while (1) { -- flag = pcmk__next_cli_option(argc, argv, &option_index, NULL); -- if (flag == -1) -- break; -- -- switch (flag) { -- case 'V': -- crm_bump_log_level(argc, argv); -- break; -- case 'f': -- /* Legacy */ -- break; -- case 'p': -- break; -- case 's': -- pcmk__set_env_option("node_start_state", "standby"); -- break; -- case '$': -- case '?': -- pcmk__cli_help(flag, CRM_EX_OK); -- break; -- case 'S': -- shutdown = TRUE; -- break; -- case 'F': -- printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, -- CRM_FEATURE_SET, CRM_FEATURES); -- crm_exit(CRM_EX_OK); -- default: -- printf("Argument code 0%o (%c) is not (?yet?) 
supported\n", flag, flag); -- ++argerr; -- break; -- } -+ if (!g_option_context_parse_strv(context, &processed_args, &error)) { -+ exit_code = CRM_EX_USAGE; -+ goto done; - } - -- if (optind < argc) { -- printf("non-option ARGV-elements: "); -- while (optind < argc) -- printf("%s ", argv[optind++]); -- printf("\n"); -- } -- if (argerr) { -- pcmk__cli_help('?', CRM_EX_USAGE); -+ if (options.features) { -+ printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, -+ CRM_FEATURE_SET, CRM_FEATURES); -+ exit_code = CRM_EX_OK; -+ goto done; - } - -+ if (args->version) { -+ g_strfreev(processed_args); -+ pcmk__free_arg_context(context); -+ /* FIXME: When pacemakerd is converted to use formatted output, this can go. */ -+ pcmk__cli_help('v', CRM_EX_USAGE); -+ } - - setenv("LC_ALL", "C", 1); - - pcmk__set_env_option("mcp", "true"); - -+ pcmk__cli_init_logging("pacemakerd", args->verbosity); - crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); - - crm_debug("Checking for existing Pacemaker instance"); - old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); - old_instance_connected = crm_ipc_connect(old_instance); - -- if (shutdown) { -+ if (options.shutdown) { - if (old_instance_connected) { - crm_exit(request_shutdown(old_instance)); - } else { -@@ -1253,22 +1279,25 @@ main(int argc, char **argv) - "Pacemaker instance: %s", strerror(errno)); - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); -- crm_exit(CRM_EX_DISCONNECT); -+ exit_code = CRM_EX_DISCONNECT; -+ goto done; - } - - } else if (old_instance_connected) { - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); - crm_err("Aborting start-up because active Pacemaker instance found"); -- crm_exit(CRM_EX_FATAL); -+ exit_code = CRM_EX_FATAL; -+ goto done; - } - - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); - - #ifdef SUPPORT_COROSYNC - if (mcp_read_config() == FALSE) { -- crm_exit(CRM_EX_UNAVAILABLE); -+ exit_code = CRM_EX_UNAVAILABLE; -+ goto 
done; - } - #endif - -@@ -1292,7 +1321,8 @@ main(int argc, char **argv) - #ifdef SUPPORT_COROSYNC - /* Allows us to block shutdown */ - if (!cluster_connect_cfg()) { -- crm_exit(CRM_EX_PROTOCOL); -+ exit_code = CRM_EX_PROTOCOL; -+ goto done; - } - #endif - -@@ -1307,9 +1337,11 @@ main(int argc, char **argv) - case pcmk_rc_ok: - break; - case pcmk_rc_ipc_unauthorized: -- crm_exit(CRM_EX_CANTCREAT); -+ exit_code = CRM_EX_CANTCREAT; -+ goto done; - default: -- crm_exit(CRM_EX_FATAL); -+ exit_code = CRM_EX_FATAL; -+ goto done; - }; - - mainloop_add_signal(SIGTERM, pcmk_shutdown); -@@ -1342,5 +1374,11 @@ main(int argc, char **argv) - #ifdef SUPPORT_COROSYNC - cluster_disconnect_cfg(); - #endif -- crm_exit(CRM_EX_OK); -+ -+done: -+ g_strfreev(processed_args); -+ pcmk__free_arg_context(context); -+ -+ pcmk__output_and_clear_error(error, NULL); -+ crm_exit(exit_code); - } --- -1.8.3.1 - - -From 8f7924fbb2a012bedcad59335b7bebc5020b26e3 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 13 May 2021 13:27:13 -0400 -Subject: [PATCH 4/4] Low: pacemaker.service: Don't start pacemakerd with -f. - -This option is completely ignored by pacemakerd. ---- - daemons/pacemakerd/pacemaker.service.in | 2 +- - doc/sphinx/Clusters_from_Scratch/verification.rst | 2 +- - 2 files changed, 2 insertions(+), 2 deletions(-) - -diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in -index b128ddc..0363a22 100644 ---- a/daemons/pacemakerd/pacemaker.service.in -+++ b/daemons/pacemakerd/pacemaker.service.in -@@ -44,7 +44,7 @@ EnvironmentFile=-@CONFIGDIR@/pacemaker - EnvironmentFile=-@CONFIGDIR@/sbd - SuccessExitStatus=100 - --ExecStart=@sbindir@/pacemakerd -f -+ExecStart=@sbindir@/pacemakerd - - # Systemd v227 and above can limit the number of processes spawned by a - # service. 
That is a bad idea for an HA cluster resource manager, so disable it -diff --git a/doc/sphinx/Clusters_from_Scratch/verification.rst b/doc/sphinx/Clusters_from_Scratch/verification.rst -index 9d647f8..b7fa20e 100644 ---- a/doc/sphinx/Clusters_from_Scratch/verification.rst -+++ b/doc/sphinx/Clusters_from_Scratch/verification.rst -@@ -103,7 +103,7 @@ the necessary processes are running: - 2 ? S 0:00 [kthreadd] - ...lots of processes... - 17121 ? SLsl 0:01 /usr/sbin/corosync -f -- 17133 ? Ss 0:00 /usr/sbin/pacemakerd -f -+ 17133 ? Ss 0:00 /usr/sbin/pacemakerd - 17134 ? Ss 0:00 \_ /usr/libexec/pacemaker/pacemaker-based - 17135 ? Ss 0:00 \_ /usr/libexec/pacemaker/pacemaker-fenced - 17136 ? Ss 0:00 \_ /usr/libexec/pacemaker/pacemaker-execd --- -1.8.3.1 - diff --git a/SOURCES/003-fencing-reasons.patch b/SOURCES/003-fencing-reasons.patch new file mode 100644 index 0000000..666a12a --- /dev/null +++ b/SOURCES/003-fencing-reasons.patch @@ -0,0 +1,2476 @@ +From 8e6362cb2129bd56f817d449a195f3da87a545fa Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Nov 2021 14:28:56 -0600 +Subject: [PATCH 01/13] Refactor: libcrmcommon,fencer: convenience macro for + initializing results + +for future reuse +--- + daemons/fenced/fenced_commands.c | 14 ++------------ + include/crm/common/results_internal.h | 15 +++++++++++++++ + 2 files changed, 17 insertions(+), 12 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 87600573e..9f2f1cc40 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -388,12 +388,7 @@ static void + report_internal_result(async_command_t *cmd, int exit_status, + int execution_status, const char *exit_reason) + { +- pcmk__action_result_t result = { +- // Ensure we don't pass garbage to free() +- .exit_reason = NULL, +- .action_stdout = NULL, +- .action_stderr = NULL +- }; ++ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + pcmk__set_result(&result, exit_status, 
execution_status, exit_reason); + cmd->done_cb(0, &result, cmd); +@@ -2616,12 +2611,7 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data) + } + + if (device == NULL) { // No device found +- pcmk__action_result_t result = { +- // Ensure we don't pass garbage to free() +- .exit_reason = NULL, +- .action_stdout = NULL, +- .action_stderr = NULL +- }; ++ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT; + + pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE, + "No fence device configured for target"); +diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h +index 804bf2a7a..6befaa0ed 100644 +--- a/include/crm/common/results_internal.h ++++ b/include/crm/common/results_internal.h +@@ -30,6 +30,21 @@ typedef struct { + char *action_stderr; // Action error output + } pcmk__action_result_t; + ++/*! ++ * \internal ++ * \brief Static initialization for an action result ++ * ++ * \note Importantly, this ensures pcmk__reset_result() won't try to free ++ * garbage. ++ */ ++#define PCMK__UNKNOWN_RESULT { \ ++ .exit_status = CRM_EX_OK, \ ++ .execution_status = PCMK_EXEC_UNKNOWN, \ ++ .exit_reason = NULL, \ ++ .action_stdout = NULL, \ ++ .action_stderr = NULL, \ ++ } ++ + void pcmk__set_result(pcmk__action_result_t *result, int exit_status, + enum pcmk_exec_status exec_status, + const char *exit_reason); +-- +2.27.0 + + +From 0937c92476ac737a5f5146932824bde8bdd7db98 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Nov 2021 16:02:27 -0600 +Subject: [PATCH 02/13] Refactor: various: add convenience function for + checking result success + +A successful pcmk__action_result_t has both exit status CRM_EX_OK (a.k.a +PCMK_OCF_OK) and execution status PCMK_EXEC_DONE. Since checking that is +clunky, we sometimes just check exit status, which is less than ideal. + +The convenience function makes it easy to check both, and improves readability. 
+--- + daemons/controld/controld_remote_ra.c | 4 ++-- + daemons/execd/execd_commands.c | 12 ++++++------ + daemons/fenced/fenced_commands.c | 14 ++++++-------- + include/crm/common/results_internal.h | 16 ++++++++++++++++ + lib/fencing/st_client.c | 4 ++-- + lib/fencing/st_rhcs.c | 2 +- + 6 files changed, 33 insertions(+), 19 deletions(-) + +diff --git a/daemons/controld/controld_remote_ra.c b/daemons/controld/controld_remote_ra.c +index 74cbfd673..55ac162c7 100644 +--- a/daemons/controld/controld_remote_ra.c ++++ b/daemons/controld/controld_remote_ra.c +@@ -297,7 +297,7 @@ static void + check_remote_node_state(remote_ra_cmd_t *cmd) + { + /* Only successful actions can change node state */ +- if (cmd->result.exit_status != PCMK_OCF_OK) { ++ if (!pcmk__result_ok(&(cmd->result))) { + return; + } + +@@ -365,7 +365,7 @@ report_remote_ra_result(remote_ra_cmd_t * cmd) + lrmd__set_result(&op, cmd->result.exit_status, cmd->result.execution_status, + cmd->result.exit_reason); + +- if (cmd->reported_success && (cmd->result.exit_status != PCMK_OCF_OK)) { ++ if (cmd->reported_success && !pcmk__result_ok(&(cmd->result))) { + op.t_rcchange = (unsigned int) time(NULL); + /* This edge case will likely never ever occur, but if it does the + * result is that a failure will not be processed correctly. This is only +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index 667525039..02070bf11 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -878,7 +878,7 @@ action_complete(svc_action_t * action) + } + + if (pcmk__str_eq(rclass, PCMK_RESOURCE_CLASS_SYSTEMD, pcmk__str_casei)) { +- if ((cmd->result.exit_status == PCMK_OCF_OK) ++ if (pcmk__result_ok(&(cmd->result)) + && pcmk__strcase_any_of(cmd->action, "start", "stop", NULL)) { + /* systemd returns from start and stop actions after the action + * begins, not after it completes. 
We have to jump through a few +@@ -894,7 +894,7 @@ action_complete(svc_action_t * action) + if (cmd->result.execution_status == PCMK_EXEC_PENDING) { + goagain = true; + +- } else if ((cmd->result.exit_status == PCMK_OCF_OK) ++ } else if (pcmk__result_ok(&(cmd->result)) + && pcmk__str_eq(cmd->real_action, "stop", pcmk__str_casei)) { + goagain = true; + +@@ -927,12 +927,12 @@ action_complete(svc_action_t * action) + #if SUPPORT_NAGIOS + if (rsc && pcmk__str_eq(rsc->class, PCMK_RESOURCE_CLASS_NAGIOS, pcmk__str_casei)) { + if (action_matches(cmd, "monitor", 0) +- && (cmd->result.exit_status == PCMK_OCF_OK)) { ++ && pcmk__result_ok(&(cmd->result))) { + /* Successfully executed --version for the nagios plugin */ + cmd->result.exit_status = PCMK_OCF_NOT_RUNNING; + + } else if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei) +- && (cmd->result.exit_status != PCMK_OCF_OK)) { ++ && !pcmk__result_ok(&(cmd->result))) { + #ifdef PCMK__TIME_USE_CGT + goagain = true; + #endif +@@ -955,7 +955,7 @@ action_complete(svc_action_t * action) + cmd->start_delay = delay; + cmd->timeout = timeout_left; + +- if (cmd->result.exit_status == PCMK_OCF_OK) { ++ if (pcmk__result_ok(&(cmd->result))) { + crm_debug("%s %s may still be in progress: re-scheduling (elapsed=%dms, remaining=%dms, start_delay=%dms)", + cmd->rsc_id, cmd->real_action, time_sum, timeout_left, delay); + +@@ -1066,7 +1066,7 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc) + cmd->interval_ms, rc); + + // Certain successful actions change the known state of the resource +- if ((rsc != NULL) && (cmd->result.exit_status == PCMK_OCF_OK)) { ++ if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { + if (pcmk__str_eq(cmd->action, "start", pcmk__str_casei)) { + rsc->st_probe_rc = pcmk_ok; // maps to PCMK_OCF_OK + } else if (pcmk__str_eq(cmd->action, "stop", pcmk__str_casei)) { +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 9f2f1cc40..26501a4b3 100644 +--- 
a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1188,8 +1188,7 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result, + + mainloop_set_trigger(dev->work); + +- if ((result->execution_status == PCMK_EXEC_DONE) +- && (result->exit_status == CRM_EX_OK)) { ++ if (pcmk__result_ok(result)) { + crm_info("Refreshing target list for %s", dev->id); + g_list_free_full(dev->targets, free); + dev->targets = stonith__parse_targets(result->action_stdout); +@@ -2310,15 +2309,14 @@ log_async_result(async_command_t *cmd, const pcmk__action_result_t *result, + GString *msg = g_string_sized_new(80); // Reasonable starting size + + // Choose log levels appropriately if we have a result +- if ((result->execution_status == PCMK_EXEC_DONE) +- && (result->exit_status == CRM_EX_OK)) { // Success ++ if (pcmk__result_ok(result)) { + log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE; + if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { + output_log_level = LOG_DEBUG; + } + next = NULL; +- } else { // Failure ++ } else { + log_level = (cmd->victim == NULL)? 
LOG_NOTICE : LOG_ERR; + if ((result->action_stdout != NULL) + && !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) { +@@ -2482,7 +2480,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + /* The device is ready to do something else now */ + device = g_hash_table_lookup(device_list, cmd->device); + if (device) { +- if (!device->verified && (result->exit_status == CRM_EX_OK) && ++ if (!device->verified && pcmk__result_ok(result) && + (pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) { + + device->verified = TRUE; +@@ -2491,7 +2489,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + mainloop_set_trigger(device->work); + } + +- if (result->exit_status == CRM_EX_OK) { ++ if (pcmk__result_ok(result)) { + GList *iter; + /* see if there are any required devices left to execute for this op */ + for (iter = cmd->device_next; iter != NULL; iter = iter->next) { +@@ -2523,7 +2521,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data) + + send_async_reply(cmd, result, pid, false); + +- if (result->exit_status != CRM_EX_OK) { ++ if (!pcmk__result_ok(result)) { + goto done; + } + +diff --git a/include/crm/common/results_internal.h b/include/crm/common/results_internal.h +index 6befaa0ed..0c5833937 100644 +--- a/include/crm/common/results_internal.h ++++ b/include/crm/common/results_internal.h +@@ -54,4 +54,20 @@ void pcmk__set_result_output(pcmk__action_result_t *result, + + void pcmk__reset_result(pcmk__action_result_t *result); + ++/*! 
++ * \internal ++ * \brief Check whether a result is OK ++ * ++ * \param[in] result ++ * ++ * \return true if the result's exit status is CRM_EX_OK and its ++ * execution status is PCMK_EXEC_DONE, otherwise false ++ */ ++static inline bool ++pcmk__result_ok(const pcmk__action_result_t *result) ++{ ++ return (result != NULL) && (result->exit_status == CRM_EX_OK) ++ && (result->execution_status == PCMK_EXEC_DONE); ++} ++ + #endif // PCMK__COMMON_RESULTS_INTERNAL__H +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 2fbff7f24..af461d0d4 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -760,7 +760,7 @@ stonith__result2rc(const pcmk__action_result_t *result) + default: break; + } + +- if (result->exit_status == CRM_EX_OK) { ++ if (pcmk__result_ok(result)) { + return pcmk_rc_ok; + } + +@@ -797,7 +797,7 @@ stonith_action_async_done(svc_action_t *svc_action) + + log_action(action, action->pid); + +- if ((action->result.exit_status != CRM_EX_OK) ++ if (!pcmk__result_ok(&(action->result)) + && update_remaining_timeout(action)) { + + int rc = internal_stonith_action_execute(action); +diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c +index 6c8cbedc7..865e04bc2 100644 +--- a/lib/fencing/st_rhcs.c ++++ b/lib/fencing/st_rhcs.c +@@ -148,7 +148,7 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata) + return rc; + } + +- if (result->exit_status != CRM_EX_OK) { ++ if (!pcmk__result_ok(result)) { + crm_warn("Metadata action for %s returned error code %d", + agent, result->exit_status); + rc = pcmk_rc2legacy(stonith__result2rc(result)); +-- +2.27.0 + + +From 4c39ff00a0c028354a9da7f80986f7e34b05ba08 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Nov 2021 16:07:01 -0600 +Subject: [PATCH 03/13] Low: fencing: improve mapping of execution status to + legacy return code + +PCMK_EXEC_PENDING is likely not possible with the current code, but map it to +EINPROGRESS for completeness. 
+ +PCMK_EXEC_INVALID is not yet used by the fencer but will be. +--- + lib/fencing/st_client.c | 30 ++++++++++++++++++++++++++---- + 1 file changed, 26 insertions(+), 4 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index af461d0d4..93513e9f3 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -749,7 +749,12 @@ update_remaining_timeout(stonith_action_t * action) + int + stonith__result2rc(const pcmk__action_result_t *result) + { ++ if (pcmk__result_ok(result)) { ++ return pcmk_rc_ok; ++ } ++ + switch (result->execution_status) { ++ case PCMK_EXEC_PENDING: return EINPROGRESS; + case PCMK_EXEC_CANCELLED: return ECANCELED; + case PCMK_EXEC_TIMEOUT: return ETIME; + case PCMK_EXEC_NOT_INSTALLED: return ENOENT; +@@ -757,11 +762,28 @@ stonith__result2rc(const pcmk__action_result_t *result) + case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN; + case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV; + case PCMK_EXEC_NO_SECRETS: return EACCES; +- default: break; +- } + +- if (pcmk__result_ok(result)) { +- return pcmk_rc_ok; ++ /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API ++ * operations that don't involve executing an agent (for example, ++ * registering devices). This allows us to use the CRM_EX_* codes in the ++ * exit status for finer-grained responses. ++ */ ++ case PCMK_EXEC_INVALID: ++ switch (result->exit_status) { ++ case CRM_EX_INSUFFICIENT_PRIV: return EACCES; ++ case CRM_EX_PROTOCOL: return EPROTO; ++ ++ /* CRM_EX_EXPIRED is used for orphaned fencing operations left ++ * over from a previous instance of the fencer. For API backward ++ * compatibility, this is mapped to the previously used code for ++ * this case, EHOSTUNREACH. 
++ */ ++ case CRM_EX_EXPIRED: return EHOSTUNREACH; ++ default: break; ++ } ++ ++ default: ++ break; + } + + // Try to provide useful error code based on result's error output +-- +2.27.0 + + +From 4e638783d1cd7c9398a603fc6df7e9d868262b16 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 11:41:12 -0600 +Subject: [PATCH 04/13] Refactor: libstonithd: separate action-related code + into own source file + +Everything related to stonith_action_t has been moved from st_client.c to a new +st_actions.c, since st_client.c was ridiculously large, and the action stuff +isn't all client-related. No code was changed. + +Before: + 2804 st_client.c + +After: + 545 lib/fencing/st_actions.c + 2278 lib/fencing/st_client.c +--- + lib/fencing/Makefile.am | 2 +- + lib/fencing/st_actions.c | 545 +++++++++++++++++++++++++++++++++++++++ + lib/fencing/st_client.c | 528 +------------------------------------ + 3 files changed, 547 insertions(+), 528 deletions(-) + create mode 100644 lib/fencing/st_actions.c + +diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am +index 205c4873d..dac215c16 100644 +--- a/lib/fencing/Makefile.am ++++ b/lib/fencing/Makefile.am +@@ -22,7 +22,7 @@ libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB) + libstonithd_la_LIBADD = $(top_builddir)/lib/common/libcrmcommon.la + libstonithd_la_LIBADD += $(top_builddir)/lib/services/libcrmservice.la + +-libstonithd_la_SOURCES = st_client.c st_output.c st_rhcs.c ++libstonithd_la_SOURCES = st_actions.c st_client.c st_output.c st_rhcs.c + if BUILD_LHA_SUPPORT + libstonithd_la_SOURCES += st_lha.c + endif +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +new file mode 100644 +index 000000000..64d3afd5d +--- /dev/null ++++ b/lib/fencing/st_actions.c +@@ -0,0 +1,545 @@ ++/* ++ * Copyright 2004-2021 the Pacemaker project contributors ++ * ++ * The version control history for this file may have further details. 
++ * ++ * This source code is licensed under the GNU Lesser General Public License ++ * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include "fencing_private.h" ++ ++struct stonith_action_s { ++ /*! user defined data */ ++ char *agent; ++ char *action; ++ char *victim; ++ GHashTable *args; ++ int timeout; ++ int async; ++ void *userdata; ++ void (*done_cb) (int pid, const pcmk__action_result_t *result, ++ void *user_data); ++ void (*fork_cb) (int pid, void *user_data); ++ ++ svc_action_t *svc_action; ++ ++ /*! internal timing information */ ++ time_t initial_start_time; ++ int tries; ++ int remaining_timeout; ++ int max_retries; ++ ++ int pid; ++ pcmk__action_result_t result; ++}; ++ ++static int internal_stonith_action_execute(stonith_action_t *action); ++static void log_action(stonith_action_t *action, pid_t pid); ++ ++/*! ++ * \internal ++ * \brief Set an action's result based on services library result ++ * ++ * \param[in] action Fence action to set result for ++ * \param[in] svc_action Service action to get result from ++ */ ++static void ++set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action) ++{ ++ pcmk__set_result(&(action->result), svc_action->rc, svc_action->status, ++ services__exit_reason(svc_action)); ++ pcmk__set_result_output(&(action->result), ++ services__grab_stdout(svc_action), ++ services__grab_stderr(svc_action)); ++} ++ ++static void ++log_action(stonith_action_t *action, pid_t pid) ++{ ++ /* The services library has already logged the output at info or debug ++ * level, so just raise to warning for stderr. 
++ */ ++ if (action->result.action_stderr != NULL) { ++ /* Logging the whole string confuses syslog when the string is xml */ ++ char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); ++ ++ crm_log_output(LOG_WARNING, prefix, action->result.action_stderr); ++ free(prefix); ++ } ++} ++ ++static void ++append_config_arg(gpointer key, gpointer value, gpointer user_data) ++{ ++ /* The fencer will filter "action" out when it registers the device, ++ * but ignore it here in case any external API users don't. ++ * ++ * Also filter out parameters handled directly by Pacemaker. ++ */ ++ if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) ++ && !pcmk_stonith_param(key) ++ && (strstr(key, CRM_META) == NULL) ++ && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) { ++ ++ crm_trace("Passing %s=%s with fence action", ++ (const char *) key, (const char *) (value? value : "")); ++ g_hash_table_insert((GHashTable *) user_data, ++ strdup(key), strdup(value? value : "")); ++ } ++} ++ ++static GHashTable * ++make_args(const char *agent, const char *action, const char *victim, ++ uint32_t victim_nodeid, GHashTable * device_args, ++ GHashTable * port_map, const char *host_arg) ++{ ++ GHashTable *arg_list = NULL; ++ const char *value = NULL; ++ ++ CRM_CHECK(action != NULL, return NULL); ++ ++ arg_list = pcmk__strkey_table(free, free); ++ ++ // Add action to arguments (using an alias if requested) ++ if (device_args) { ++ char buffer[512]; ++ ++ snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); ++ value = g_hash_table_lookup(device_args, buffer); ++ if (value) { ++ crm_debug("Substituting '%s' for fence action %s targeting %s", ++ value, action, victim); ++ action = value; ++ } ++ } ++ g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP), ++ strdup(action)); ++ ++ /* If this is a fencing operation against another node, add more standard ++ * arguments. 
++ */ ++ if (victim && device_args) { ++ const char *param = NULL; ++ ++ /* Always pass the target's name, per ++ * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md ++ */ ++ g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim)); ++ ++ // If the target's node ID was specified, pass it, too ++ if (victim_nodeid) { ++ char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid); ++ ++ // cts-fencing looks for this log message ++ crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", ++ nodeid, action, victim); ++ g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); ++ } ++ ++ // Check whether target must be specified in some other way ++ param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); ++ if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) ++ && !pcmk__str_eq(param, "none", pcmk__str_casei)) { ++ ++ if (param == NULL) { ++ /* Use the caller's default for pcmk_host_argument, or "port" if ++ * none was given ++ */ ++ param = (host_arg == NULL)? "port" : host_arg; ++ } ++ value = g_hash_table_lookup(device_args, param); ++ ++ if (pcmk__str_eq(value, "dynamic", ++ pcmk__str_casei|pcmk__str_null_matches)) { ++ /* If the host argument was "dynamic" or not explicitly specified, ++ * add it with the target ++ */ ++ const char *alias = NULL; ++ ++ if (port_map) { ++ alias = g_hash_table_lookup(port_map, victim); ++ } ++ if (alias == NULL) { ++ alias = victim; ++ } ++ crm_debug("Passing %s='%s' with fence action %s targeting %s", ++ param, alias, action, victim); ++ g_hash_table_insert(arg_list, strdup(param), strdup(alias)); ++ } ++ } ++ } ++ ++ if (device_args) { ++ g_hash_table_foreach(device_args, append_config_arg, arg_list); ++ } ++ ++ return arg_list; ++} ++ ++/*! 
++ * \internal ++ * \brief Free all memory used by a stonith action ++ * ++ * \param[in,out] action Action to free ++ */ ++void ++stonith__destroy_action(stonith_action_t *action) ++{ ++ if (action) { ++ free(action->agent); ++ if (action->args) { ++ g_hash_table_destroy(action->args); ++ } ++ free(action->action); ++ free(action->victim); ++ if (action->svc_action) { ++ services_action_free(action->svc_action); ++ } ++ pcmk__reset_result(&(action->result)); ++ free(action); ++ } ++} ++ ++/*! ++ * \internal ++ * \brief Get the result of an executed stonith action ++ * ++ * \param[in] action Executed action ++ * ++ * \return Pointer to action's result (or NULL if \p action is NULL) ++ */ ++pcmk__action_result_t * ++stonith__action_result(stonith_action_t *action) ++{ ++ return (action == NULL)? NULL : &(action->result); ++} ++ ++#define FAILURE_MAX_RETRIES 2 ++stonith_action_t * ++stonith_action_create(const char *agent, ++ const char *_action, ++ const char *victim, ++ uint32_t victim_nodeid, ++ int timeout, GHashTable * device_args, ++ GHashTable * port_map, const char *host_arg) ++{ ++ stonith_action_t *action; ++ ++ action = calloc(1, sizeof(stonith_action_t)); ++ action->args = make_args(agent, _action, victim, victim_nodeid, ++ device_args, port_map, host_arg); ++ crm_debug("Preparing '%s' action for %s using agent %s", ++ _action, (victim? 
victim : "no target"), agent); ++ action->agent = strdup(agent); ++ action->action = strdup(_action); ++ if (victim) { ++ action->victim = strdup(victim); ++ } ++ action->timeout = action->remaining_timeout = timeout; ++ action->max_retries = FAILURE_MAX_RETRIES; ++ ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, ++ "Initialization bug in fencing library"); ++ ++ if (device_args) { ++ char buffer[512]; ++ const char *value = NULL; ++ ++ snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); ++ value = g_hash_table_lookup(device_args, buffer); ++ ++ if (value) { ++ action->max_retries = atoi(value); ++ } ++ } ++ ++ return action; ++} ++ ++static gboolean ++update_remaining_timeout(stonith_action_t * action) ++{ ++ int diff = time(NULL) - action->initial_start_time; ++ ++ if (action->tries >= action->max_retries) { ++ crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", ++ action->agent, action->action, action->max_retries); ++ action->remaining_timeout = 0; ++ } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT) ++ && (diff < (action->timeout * 0.7))) { ++ /* only set remaining timeout period if there is 30% ++ * or greater of the original timeout period left */ ++ action->remaining_timeout = action->timeout - diff; ++ } else { ++ action->remaining_timeout = 0; ++ } ++ return action->remaining_timeout ? TRUE : FALSE; ++} ++ ++/*! 
++ * \internal ++ * \brief Map a fencing action result to a standard return code ++ * ++ * \param[in] result Fencing action result to map ++ * ++ * \return Standard Pacemaker return code that best corresponds to \p result ++ */ ++int ++stonith__result2rc(const pcmk__action_result_t *result) ++{ ++ if (pcmk__result_ok(result)) { ++ return pcmk_rc_ok; ++ } ++ ++ switch (result->execution_status) { ++ case PCMK_EXEC_PENDING: return EINPROGRESS; ++ case PCMK_EXEC_CANCELLED: return ECANCELED; ++ case PCMK_EXEC_TIMEOUT: return ETIME; ++ case PCMK_EXEC_NOT_INSTALLED: return ENOENT; ++ case PCMK_EXEC_NOT_SUPPORTED: return EOPNOTSUPP; ++ case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN; ++ case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV; ++ case PCMK_EXEC_NO_SECRETS: return EACCES; ++ ++ /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API ++ * operations that don't involve executing an agent (for example, ++ * registering devices). This allows us to use the CRM_EX_* codes in the ++ * exit status for finer-grained responses. ++ */ ++ case PCMK_EXEC_INVALID: ++ switch (result->exit_status) { ++ case CRM_EX_INSUFFICIENT_PRIV: return EACCES; ++ case CRM_EX_PROTOCOL: return EPROTO; ++ ++ /* CRM_EX_EXPIRED is used for orphaned fencing operations left ++ * over from a previous instance of the fencer. For API backward ++ * compatibility, this is mapped to the previously used code for ++ * this case, EHOSTUNREACH. 
++ */ ++ case CRM_EX_EXPIRED: return EHOSTUNREACH; ++ default: break; ++ } ++ ++ default: ++ break; ++ } ++ ++ // Try to provide useful error code based on result's error output ++ ++ if (result->action_stderr == NULL) { ++ return ENODATA; ++ ++ } else if (strcasestr(result->action_stderr, "timed out") ++ || strcasestr(result->action_stderr, "timeout")) { ++ return ETIME; ++ ++ } else if (strcasestr(result->action_stderr, "unrecognised action") ++ || strcasestr(result->action_stderr, "unrecognized action") ++ || strcasestr(result->action_stderr, "unsupported action")) { ++ return EOPNOTSUPP; ++ } ++ ++ // Oh well, we tried ++ return pcmk_rc_error; ++} ++ ++static void ++stonith_action_async_done(svc_action_t *svc_action) ++{ ++ stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; ++ ++ set_result_from_svc_action(action, svc_action); ++ ++ svc_action->params = NULL; ++ ++ crm_debug("Child process %d performing action '%s' exited with rc %d", ++ action->pid, action->action, svc_action->rc); ++ ++ log_action(action, action->pid); ++ ++ if (!pcmk__result_ok(&(action->result)) ++ && update_remaining_timeout(action)) { ++ ++ int rc = internal_stonith_action_execute(action); ++ if (rc == pcmk_ok) { ++ return; ++ } ++ } ++ ++ if (action->done_cb) { ++ action->done_cb(action->pid, &(action->result), action->userdata); ++ } ++ ++ action->svc_action = NULL; // don't remove our caller ++ stonith__destroy_action(action); ++} ++ ++static void ++stonith_action_async_forked(svc_action_t *svc_action) ++{ ++ stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; ++ ++ action->pid = svc_action->pid; ++ action->svc_action = svc_action; ++ ++ if (action->fork_cb) { ++ (action->fork_cb) (svc_action->pid, action->userdata); ++ } ++ ++ crm_trace("Child process %d performing action '%s' successfully forked", ++ action->pid, action->action); ++} ++ ++static int ++internal_stonith_action_execute(stonith_action_t * action) ++{ ++ int rc = -EPROTO; ++ int is_retry 
= 0; ++ svc_action_t *svc_action = NULL; ++ static int stonith_sequence = 0; ++ char *buffer = NULL; ++ ++ CRM_CHECK(action != NULL, return -EINVAL); ++ ++ if ((action->action == NULL) || (action->args == NULL) ++ || (action->agent == NULL)) { ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR, ++ PCMK_EXEC_ERROR_FATAL, "Bug in fencing library"); ++ return -EINVAL; ++ } ++ ++ if (!action->tries) { ++ action->initial_start_time = time(NULL); ++ } ++ action->tries++; ++ ++ if (action->tries > 1) { ++ crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", ++ action->tries, action->agent, action->action, action->remaining_timeout); ++ is_retry = 1; ++ } ++ ++ buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", ++ basename(action->agent)); ++ svc_action = services_action_create_generic(buffer, NULL); ++ free(buffer); ++ ++ if (svc_action->rc != PCMK_OCF_UNKNOWN) { ++ set_result_from_svc_action(action, svc_action); ++ services_action_free(svc_action); ++ return -E2BIG; ++ } ++ ++ svc_action->timeout = 1000 * action->remaining_timeout; ++ svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); ++ svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), ++ action->action, action->tries); ++ svc_action->agent = strdup(action->agent); ++ svc_action->sequence = stonith_sequence++; ++ svc_action->params = action->args; ++ svc_action->cb_data = (void *) action; ++ svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, ++ LOG_TRACE, "Action", ++ svc_action->id, svc_action->flags, ++ SVC_ACTION_NON_BLOCKED, ++ "SVC_ACTION_NON_BLOCKED"); ++ ++ /* keep retries from executing out of control and free previous results */ ++ if (is_retry) { ++ pcmk__reset_result(&(action->result)); ++ sleep(1); ++ } ++ ++ if (action->async) { ++ /* async */ ++ if (services_action_async_fork_notify(svc_action, ++ &stonith_action_async_done, ++ &stonith_action_async_forked)) { ++ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, ++ PCMK_EXEC_PENDING, 
NULL); ++ return pcmk_ok; ++ } ++ ++ } else if (services_action_sync(svc_action)) { // sync success ++ rc = pcmk_ok; ++ ++ } else { // sync failure ++ rc = -ECONNABORTED; ++ } ++ ++ set_result_from_svc_action(action, svc_action); ++ svc_action->params = NULL; ++ services_action_free(svc_action); ++ return rc; ++} ++ ++/*! ++ * \internal ++ * \brief Kick off execution of an async stonith action ++ * ++ * \param[in,out] action Action to be executed ++ * \param[in,out] userdata Datapointer to be passed to callbacks ++ * \param[in] done Callback to notify action has failed/succeeded ++ * \param[in] fork_callback Callback to notify successful fork of child ++ * ++ * \return pcmk_ok if ownership of action has been taken, -errno otherwise ++ */ ++int ++stonith_action_execute_async(stonith_action_t * action, ++ void *userdata, ++ void (*done) (int pid, ++ const pcmk__action_result_t *result, ++ void *user_data), ++ void (*fork_cb) (int pid, void *user_data)) ++{ ++ if (!action) { ++ return -EINVAL; ++ } ++ ++ action->userdata = userdata; ++ action->done_cb = done; ++ action->fork_cb = fork_cb; ++ action->async = 1; ++ ++ return internal_stonith_action_execute(action); ++} ++ ++/*! 
++ * \internal ++ * \brief Execute a stonith action ++ * ++ * \param[in,out] action Action to execute ++ * ++ * \return pcmk_ok on success, -errno otherwise ++ */ ++int ++stonith__execute(stonith_action_t *action) ++{ ++ int rc = pcmk_ok; ++ ++ CRM_CHECK(action != NULL, return -EINVAL); ++ ++ // Keep trying until success, max retries, or timeout ++ do { ++ rc = internal_stonith_action_execute(action); ++ } while ((rc != pcmk_ok) && update_remaining_timeout(action)); ++ ++ return rc; ++} +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 93513e9f3..944cd1863 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -8,28 +8,20 @@ + */ + + #include +-#include ++ + #include + #include + #include + #include + #include +-#include + #include +- +-#include + #include +-#include +- + #include + + #include + #include + #include + #include +-#include +-#include +-#include + + #include + +@@ -37,31 +29,6 @@ + + CRM_TRACE_INIT_DATA(stonith); + +-struct stonith_action_s { +- /*! user defined data */ +- char *agent; +- char *action; +- char *victim; +- GHashTable *args; +- int timeout; +- int async; +- void *userdata; +- void (*done_cb) (int pid, const pcmk__action_result_t *result, +- void *user_data); +- void (*fork_cb) (int pid, void *user_data); +- +- svc_action_t *svc_action; +- +- /*! internal timing information */ +- time_t initial_start_time; +- int tries; +- int remaining_timeout; +- int max_retries; +- +- int pid; +- pcmk__action_result_t result; +-}; +- + typedef struct stonith_private_s { + char *token; + crm_ipc_t *ipc; +@@ -118,8 +85,6 @@ static int stonith_send_command(stonith_t *stonith, const char *op, + + static void stonith_connection_destroy(gpointer user_data); + static void stonith_send_notification(gpointer data, gpointer user_data); +-static int internal_stonith_action_execute(stonith_action_t * action); +-static void log_action(stonith_action_t *action, pid_t pid); + + /*! 
+ * \brief Get agent namespace by name +@@ -196,23 +161,6 @@ stonith_get_namespace(const char *agent, const char *namespace_s) + return st_namespace_invalid; + } + +-/*! +- * \internal +- * \brief Set an action's result based on services library result +- * +- * \param[in] action Fence action to set result for +- * \param[in] svc_action Service action to get result from +- */ +-static void +-set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action) +-{ +- pcmk__set_result(&(action->result), svc_action->rc, svc_action->status, +- services__exit_reason(svc_action)); +- pcmk__set_result_output(&(action->result), +- services__grab_stdout(svc_action), +- services__grab_stderr(svc_action)); +-} +- + gboolean + stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) + { +@@ -273,21 +221,6 @@ stonith__watchdog_fencing_enabled_for_node(const char *node) + return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); + } + +-static void +-log_action(stonith_action_t *action, pid_t pid) +-{ +- /* The services library has already logged the output at info or debug +- * level, so just raise to warning for stderr. +- */ +- if (action->result.action_stderr != NULL) { +- /* Logging the whole string confuses syslog when the string is xml */ +- char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid); +- +- crm_log_output(LOG_WARNING, prefix, action->result.action_stderr); +- free(prefix); +- } +-} +- + /* when cycling through the list we don't want to delete items + so just mark them and when we know nobody is using the list + loop over it to remove the marked items +@@ -530,465 +463,6 @@ stonith_api_register_level(stonith_t * st, int options, const char *node, int le + level, device_list); + } + +-static void +-append_config_arg(gpointer key, gpointer value, gpointer user_data) +-{ +- /* The fencer will filter "action" out when it registers the device, +- * but ignore it here in case any external API users don't. 
+- * +- * Also filter out parameters handled directly by Pacemaker. +- */ +- if (!pcmk__str_eq(key, STONITH_ATTR_ACTION_OP, pcmk__str_casei) +- && !pcmk_stonith_param(key) +- && (strstr(key, CRM_META) == NULL) +- && !pcmk__str_eq(key, "crm_feature_set", pcmk__str_casei)) { +- +- crm_trace("Passing %s=%s with fence action", +- (const char *) key, (const char *) (value? value : "")); +- g_hash_table_insert((GHashTable *) user_data, +- strdup(key), strdup(value? value : "")); +- } +-} +- +-static GHashTable * +-make_args(const char *agent, const char *action, const char *victim, +- uint32_t victim_nodeid, GHashTable * device_args, +- GHashTable * port_map, const char *host_arg) +-{ +- GHashTable *arg_list = NULL; +- const char *value = NULL; +- +- CRM_CHECK(action != NULL, return NULL); +- +- arg_list = pcmk__strkey_table(free, free); +- +- // Add action to arguments (using an alias if requested) +- if (device_args) { +- char buffer[512]; +- +- snprintf(buffer, sizeof(buffer), "pcmk_%s_action", action); +- value = g_hash_table_lookup(device_args, buffer); +- if (value) { +- crm_debug("Substituting '%s' for fence action %s targeting %s", +- value, action, victim); +- action = value; +- } +- } +- g_hash_table_insert(arg_list, strdup(STONITH_ATTR_ACTION_OP), +- strdup(action)); +- +- /* If this is a fencing operation against another node, add more standard +- * arguments. 
+- */ +- if (victim && device_args) { +- const char *param = NULL; +- +- /* Always pass the target's name, per +- * https://github.com/ClusterLabs/fence-agents/blob/master/doc/FenceAgentAPI.md +- */ +- g_hash_table_insert(arg_list, strdup("nodename"), strdup(victim)); +- +- // If the target's node ID was specified, pass it, too +- if (victim_nodeid) { +- char *nodeid = crm_strdup_printf("%" PRIu32, victim_nodeid); +- +- // cts-fencing looks for this log message +- crm_info("Passing '%s' as nodeid with fence action '%s' targeting %s", +- nodeid, action, victim); +- g_hash_table_insert(arg_list, strdup("nodeid"), nodeid); +- } +- +- // Check whether target must be specified in some other way +- param = g_hash_table_lookup(device_args, PCMK_STONITH_HOST_ARGUMENT); +- if (!pcmk__str_eq(agent, "fence_legacy", pcmk__str_none) +- && !pcmk__str_eq(param, "none", pcmk__str_casei)) { +- +- if (param == NULL) { +- /* Use the caller's default for pcmk_host_argument, or "port" if +- * none was given +- */ +- param = (host_arg == NULL)? "port" : host_arg; +- } +- value = g_hash_table_lookup(device_args, param); +- +- if (pcmk__str_eq(value, "dynamic", +- pcmk__str_casei|pcmk__str_null_matches)) { +- /* If the host argument was "dynamic" or not explicitly specified, +- * add it with the target +- */ +- const char *alias = NULL; +- +- if (port_map) { +- alias = g_hash_table_lookup(port_map, victim); +- } +- if (alias == NULL) { +- alias = victim; +- } +- crm_debug("Passing %s='%s' with fence action %s targeting %s", +- param, alias, action, victim); +- g_hash_table_insert(arg_list, strdup(param), strdup(alias)); +- } +- } +- } +- +- if (device_args) { +- g_hash_table_foreach(device_args, append_config_arg, arg_list); +- } +- +- return arg_list; +-} +- +-/*! 
+- * \internal +- * \brief Free all memory used by a stonith action +- * +- * \param[in,out] action Action to free +- */ +-void +-stonith__destroy_action(stonith_action_t *action) +-{ +- if (action) { +- free(action->agent); +- if (action->args) { +- g_hash_table_destroy(action->args); +- } +- free(action->action); +- free(action->victim); +- if (action->svc_action) { +- services_action_free(action->svc_action); +- } +- pcmk__reset_result(&(action->result)); +- free(action); +- } +-} +- +-/*! +- * \internal +- * \brief Get the result of an executed stonith action +- * +- * \param[in] action Executed action +- * +- * \return Pointer to action's result (or NULL if \p action is NULL) +- */ +-pcmk__action_result_t * +-stonith__action_result(stonith_action_t *action) +-{ +- return (action == NULL)? NULL : &(action->result); +-} +- +-#define FAILURE_MAX_RETRIES 2 +-stonith_action_t * +-stonith_action_create(const char *agent, +- const char *_action, +- const char *victim, +- uint32_t victim_nodeid, +- int timeout, GHashTable * device_args, +- GHashTable * port_map, const char *host_arg) +-{ +- stonith_action_t *action; +- +- action = calloc(1, sizeof(stonith_action_t)); +- action->args = make_args(agent, _action, victim, victim_nodeid, +- device_args, port_map, host_arg); +- crm_debug("Preparing '%s' action for %s using agent %s", +- _action, (victim? 
victim : "no target"), agent); +- action->agent = strdup(agent); +- action->action = strdup(_action); +- if (victim) { +- action->victim = strdup(victim); +- } +- action->timeout = action->remaining_timeout = timeout; +- action->max_retries = FAILURE_MAX_RETRIES; +- +- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN, +- "Initialization bug in fencing library"); +- +- if (device_args) { +- char buffer[512]; +- const char *value = NULL; +- +- snprintf(buffer, sizeof(buffer), "pcmk_%s_retries", _action); +- value = g_hash_table_lookup(device_args, buffer); +- +- if (value) { +- action->max_retries = atoi(value); +- } +- } +- +- return action; +-} +- +-static gboolean +-update_remaining_timeout(stonith_action_t * action) +-{ +- int diff = time(NULL) - action->initial_start_time; +- +- if (action->tries >= action->max_retries) { +- crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed", +- action->agent, action->action, action->max_retries); +- action->remaining_timeout = 0; +- } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT) +- && (diff < (action->timeout * 0.7))) { +- /* only set remaining timeout period if there is 30% +- * or greater of the original timeout period left */ +- action->remaining_timeout = action->timeout - diff; +- } else { +- action->remaining_timeout = 0; +- } +- return action->remaining_timeout ? TRUE : FALSE; +-} +- +-/*! 
+- * \internal +- * \brief Map a fencing action result to a standard return code +- * +- * \param[in] result Fencing action result to map +- * +- * \return Standard Pacemaker return code that best corresponds to \p result +- */ +-int +-stonith__result2rc(const pcmk__action_result_t *result) +-{ +- if (pcmk__result_ok(result)) { +- return pcmk_rc_ok; +- } +- +- switch (result->execution_status) { +- case PCMK_EXEC_PENDING: return EINPROGRESS; +- case PCMK_EXEC_CANCELLED: return ECANCELED; +- case PCMK_EXEC_TIMEOUT: return ETIME; +- case PCMK_EXEC_NOT_INSTALLED: return ENOENT; +- case PCMK_EXEC_NOT_SUPPORTED: return EOPNOTSUPP; +- case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN; +- case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV; +- case PCMK_EXEC_NO_SECRETS: return EACCES; +- +- /* For the fencing API, PCMK_EXEC_INVALID is used with fencer API +- * operations that don't involve executing an agent (for example, +- * registering devices). This allows us to use the CRM_EX_* codes in the +- * exit status for finer-grained responses. +- */ +- case PCMK_EXEC_INVALID: +- switch (result->exit_status) { +- case CRM_EX_INSUFFICIENT_PRIV: return EACCES; +- case CRM_EX_PROTOCOL: return EPROTO; +- +- /* CRM_EX_EXPIRED is used for orphaned fencing operations left +- * over from a previous instance of the fencer. For API backward +- * compatibility, this is mapped to the previously used code for +- * this case, EHOSTUNREACH. 
+- */ +- case CRM_EX_EXPIRED: return EHOSTUNREACH; +- default: break; +- } +- +- default: +- break; +- } +- +- // Try to provide useful error code based on result's error output +- +- if (result->action_stderr == NULL) { +- return ENODATA; +- +- } else if (strcasestr(result->action_stderr, "timed out") +- || strcasestr(result->action_stderr, "timeout")) { +- return ETIME; +- +- } else if (strcasestr(result->action_stderr, "unrecognised action") +- || strcasestr(result->action_stderr, "unrecognized action") +- || strcasestr(result->action_stderr, "unsupported action")) { +- return EOPNOTSUPP; +- } +- +- // Oh well, we tried +- return pcmk_rc_error; +-} +- +-static void +-stonith_action_async_done(svc_action_t *svc_action) +-{ +- stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; +- +- set_result_from_svc_action(action, svc_action); +- +- svc_action->params = NULL; +- +- crm_debug("Child process %d performing action '%s' exited with rc %d", +- action->pid, action->action, svc_action->rc); +- +- log_action(action, action->pid); +- +- if (!pcmk__result_ok(&(action->result)) +- && update_remaining_timeout(action)) { +- +- int rc = internal_stonith_action_execute(action); +- if (rc == pcmk_ok) { +- return; +- } +- } +- +- if (action->done_cb) { +- action->done_cb(action->pid, &(action->result), action->userdata); +- } +- +- action->svc_action = NULL; // don't remove our caller +- stonith__destroy_action(action); +-} +- +-static void +-stonith_action_async_forked(svc_action_t *svc_action) +-{ +- stonith_action_t *action = (stonith_action_t *) svc_action->cb_data; +- +- action->pid = svc_action->pid; +- action->svc_action = svc_action; +- +- if (action->fork_cb) { +- (action->fork_cb) (svc_action->pid, action->userdata); +- } +- +- crm_trace("Child process %d performing action '%s' successfully forked", +- action->pid, action->action); +-} +- +-static int +-internal_stonith_action_execute(stonith_action_t * action) +-{ +- int rc = -EPROTO; +- int is_retry 
= 0; +- svc_action_t *svc_action = NULL; +- static int stonith_sequence = 0; +- char *buffer = NULL; +- +- CRM_CHECK(action != NULL, return -EINVAL); +- +- if ((action->action == NULL) || (action->args == NULL) +- || (action->agent == NULL)) { +- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR, +- PCMK_EXEC_ERROR_FATAL, "Bug in fencing library"); +- return -EINVAL; +- } +- +- if (!action->tries) { +- action->initial_start_time = time(NULL); +- } +- action->tries++; +- +- if (action->tries > 1) { +- crm_info("Attempt %d to execute %s (%s). remaining timeout is %d", +- action->tries, action->agent, action->action, action->remaining_timeout); +- is_retry = 1; +- } +- +- buffer = crm_strdup_printf(PCMK__FENCE_BINDIR "/%s", +- basename(action->agent)); +- svc_action = services_action_create_generic(buffer, NULL); +- free(buffer); +- +- if (svc_action->rc != PCMK_OCF_UNKNOWN) { +- set_result_from_svc_action(action, svc_action); +- services_action_free(svc_action); +- return -E2BIG; +- } +- +- svc_action->timeout = 1000 * action->remaining_timeout; +- svc_action->standard = strdup(PCMK_RESOURCE_CLASS_STONITH); +- svc_action->id = crm_strdup_printf("%s_%s_%d", basename(action->agent), +- action->action, action->tries); +- svc_action->agent = strdup(action->agent); +- svc_action->sequence = stonith_sequence++; +- svc_action->params = action->args; +- svc_action->cb_data = (void *) action; +- svc_action->flags = pcmk__set_flags_as(__func__, __LINE__, +- LOG_TRACE, "Action", +- svc_action->id, svc_action->flags, +- SVC_ACTION_NON_BLOCKED, +- "SVC_ACTION_NON_BLOCKED"); +- +- /* keep retries from executing out of control and free previous results */ +- if (is_retry) { +- pcmk__reset_result(&(action->result)); +- sleep(1); +- } +- +- if (action->async) { +- /* async */ +- if (services_action_async_fork_notify(svc_action, +- &stonith_action_async_done, +- &stonith_action_async_forked)) { +- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, +- PCMK_EXEC_PENDING, 
NULL); +- return pcmk_ok; +- } +- +- } else if (services_action_sync(svc_action)) { // sync success +- rc = pcmk_ok; +- +- } else { // sync failure +- rc = -ECONNABORTED; +- } +- +- set_result_from_svc_action(action, svc_action); +- svc_action->params = NULL; +- services_action_free(svc_action); +- return rc; +-} +- +-/*! +- * \internal +- * \brief Kick off execution of an async stonith action +- * +- * \param[in,out] action Action to be executed +- * \param[in,out] userdata Datapointer to be passed to callbacks +- * \param[in] done Callback to notify action has failed/succeeded +- * \param[in] fork_callback Callback to notify successful fork of child +- * +- * \return pcmk_ok if ownership of action has been taken, -errno otherwise +- */ +-int +-stonith_action_execute_async(stonith_action_t * action, +- void *userdata, +- void (*done) (int pid, +- const pcmk__action_result_t *result, +- void *user_data), +- void (*fork_cb) (int pid, void *user_data)) +-{ +- if (!action) { +- return -EINVAL; +- } +- +- action->userdata = userdata; +- action->done_cb = done; +- action->fork_cb = fork_cb; +- action->async = 1; +- +- return internal_stonith_action_execute(action); +-} +- +-/*! 
+- * \internal +- * \brief Execute a stonith action +- * +- * \param[in,out] action Action to execute +- * +- * \return pcmk_ok on success, -errno otherwise +- */ +-int +-stonith__execute(stonith_action_t *action) +-{ +- int rc = pcmk_ok; +- +- CRM_CHECK(action != NULL, return -EINVAL); +- +- // Keep trying until success, max retries, or timeout +- do { +- rc = internal_stonith_action_execute(action); +- } while ((rc != pcmk_ok) && update_remaining_timeout(action)); +- +- return rc; +-} +- + static int + stonith_api_device_list(stonith_t * stonith, int call_options, const char *namespace, + stonith_key_value_t ** devices, int timeout) +-- +2.27.0 + + +From 883a3cf7d3f73d02417d3997a7885dd5a7bebac7 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Wed, 10 Nov 2021 15:39:17 -0600 +Subject: [PATCH 05/13] Low: fencing,executor: improve mapping of legacy return + code to execution status + +Move stonith_rc2status() from the executor to the fencing library for future +reuse, exposing it internally as stonith__legacy2status(). Update it to use +recently added execution status codes. +--- + daemons/execd/execd_commands.c | 66 ++++++++-------------------------- + include/crm/fencing/internal.h | 2 ++ + lib/fencing/st_actions.c | 36 +++++++++++++++++++ + 3 files changed, 52 insertions(+), 52 deletions(-) + +diff --git a/daemons/execd/execd_commands.c b/daemons/execd/execd_commands.c +index 02070bf11..0ccaa1ced 100644 +--- a/daemons/execd/execd_commands.c ++++ b/daemons/execd/execd_commands.c +@@ -21,6 +21,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -999,56 +1000,6 @@ action_complete(svc_action_t * action) + cmd_finalize(cmd, rsc); + } + +-/*! 
+- * \internal +- * \brief Determine operation status of a stonith operation +- * +- * Non-stonith resource operations get their operation status directly from the +- * service library, but the fencer does not have an equivalent, so we must infer +- * an operation status from the fencer API's return code. +- * +- * \param[in] action Name of action performed on stonith resource +- * \param[in] interval_ms Action interval +- * \param[in] rc Action result from fencer +- * +- * \return Operation status corresponding to fencer API return code +- */ +-static int +-stonith_rc2status(const char *action, guint interval_ms, int rc) +-{ +- int status = PCMK_EXEC_DONE; +- +- switch (rc) { +- case pcmk_ok: +- break; +- +- case -EOPNOTSUPP: +- case -EPROTONOSUPPORT: +- status = PCMK_EXEC_NOT_SUPPORTED; +- break; +- +- case -ETIME: +- case -ETIMEDOUT: +- status = PCMK_EXEC_TIMEOUT; +- break; +- +- case -ENOTCONN: +- case -ECOMM: +- // Couldn't talk to fencer +- status = PCMK_EXEC_ERROR; +- break; +- +- case -ENODEV: +- // The device is not registered with the fencer +- status = PCMK_EXEC_ERROR; +- break; +- +- default: +- break; +- } +- return status; +-} +- + static void + stonith_action_complete(lrmd_cmd_t * cmd, int rc) + { +@@ -1062,8 +1013,19 @@ stonith_action_complete(lrmd_cmd_t * cmd, int rc) + * the fencer return code. 
+ */ + if (cmd->result.execution_status != PCMK_EXEC_CANCELLED) { +- cmd->result.execution_status = stonith_rc2status(cmd->action, +- cmd->interval_ms, rc); ++ cmd->result.execution_status = stonith__legacy2status(rc); ++ ++ // Simplify status codes from fencer ++ switch (cmd->result.execution_status) { ++ case PCMK_EXEC_NOT_CONNECTED: ++ case PCMK_EXEC_INVALID: ++ case PCMK_EXEC_NO_FENCE_DEVICE: ++ case PCMK_EXEC_NO_SECRETS: ++ cmd->result.execution_status = PCMK_EXEC_ERROR; ++ break; ++ default: ++ break; ++ } + + // Certain successful actions change the known state of the resource + if ((rsc != NULL) && pcmk__result_ok(&(cmd->result))) { +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 6a7e4232c..80f6443be 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -182,6 +182,8 @@ bool stonith__event_state_pending(stonith_history_t *history, void *user_data); + bool stonith__event_state_eq(stonith_history_t *history, void *user_data); + bool stonith__event_state_neq(stonith_history_t *history, void *user_data); + ++int stonith__legacy2status(int rc); ++ + /*! + * \internal + * \brief Is a fencing operation in pending state? +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index 64d3afd5d..9e785595a 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -360,6 +360,42 @@ stonith__result2rc(const pcmk__action_result_t *result) + return pcmk_rc_error; + } + ++/*! ++ * \internal ++ * \brief Determine execution status equivalent of legacy fencer return code ++ * ++ * Fence action notifications, and fence action callbacks from older fencers ++ * (<=2.1.2) in a rolling upgrade, will have only a legacy return code. Map this ++ * to an execution status as best as possible (essentially, the inverse of ++ * stonith__result2rc()). 
++ * ++ * \param[in] rc Legacy return code from fencer ++ * ++ * \return Execution status best corresponding to \p rc ++ */ ++int ++stonith__legacy2status(int rc) ++{ ++ if (rc >= 0) { ++ return PCMK_EXEC_DONE; ++ } ++ switch (-rc) { ++ case EACCES: return PCMK_EXEC_NO_SECRETS; ++ case ECANCELED: return PCMK_EXEC_CANCELLED; ++ case EHOSTUNREACH: return PCMK_EXEC_INVALID; ++ case EINPROGRESS: return PCMK_EXEC_PENDING; ++ case ENODEV: return PCMK_EXEC_NO_FENCE_DEVICE; ++ case ENOENT: return PCMK_EXEC_NOT_INSTALLED; ++ case ENOTCONN: return PCMK_EXEC_NOT_CONNECTED; ++ case EOPNOTSUPP: return PCMK_EXEC_NOT_SUPPORTED; ++ case EPROTO: return PCMK_EXEC_INVALID; ++ case EPROTONOSUPPORT: return PCMK_EXEC_NOT_SUPPORTED; ++ case ETIME: return PCMK_EXEC_TIMEOUT; ++ case ETIMEDOUT: return PCMK_EXEC_TIMEOUT; ++ default: return PCMK_EXEC_ERROR; ++ } ++} ++ + static void + stonith_action_async_done(svc_action_t *svc_action) + { +-- +2.27.0 + + +From 639a9f4a2cbeb6cc41b754a1dcb1f360a9500e03 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 16:54:32 -0600 +Subject: [PATCH 06/13] Refactor: fencing: add functions for getting/setting + result via XML + +These will come in handy as we update the various fencer messages to include a +full result rather than just a legacy return code. The functions are in a new +source file fenced_messages.c which can have other stuff moved to it later. 
+--- + include/crm/fencing/internal.h | 3 + + lib/fencing/st_actions.c | 107 +++++++++++++++++++++++++++++++++ + 2 files changed, 110 insertions(+) + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 80f6443be..4b5fd3959 100644 +--- a/include/crm/fencing/internal.h ++++ b/include/crm/fencing/internal.h +@@ -60,6 +60,9 @@ stonith_action_t *stonith_action_create(const char *agent, + void stonith__destroy_action(stonith_action_t *action); + pcmk__action_result_t *stonith__action_result(stonith_action_t *action); + int stonith__result2rc(const pcmk__action_result_t *result); ++void stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result); ++void stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result); ++xmlNode *stonith__find_xe_with_result(xmlNode *xml); + + int + stonith_action_execute_async(stonith_action_t * action, +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index 9e785595a..d4fc3f5ed 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -396,6 +396,113 @@ stonith__legacy2status(int rc) + } + } + ++/*! 
++ * \internal ++ * \brief Add a fencing result to an XML element as attributes ++ * ++ * \param[in] xml XML element to add result to ++ * \param[in] result Fencing result to add (assume success if NULL) ++ */ ++void ++stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result) ++{ ++ int exit_status = CRM_EX_OK; ++ enum pcmk_exec_status execution_status = PCMK_EXEC_DONE; ++ const char *exit_reason = NULL; ++ const char *action_stdout = NULL; ++ int rc = pcmk_ok; ++ ++ CRM_CHECK(xml != NULL, return); ++ ++ if (result != NULL) { ++ exit_status = result->exit_status; ++ execution_status = result->execution_status; ++ exit_reason = result->exit_reason; ++ action_stdout = result->action_stdout; ++ rc = pcmk_rc2legacy(stonith__result2rc(result)); ++ } ++ ++ crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status); ++ crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status); ++ crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason); ++ crm_xml_add(xml, "st_output", action_stdout); ++ ++ /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external ++ * code that use libstonithd <=2.1.2 don't check for the full result, and ++ * need a legacy return code instead. ++ */ ++ crm_xml_add_int(xml, F_STONITH_RC, rc); ++} ++ ++/*! ++ * \internal ++ * \brief Find a fencing result beneath an XML element ++ * ++ * \param[in] xml XML element to search ++ * ++ * \return \p xml or descendent of it that contains a fencing result, else NULL ++ */ ++xmlNode * ++stonith__find_xe_with_result(xmlNode *xml) ++{ ++ xmlNode *match = get_xpath_object("//@" XML_LRM_ATTR_RC, xml, LOG_NEVER); ++ ++ if (match == NULL) { ++ /* @COMPAT Peers <=2.1.2 in a rolling upgrade provide only a legacy ++ * return code, not a full result, so check for that. ++ */ ++ match = get_xpath_object("//@" F_STONITH_RC, xml, LOG_ERR); ++ } ++ return match; ++} ++ ++/*! 
++ * \internal ++ * \brief Get a fencing result from an XML element's attributes ++ * ++ * \param[in] xml XML element with fencing result ++ * \param[out] result Where to store fencing result ++ */ ++void ++stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result) ++{ ++ int exit_status = CRM_EX_OK; ++ int execution_status = PCMK_EXEC_DONE; ++ const char *exit_reason = NULL; ++ char *action_stdout = NULL; ++ ++ CRM_CHECK((xml != NULL) && (result != NULL), return); ++ ++ exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON); ++ action_stdout = crm_element_value_copy(xml, "st_output"); ++ ++ // A result must include an exit status and execution status ++ if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0) ++ || (crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, ++ &execution_status) < 0)) { ++ int rc = pcmk_ok; ++ exit_status = CRM_EX_ERROR; ++ ++ /* @COMPAT Peers <=2.1.2 in rolling upgrades provide only a legacy ++ * return code, not a full result, so check for that. ++ */ ++ if (crm_element_value_int(xml, F_STONITH_RC, &rc) == 0) { ++ if ((rc == pcmk_ok) || (rc == -EINPROGRESS)) { ++ exit_status = CRM_EX_OK; ++ } ++ execution_status = stonith__legacy2status(rc); ++ exit_reason = pcmk_strerror(rc); ++ ++ } else { ++ execution_status = PCMK_EXEC_ERROR; ++ exit_reason = "Fencer reply contained neither a full result " ++ "nor a legacy return code (bug?)"; ++ } ++ } ++ pcmk__set_result(result, exit_status, execution_status, exit_reason); ++ pcmk__set_result_output(result, action_stdout, NULL); ++} ++ + static void + stonith_action_async_done(svc_action_t *svc_action) + { +-- +2.27.0 + + +From 1f0121c6ad0d0235bcf01c8b60f9153592b3db83 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 10:10:53 -0600 +Subject: [PATCH 07/13] Refactor: fencing: rename functions for invoking fence + callbacks + +... 
to make it clearer what the difference between them is +--- + lib/fencing/st_client.c | 44 +++++++++++++++++++++++++++++++++-------- + 1 file changed, 36 insertions(+), 8 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 944cd1863..dfc5860fc 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -847,9 +847,21 @@ stonith_api_del_callback(stonith_t * stonith, int call_id, bool all_callbacks) + return pcmk_ok; + } + ++/*! ++ * \internal ++ * \brief Invoke a (single) specified fence action callback ++ * ++ * \param[in] st Fencer API connection ++ * \param[in] call_id If positive, call ID of completed fence action, otherwise ++ * legacy return code for early action failure ++ * \param[in] rc Legacy return code for action result ++ * \param[in] userdata User data to pass to callback ++ * \param[in] callback Fence action callback to invoke ++ */ + static void +-invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, +- void (*callback) (stonith_t * st, stonith_callback_data_t * data)) ++invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata, ++ void (*callback) (stonith_t *st, ++ stonith_callback_data_t *data)) + { + stonith_callback_data_t data = { 0, }; + +@@ -860,8 +872,21 @@ invoke_callback(stonith_t * st, int call_id, int rc, void *userdata, + callback(st, &data); + } + ++/*! ++ * \internal ++ * \brief Invoke any callbacks registered for a specified fence action result ++ * ++ * Given a fence action result from the fencer, invoke any callback registered ++ * for that action, as well as any global callback registered. 
++ * ++ * \param[in] st Fencer API connection ++ * \param[in] msg If non-NULL, fencer reply ++ * \param[in] call_id If \p msg is NULL, call ID of action that timed out ++ * \param[in] rc Legacy return code for result of action ++ */ + static void +-stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc) ++invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id, ++ int rc) + { + stonith_private_t *private = NULL; + stonith_callback_client_t *blob = NULL; +@@ -899,7 +924,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc + + if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { + crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); +- invoke_callback(stonith, call_id, rc, local_blob.user_data, local_blob.callback); ++ invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data, ++ local_blob.callback); + + } else if (private->op_callback == NULL && rc != pcmk_ok) { + crm_warn("Fencing command failed: %s", pcmk_strerror(rc)); +@@ -908,7 +934,8 @@ stonith_perform_callback(stonith_t * stonith, xmlNode * msg, int call_id, int rc + + if (private->op_callback != NULL) { + crm_trace("Invoking global callback for call %d", call_id); +- invoke_callback(stonith, call_id, rc, NULL, private->op_callback); ++ invoke_fence_action_callback(stonith, call_id, rc, NULL, ++ private->op_callback); + } + crm_trace("OP callback activated."); + } +@@ -919,7 +946,7 @@ stonith_async_timeout_handler(gpointer data) + struct timer_rec_s *timer = data; + + crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); +- stonith_perform_callback(timer->stonith, NULL, timer->call_id, -ETIME); ++ invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME); + + /* Always return TRUE, never remove the handler + * We do that in stonith_del_callback() +@@ -994,7 +1021,7 @@ stonith_dispatch_internal(const char *buffer, 
ssize_t length, gpointer userdata) + crm_trace("Activating %s callbacks...", type); + + if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { +- stonith_perform_callback(st, blob.xml, 0, 0); ++ invoke_registered_callbacks(st, blob.xml, 0, 0); + + } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { + foreach_notify_entry(private, stonith_send_notification, &blob); +@@ -1229,7 +1256,8 @@ stonith_api_add_callback(stonith_t * stonith, int call_id, int timeout, int opti + } else if (call_id < 0) { + if (!(options & st_opt_report_only_success)) { + crm_trace("Call failed, calling %s: %s", callback_name, pcmk_strerror(call_id)); +- invoke_callback(stonith, call_id, call_id, user_data, callback); ++ invoke_fence_action_callback(stonith, call_id, call_id, user_data, ++ callback); + } else { + crm_warn("Fencer call failed: %s", pcmk_strerror(call_id)); + } +-- +2.27.0 + + +From c32f11e70a88244f5a3217608055a4eaf8d28231 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 10:21:00 -0600 +Subject: [PATCH 08/13] Refactor: fencing: drop unnecessary argument when + invoking callbacks + +Refactor invoke_registered_callbacks() to treat a NULL message as a timeout, so +we can drop the rc argument. 
+--- + lib/fencing/st_client.c | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index dfc5860fc..9f2b0c1c1 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -882,15 +882,14 @@ invoke_fence_action_callback(stonith_t *st, int call_id, int rc, void *userdata, + * \param[in] st Fencer API connection + * \param[in] msg If non-NULL, fencer reply + * \param[in] call_id If \p msg is NULL, call ID of action that timed out +- * \param[in] rc Legacy return code for result of action + */ + static void +-invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id, +- int rc) ++invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + { + stonith_private_t *private = NULL; + stonith_callback_client_t *blob = NULL; + stonith_callback_client_t local_blob; ++ int rc = pcmk_ok; + + CRM_CHECK(stonith != NULL, return); + CRM_CHECK(stonith->st_private != NULL, return); +@@ -902,7 +901,13 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id, + local_blob.user_data = NULL; + local_blob.only_success = FALSE; + +- if (msg != NULL) { ++ if (msg == NULL) { ++ // Fencer didn't reply in time ++ rc = -ETIME; ++ ++ } else { ++ // We have the fencer reply ++ + crm_element_value_int(msg, F_STONITH_RC, &rc); + crm_element_value_int(msg, F_STONITH_CALLID, &call_id); + } +@@ -946,7 +951,7 @@ stonith_async_timeout_handler(gpointer data) + struct timer_rec_s *timer = data; + + crm_err("Async call %d timed out after %dms", timer->call_id, timer->timeout); +- invoke_registered_callbacks(timer->stonith, NULL, timer->call_id, -ETIME); ++ invoke_registered_callbacks(timer->stonith, NULL, timer->call_id); + + /* Always return TRUE, never remove the handler + * We do that in stonith_del_callback() +@@ -1021,7 +1026,7 @@ stonith_dispatch_internal(const char *buffer, ssize_t length, gpointer userdata) + crm_trace("Activating %s callbacks...", 
type); + + if (pcmk__str_eq(type, T_STONITH_NG, pcmk__str_casei)) { +- invoke_registered_callbacks(st, blob.xml, 0, 0); ++ invoke_registered_callbacks(st, blob.xml, 0); + + } else if (pcmk__str_eq(type, T_STONITH_NOTIFY, pcmk__str_casei)) { + foreach_notify_entry(private, stonith_send_notification, &blob); +-- +2.27.0 + + +From 5d8279b51ea9df738354649e4065663f2c16f1e6 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 10:21:57 -0600 +Subject: [PATCH 09/13] Log: fencing: improve message for callback errors + +Improve checking of fencer replies, which also allows us to distinguish an +internal bug from a bad fencer reply in logs. Lower the bad reply message to +warning. +--- + lib/fencing/st_client.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 9f2b0c1c1..170b9d450 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -904,15 +904,20 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + if (msg == NULL) { + // Fencer didn't reply in time + rc = -ETIME; ++ CRM_LOG_ASSERT(call_id > 0); + + } else { + // We have the fencer reply + +- crm_element_value_int(msg, F_STONITH_RC, &rc); +- crm_element_value_int(msg, F_STONITH_CALLID, &call_id); +- } ++ if (crm_element_value_int(msg, F_STONITH_RC, &rc) != 0) { ++ rc = -pcmk_err_generic; ++ } + +- CRM_CHECK(call_id > 0, crm_log_xml_err(msg, "Bad result")); ++ if ((crm_element_value_int(msg, F_STONITH_CALLID, &call_id) != 0) ++ || (call_id <= 0)) { ++ crm_log_xml_warn(msg, "Bad fencer reply"); ++ } ++ } + + blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table, + call_id); +-- +2.27.0 + + +From e03c14d24e8cb011e870b9460930d139705bf0a2 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 9 Nov 2021 14:59:12 -0600 +Subject: [PATCH 10/13] Doc: fencing: correct stonith_api_operations_t method + descriptions + +Many of the methods return a positive call ID on 
success +--- + include/crm/stonith-ng.h | 60 ++++++++++++++++++++++------------------ + 1 file changed, 33 insertions(+), 27 deletions(-) + +diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h +index 8d6ad477d..9643820e9 100644 +--- a/include/crm/stonith-ng.h ++++ b/include/crm/stonith-ng.h +@@ -164,39 +164,38 @@ typedef struct stonith_api_operations_s + int (*disconnect)(stonith_t *st); + + /*! +- * \brief Remove a registered stonith device with the local stonith daemon. ++ * \brief Unregister a fence device with the local fencer + * +- * \note Synchronous, guaranteed to occur in daemon before function returns. +- * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*remove_device)( + stonith_t *st, int options, const char *name); + + /*! +- * \brief Register a stonith device with the local stonith daemon. ++ * \brief Register a fence device with the local fencer + * +- * \note Synchronous, guaranteed to occur in daemon before function returns. +- * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*register_device)( + stonith_t *st, int options, const char *id, + const char *provider, const char *agent, stonith_key_value_t *params); + + /*! +- * \brief Remove a fencing level for a specific node. ++ * \brief Unregister a fencing level for specified node with local fencer + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*remove_level)( + stonith_t *st, int options, const char *node, int level); + + /*! 
+- * \brief Register a fencing level containing the fencing devices to be used +- * at that level for a specific node. ++ * \brief Register a fencing level for specified node with local fencer + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*register_level)( + stonith_t *st, int options, const char *node, int level, stonith_key_value_t *device_list); +@@ -226,21 +225,24 @@ typedef struct stonith_api_operations_s + /*! + * \brief Retrieve string listing hosts and port assignments from a local stonith device. + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*list)(stonith_t *st, int options, const char *id, char **list_output, int timeout); + + /*! + * \brief Check to see if a local stonith device is reachable + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*monitor)(stonith_t *st, int options, const char *id, int timeout); + + /*! + * \brief Check to see if a local stonith device's port is reachable + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*status)(stonith_t *st, int options, const char *id, const char *port, int timeout); + +@@ -267,7 +269,8 @@ typedef struct stonith_api_operations_s + * \param timeout, The default per device timeout to use with each device + * capable of fencing the target. 
+ * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*fence)(stonith_t *st, int options, const char *node, const char *action, + int timeout, int tolerance); +@@ -275,7 +278,8 @@ typedef struct stonith_api_operations_s + /*! + * \brief Manually confirm that a node is down. + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*confirm)(stonith_t *st, int options, const char *node); + +@@ -304,9 +308,6 @@ typedef struct stonith_api_operations_s + * \param[in] callback The callback function to register + * + * \return \c TRUE on success, \c FALSE if call_id is negative, -errno otherwise +- * +- * \todo This function should return \c pcmk_ok on success, and \c call_id +- * when negative, but that would break backward compatibility. + */ + int (*register_callback)(stonith_t *st, + int call_id, +@@ -317,12 +318,14 @@ typedef struct stonith_api_operations_s + void (*callback)(stonith_t *st, stonith_callback_data_t *data)); + + /*! +- * \brief Remove a registered callback for a given call id. ++ * \brief Remove a registered callback for a given call id ++ * ++ * \return pcmk_ok + */ + int (*remove_callback)(stonith_t *st, int call_id, bool all_callbacks); + + /*! 
+- * \brief Remove fencing level for specific node, node regex or attribute ++ * \brief Unregister fencing level for specified node, pattern or attribute + * + * \param[in] st Fencer connection to use + * \param[in] options Bitmask of stonith_call_options to pass to the fencer +@@ -332,7 +335,8 @@ typedef struct stonith_api_operations_s + * \param[in] value If not NULL, target by this node attribute value + * \param[in] level Index number of level to remove + * +- * \return 0 on success, negative error code otherwise ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + * + * \note The caller should set only one of node, pattern or attr/value. + */ +@@ -341,7 +345,7 @@ typedef struct stonith_api_operations_s + const char *attr, const char *value, int level); + + /*! +- * \brief Register fencing level for specific node, node regex or attribute ++ * \brief Register fencing level for specified node, pattern or attribute + * + * \param[in] st Fencer connection to use + * \param[in] options Bitmask of stonith_call_options to pass to fencer +@@ -352,7 +356,8 @@ typedef struct stonith_api_operations_s + * \param[in] level Index number of level to add + * \param[in] device_list Devices to use in level + * +- * \return 0 on success, negative error code otherwise ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + * + * \note The caller should set only one of node, pattern or attr/value. + */ +@@ -398,7 +403,8 @@ typedef struct stonith_api_operations_s + * \param delay, Apply a fencing delay. 
Value -1 means disable also any + * static/random fencing delays from pcmk_delay_base/max + * +- * \return Legacy Pacemaker return code ++ * \return pcmk_ok (if synchronous) or positive call ID (if asynchronous) ++ * on success, otherwise a negative legacy Pacemaker return code + */ + int (*fence_with_delay)(stonith_t *st, int options, const char *node, const char *action, + int timeout, int tolerance, int delay); +-- +2.27.0 + + +From 18c382731889b626b21ba6a14f9213ef1e45a524 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 11:14:24 -0600 +Subject: [PATCH 11/13] Refactor: fencing: define constant for XML attribute + for action output + +--- + daemons/fenced/fenced_commands.c | 4 ++-- + include/crm/fencing/internal.h | 1 + + lib/fencing/st_actions.c | 4 ++-- + lib/fencing/st_client.c | 2 +- + 4 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 26501a4b3..aa14c52af 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2677,7 +2677,7 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i + + crm_xml_add(reply, "st_origin", __func__); + crm_xml_add(reply, F_TYPE, T_STONITH_NG); +- crm_xml_add(reply, "st_output", output); ++ crm_xml_add(reply, F_STONITH_OUTPUT, output); + crm_xml_add_int(reply, F_STONITH_RC, rc); + + if (request == NULL) { +@@ -2743,7 +2743,7 @@ construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result) + crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options); + crm_xml_add_int(reply, F_STONITH_RC, + pcmk_rc2legacy(stonith__result2rc(result))); +- crm_xml_add(reply, "st_output", result->action_stdout); ++ crm_xml_add(reply, F_STONITH_OUTPUT, result->action_stdout); + return reply; + } + +diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h +index 4b5fd3959..f0d294a0b 100644 +--- a/include/crm/fencing/internal.h ++++ 
b/include/crm/fencing/internal.h +@@ -105,6 +105,7 @@ void stonith__device_parameter_flags(uint32_t *device_flags, + # define F_STONITH_REMOTE_OP_ID "st_remote_op" + # define F_STONITH_REMOTE_OP_ID_RELAY "st_remote_op_relay" + # define F_STONITH_RC "st_rc" ++# define F_STONITH_OUTPUT "st_output" + /*! Timeout period per a device execution */ + # define F_STONITH_TIMEOUT "st_timeout" + # define F_STONITH_TOLERANCE "st_tolerance" +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index d4fc3f5ed..5636810a5 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -425,7 +425,7 @@ stonith__xe_set_result(xmlNode *xml, const pcmk__action_result_t *result) + crm_xml_add_int(xml, XML_LRM_ATTR_OPSTATUS, (int) execution_status); + crm_xml_add_int(xml, XML_LRM_ATTR_RC, exit_status); + crm_xml_add(xml, XML_LRM_ATTR_EXIT_REASON, exit_reason); +- crm_xml_add(xml, "st_output", action_stdout); ++ crm_xml_add(xml, F_STONITH_OUTPUT, action_stdout); + + /* @COMPAT Peers in rolling upgrades, Pacemaker Remote nodes, and external + * code that use libstonithd <=2.1.2 don't check for the full result, and +@@ -474,7 +474,7 @@ stonith__xe_get_result(xmlNode *xml, pcmk__action_result_t *result) + CRM_CHECK((xml != NULL) && (result != NULL), return); + + exit_reason = crm_element_value(xml, XML_LRM_ATTR_EXIT_REASON); +- action_stdout = crm_element_value_copy(xml, "st_output"); ++ action_stdout = crm_element_value_copy(xml, F_STONITH_OUTPUT); + + // A result must include an exit status and execution status + if ((crm_element_value_int(xml, XML_LRM_ATTR_RC, &exit_status) < 0) +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 170b9d450..2dfadf922 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -600,7 +600,7 @@ stonith_api_list(stonith_t * stonith, int call_options, const char *id, char **l + if (output && list_info) { + const char *list_str; + +- list_str = crm_element_value(output, "st_output"); ++ list_str = 
crm_element_value(output, F_STONITH_OUTPUT); + + if (list_str) { + *list_info = strdup(list_str); +-- +2.27.0 + + +From 9fe9ed5d46c810cb9c12eb07271373ab92d271cd Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 11:39:32 -0600 +Subject: [PATCH 12/13] Refactor: fencing: simplify invoking callbacks + +--- + lib/fencing/st_client.c | 42 +++++++++++++++++------------------------ + 1 file changed, 17 insertions(+), 25 deletions(-) + +diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c +index 2dfadf922..2ca094566 100644 +--- a/lib/fencing/st_client.c ++++ b/lib/fencing/st_client.c +@@ -887,8 +887,7 @@ static void + invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + { + stonith_private_t *private = NULL; +- stonith_callback_client_t *blob = NULL; +- stonith_callback_client_t local_blob; ++ stonith_callback_client_t *cb_info = NULL; + int rc = pcmk_ok; + + CRM_CHECK(stonith != NULL, return); +@@ -896,11 +895,6 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + + private = stonith->st_private; + +- local_blob.id = NULL; +- local_blob.callback = NULL; +- local_blob.user_data = NULL; +- local_blob.only_success = FALSE; +- + if (msg == NULL) { + // Fencer didn't reply in time + rc = -ETIME; +@@ -919,26 +913,21 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + } + } + +- blob = pcmk__intkey_table_lookup(private->stonith_op_callback_table, +- call_id); +- if (blob != NULL) { +- local_blob = *blob; +- blob = NULL; +- +- stonith_api_del_callback(stonith, call_id, FALSE); +- +- } else { +- crm_trace("No callback found for call %d", call_id); +- local_blob.callback = NULL; ++ if (call_id > 0) { ++ cb_info = pcmk__intkey_table_lookup(private->stonith_op_callback_table, ++ call_id); + } + +- if (local_blob.callback != NULL && (rc == pcmk_ok || local_blob.only_success == FALSE)) { +- crm_trace("Invoking callback %s for call %d", crm_str(local_blob.id), call_id); +- 
invoke_fence_action_callback(stonith, call_id, rc, local_blob.user_data, +- local_blob.callback); ++ if ((cb_info != NULL) && (cb_info->callback != NULL) ++ && (rc == pcmk_ok || !(cb_info->only_success))) { ++ crm_trace("Invoking callback %s for call %d", ++ crm_str(cb_info->id), call_id); ++ invoke_fence_action_callback(stonith, call_id, rc, cb_info->user_data, ++ cb_info->callback); + +- } else if (private->op_callback == NULL && rc != pcmk_ok) { +- crm_warn("Fencing command failed: %s", pcmk_strerror(rc)); ++ } else if ((private->op_callback == NULL) && (rc != pcmk_ok)) { ++ crm_warn("Fencing action without registered callback failed: %s", ++ pcmk_strerror(rc)); + crm_log_xml_debug(msg, "Failed fence update"); + } + +@@ -947,7 +936,10 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id) + invoke_fence_action_callback(stonith, call_id, rc, NULL, + private->op_callback); + } +- crm_trace("OP callback activated."); ++ ++ if (cb_info != NULL) { ++ stonith_api_del_callback(stonith, call_id, FALSE); ++ } + } + + static gboolean +-- +2.27.0 + + +From 8113b800ce677ba17a16ca176e8f6f9b4a042316 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 18:14:48 -0600 +Subject: [PATCH 13/13] Refactor: fencing: add a missing "break" statement + +No effect, but more correct +--- + lib/fencing/st_actions.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c +index 5636810a5..7eaa8b0f2 100644 +--- a/lib/fencing/st_actions.c ++++ b/lib/fencing/st_actions.c +@@ -336,6 +336,7 @@ stonith__result2rc(const pcmk__action_result_t *result) + case CRM_EX_EXPIRED: return EHOSTUNREACH; + default: break; + } ++ break; + + default: + break; +-- +2.27.0 + diff --git a/SOURCES/003-pacemakerd-output.patch b/SOURCES/003-pacemakerd-output.patch deleted file mode 100644 index 167e22b..0000000 --- a/SOURCES/003-pacemakerd-output.patch +++ /dev/null @@ -1,343 +0,0 @@ -From 
7c35387a9896cb968cf4087b5cbed94af44e1ea5 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 14 May 2021 12:03:46 -0400 -Subject: [PATCH 1/5] Feature: daemons: Convert pacemakerd to formatted output. - -The main purpose of this is to finish getting pacemakerd moved off the -existing command line handling code (pcmk__cli_help in particular) so -that code can eventually be deprecated or removed. pacemakerd itself -does fairly little printing. ---- - daemons/pacemakerd/pacemakerd.c | 58 ++++++++++++++++++++++++++++++----------- - 1 file changed, 43 insertions(+), 15 deletions(-) - -diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c -index ce194bf..bd59729 100644 ---- a/daemons/pacemakerd/pacemakerd.c -+++ b/daemons/pacemakerd/pacemakerd.c -@@ -25,6 +25,7 @@ - #include - #include - #include -+#include - #include - #include - -@@ -37,6 +38,14 @@ struct { - gboolean standby; - } options; - -+static pcmk__output_t *out = NULL; -+ -+static pcmk__supported_format_t formats[] = { -+ PCMK__SUPPORTED_FORMAT_NONE, -+ PCMK__SUPPORTED_FORMAT_TEXT, -+ { NULL, NULL, NULL } -+}; -+ - static gboolean - pid_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **err) { - return TRUE; -@@ -1167,10 +1176,10 @@ pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api, - } - - static GOptionContext * --build_arg_context(pcmk__common_args_t *args) { -+build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { - GOptionContext *context = NULL; - -- context = pcmk__build_arg_context(args, NULL, NULL, NULL); -+ context = pcmk__build_arg_context(args, "text", group, NULL); - pcmk__add_main_args(context, entries); - return context; - } -@@ -1182,9 +1191,11 @@ main(int argc, char **argv) - - GError *error = NULL; - -+ int rc = pcmk_rc_ok; -+ GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); - gchar **processed_args = pcmk__cmdline_preproc(argv, "p"); -- GOptionContext *context = 
build_arg_context(args); -+ GOptionContext *context = build_arg_context(args, &output_group); - - bool old_instance_connected = false; - -@@ -1195,23 +1205,30 @@ main(int argc, char **argv) - mainloop_add_signal(SIGHUP, pcmk_ignore); - mainloop_add_signal(SIGQUIT, pcmk_sigquit); - -+ pcmk__register_formats(output_group, formats); - if (!g_option_context_parse_strv(context, &processed_args, &error)) { - exit_code = CRM_EX_USAGE; - goto done; - } - -+ rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv); -+ if (rc != pcmk_rc_ok) { -+ exit_code = CRM_EX_ERROR; -+ g_set_error(&error, PCMK__EXITC_ERROR, exit_code, "Error creating output format %s: %s", -+ args->output_ty, pcmk_rc_str(rc)); -+ goto done; -+ } -+ - if (options.features) { -- printf("Pacemaker %s (Build: %s)\n Supporting v%s: %s\n", PACEMAKER_VERSION, BUILD_VERSION, -- CRM_FEATURE_SET, CRM_FEATURES); -+ out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, -+ BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); - exit_code = CRM_EX_OK; - goto done; - } - - if (args->version) { -- g_strfreev(processed_args); -- pcmk__free_arg_context(context); -- /* FIXME: When pacemakerd is converted to use formatted output, this can go. */ -- pcmk__cli_help('v', CRM_EX_USAGE); -+ out->version(out, false); -+ goto done; - } - - setenv("LC_ALL", "C", 1); -@@ -1248,6 +1265,13 @@ main(int argc, char **argv) - crm_ipc_close(old_instance); - crm_ipc_destroy(old_instance); - -+ /* Don't allow any accidental output after this point. 
*/ -+ if (out != NULL) { -+ out->finish(out, exit_code, true, NULL); -+ pcmk__output_free(out); -+ out = NULL; -+ } -+ - #ifdef SUPPORT_COROSYNC - if (mcp_read_config() == FALSE) { - exit_code = CRM_EX_UNAVAILABLE; -@@ -1333,6 +1357,11 @@ done: - g_strfreev(processed_args); - pcmk__free_arg_context(context); - -- pcmk__output_and_clear_error(error, NULL); -+ pcmk__output_and_clear_error(error, out); -+ -+ if (out != NULL) { -+ out->finish(out, exit_code, true, NULL); -+ pcmk__output_free(out); -+ } - crm_exit(exit_code); - } --- -1.8.3.1 - - -From 35e6da64381fcb092d81ce16835cc28670b077cb Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 17 May 2021 10:04:04 -0400 -Subject: [PATCH 2/5] Features: daemons: Output the pacemakerd feature list in - XML. - ---- - daemons/pacemakerd/pacemakerd.c | 45 ++++++++++++++++++++++++++++++++++++++--- - 1 file changed, 42 insertions(+), 3 deletions(-) - -diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c -index bd59729..93cf743 100644 ---- a/daemons/pacemakerd/pacemakerd.c -+++ b/daemons/pacemakerd/pacemakerd.c -@@ -43,6 +43,42 @@ static pcmk__output_t *out = NULL; - static pcmk__supported_format_t formats[] = { - PCMK__SUPPORTED_FORMAT_NONE, - PCMK__SUPPORTED_FORMAT_TEXT, -+ PCMK__SUPPORTED_FORMAT_XML, -+ { NULL, NULL, NULL } -+}; -+ -+static int -+pacemakerd_features(pcmk__output_t *out, va_list args) { -+ out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, -+ BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); -+ return pcmk_rc_ok; -+} -+ -+static int -+pacemakerd_features_xml(pcmk__output_t *out, va_list args) { -+ gchar **feature_list = g_strsplit(CRM_FEATURES, " ", 0); -+ -+ pcmk__output_xml_create_parent(out, "pacemakerd", -+ "version", PACEMAKER_VERSION, -+ "build", BUILD_VERSION, -+ "feature_set", CRM_FEATURE_SET, -+ NULL); -+ out->begin_list(out, NULL, NULL, "features"); -+ -+ for (char **s = feature_list; *s != NULL; s++) { -+ 
pcmk__output_create_xml_text_node(out, "feature", *s); -+ } -+ -+ out->end_list(out); -+ -+ g_strfreev(feature_list); -+ return pcmk_rc_ok; -+} -+ -+static pcmk__message_entry_t fmt_functions[] = { -+ { "features", "default", pacemakerd_features }, -+ { "features", "xml", pacemakerd_features_xml }, -+ - { NULL, NULL, NULL } - }; - -@@ -200,7 +236,7 @@ static GOptionContext * - build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) { - GOptionContext *context = NULL; - -- context = pcmk__build_arg_context(args, "text", group, NULL); -+ context = pcmk__build_arg_context(args, "text (default), xml", group, NULL); - pcmk__add_main_args(context, entries); - return context; - } -@@ -241,9 +277,12 @@ main(int argc, char **argv) - goto done; - } - -+ pcmk__force_args(context, &error, "%s --xml-simple-list", g_get_prgname()); -+ -+ pcmk__register_messages(out, fmt_functions); -+ - if (options.features) { -- out->info(out, "Pacemaker %s (Build: %s)\n Supporting v%s: %s", PACEMAKER_VERSION, -- BUILD_VERSION, CRM_FEATURE_SET, CRM_FEATURES); -+ out->message(out, "features"); - exit_code = CRM_EX_OK; - goto done; - } --- -1.8.3.1 - - -From 5b7f5eb35b025b59805cf3c7c3dcb6a3cf4b71b3 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 17 May 2021 11:09:53 -0400 -Subject: [PATCH 3/5] Low: daemons: Conditionally enable logging in pacemakerd. - -If we're doing an interactive command-line call, use -pcmk__cli_init_logging. At the moment, all command line calls except -for --shutdown do their work before logging would even come up, so we -really only need to do this for --shutdown. - -If we're doing a daemon call, use crm_log_init. 
---- - daemons/pacemakerd/pacemakerd.c | 7 +++++-- - 1 file changed, 5 insertions(+), 2 deletions(-) - -diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c -index 93cf743..c20bde7 100644 ---- a/daemons/pacemakerd/pacemakerd.c -+++ b/daemons/pacemakerd/pacemakerd.c -@@ -296,8 +296,11 @@ main(int argc, char **argv) - - pcmk__set_env_option("mcp", "true"); - -- pcmk__cli_init_logging("pacemakerd", args->verbosity); -- crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+ if (options.shutdown) { -+ pcmk__cli_init_logging("pacemakerd", args->verbosity); -+ } else { -+ crm_log_init(NULL, LOG_INFO, TRUE, FALSE, argc, argv, FALSE); -+ } - - crm_debug("Checking for existing Pacemaker instance"); - old_instance = crm_ipc_new(CRM_SYSTEM_MCP, 0); --- -1.8.3.1 - - -From 2393362bb7489e86d937ed46a1c5cfb93d9bf3ab Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 17 May 2021 11:58:06 -0400 -Subject: [PATCH 4/5] Fix: include: Bump CRM_FEATURE_SET for new pacemakerd - args. - ---- - include/crm/crm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/crm.h b/include/crm/crm.h -index fdfc825..92a98fa 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -66,7 +66,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.10.0" -+# define CRM_FEATURE_SET "3.10.1" - - /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and - * recipient of a CPG message. This imposes an arbitrary limit on cluster node --- -1.8.3.1 - - -From 3ad8edbd91631b87ef5f53fa2d68f0c8bbb9ee2b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 17 May 2021 11:57:09 -0400 -Subject: [PATCH 5/5] Feature: xml: Add schema for pacemakerd. 
- ---- - xml/Makefile.am | 1 + - xml/api/pacemakerd-2.10.rng | 28 ++++++++++++++++++++++++++++ - 2 files changed, 29 insertions(+) - create mode 100644 xml/api/pacemakerd-2.10.rng - -diff --git a/xml/Makefile.am b/xml/Makefile.am -index 12a51c5..b9448d4 100644 ---- a/xml/Makefile.am -+++ b/xml/Makefile.am -@@ -56,6 +56,7 @@ API_request_base = command-output \ - crm_simulate \ - crmadmin \ - digests \ -+ pacemakerd \ - stonith_admin \ - version - -diff --git a/xml/api/pacemakerd-2.10.rng b/xml/api/pacemakerd-2.10.rng -new file mode 100644 -index 0000000..41a11e7 ---- /dev/null -+++ b/xml/api/pacemakerd-2.10.rng -@@ -0,0 +1,28 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - diff --git a/SOURCES/004-check-level.patch b/SOURCES/004-check-level.patch deleted file mode 100644 index f2abb5f..0000000 --- a/SOURCES/004-check-level.patch +++ /dev/null @@ -1,199 +0,0 @@ -From 3905e7eac11298fc20efd567a773666f948edf61 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 3 May 2021 11:19:04 -0400 -Subject: [PATCH 1/2] Feature: tools: Add OCF_CHECK_LEVEL to crm_resource - environment. - -If --validate= or --force-check= are given with a level, pass that along -as OCF_CHECK_LEVEL. This argument is optional, and if no value is given -then the environment variable will not be set and whatever's the default -on the resource agent will be used. - -See: rhbz#1955792. 
---- - tools/crm_resource.c | 29 +++++++++++++++++++++-------- - tools/crm_resource.h | 4 ++-- - tools/crm_resource_runtime.c | 13 ++++++++++--- - 3 files changed, 33 insertions(+), 13 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 45db2b2..6ca96f8 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -100,6 +100,7 @@ struct { - int timeout_ms; // Parsed from --timeout value - char *agent_spec; // Standard and/or provider and/or agent - gchar *xml_file; // Value of (deprecated) --xml-file -+ int check_level; // Optional value of --validate or --force-check - - // Resource configuration specified via command-line arguments - gboolean cmdline_config; // Resource configuration was via arguments -@@ -113,6 +114,7 @@ struct { - GHashTable *override_params; // Resource parameter values that override config - } options = { - .attr_set_type = XML_TAG_ATTR_SETS, -+ .check_level = -1, - .cib_options = cib_sync_call, - .require_cib = TRUE, - .require_dataset = TRUE, -@@ -402,14 +404,15 @@ static GOptionEntry query_entries[] = { - }; - - static GOptionEntry command_entries[] = { -- { "validate", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, -+ { "validate", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, - validate_or_force_cb, - "Validate resource configuration by calling agent's validate-all\n" - INDENT "action. The configuration may be specified either by giving an\n" - INDENT "existing resource name with -r, or by specifying --class,\n" - INDENT "--agent, and --provider arguments, along with any number of\n" -- INDENT "--option arguments.", -- NULL }, -+ INDENT "--option arguments. An optional LEVEL argument can be given\n" -+ INDENT "to control the level of checking performed.", -+ "LEVEL" }, - { "cleanup", 'C', G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, cleanup_refresh_cb, - "If resource has any past failures, clear its history and fail\n" - INDENT "count. 
Optionally filtered by --resource, --node, --operation\n" -@@ -546,11 +549,12 @@ static GOptionEntry advanced_entries[] = { - INDENT "the cluster believes the resource is a clone instance already\n" - INDENT "running on the local node.", - NULL }, -- { "force-check", 0, G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, -+ { "force-check", 0, G_OPTION_FLAG_OPTIONAL_ARG, G_OPTION_ARG_CALLBACK, - validate_or_force_cb, - "(Advanced) Bypass the cluster and check the state of a resource on\n" -- INDENT "the local node", -- NULL }, -+ INDENT "the local node. An optional LEVEL argument can be given\n" -+ INDENT "to control the level of checking performed.", -+ "LEVEL" }, - - { NULL } - }; -@@ -910,6 +914,15 @@ validate_or_force_cb(const gchar *option_name, const gchar *optarg, - if (options.override_params == NULL) { - options.override_params = pcmk__strkey_table(free, free); - } -+ -+ if (optarg != NULL) { -+ if (pcmk__scan_min_int(optarg, &options.check_level, 0) != pcmk_rc_ok) { -+ g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, -+ "Invalid check level setting: %s", optarg); -+ return FALSE; -+ } -+ } -+ - return TRUE; - } - -@@ -1826,12 +1839,12 @@ main(int argc, char **argv) - options.v_class, options.v_provider, options.v_agent, - "validate-all", options.cmdline_params, - options.override_params, options.timeout_ms, -- args->verbosity, options.force); -+ args->verbosity, options.force, options.check_level); - } else { - exit_code = cli_resource_execute(rsc, options.rsc_id, - options.operation, options.override_params, - options.timeout_ms, cib_conn, data_set, -- args->verbosity, options.force); -+ args->verbosity, options.force, options.check_level); - } - goto done; - -diff --git a/tools/crm_resource.h b/tools/crm_resource.h -index 3560377..5ab10d6 100644 ---- a/tools/crm_resource.h -+++ b/tools/crm_resource.h -@@ -88,11 +88,11 @@ crm_exit_t cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc - const char *rsc_type, const char *rsc_action, - 
GHashTable *params, GHashTable *override_hash, - int timeout_ms, int resource_verbose, -- gboolean force); -+ gboolean force, int check_level); - crm_exit_t cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - const char *rsc_action, GHashTable *override_hash, - int timeout_ms, cib_t *cib, pe_working_set_t *data_set, -- int resource_verbose, gboolean force); -+ int resource_verbose, gboolean force, int check_level); - - int cli_resource_update_attribute(pe_resource_t *rsc, const char *requested_name, - const char *attr_set, const char *attr_set_type, -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index fe0ec98..bde83b6 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1679,7 +1679,8 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - const char *rsc_class, const char *rsc_prov, - const char *rsc_type, const char *action, - GHashTable *params, GHashTable *override_hash, -- int timeout_ms, int resource_verbose, gboolean force) -+ int timeout_ms, int resource_verbose, gboolean force, -+ int check_level) - { - GHashTable *params_copy = NULL; - crm_exit_t exit_code = CRM_EX_OK; -@@ -1703,6 +1704,12 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - /* add crm_feature_set env needed by some resource agents */ - g_hash_table_insert(params, strdup(XML_ATTR_CRM_VERSION), strdup(CRM_FEATURE_SET)); - -+ if (check_level >= 0) { -+ char *level = crm_strdup_printf("%d", check_level); -+ setenv("OCF_CHECK_LEVEL", level, 1); -+ free(level); -+ } -+ - /* resources_action_create frees the params hash table it's passed, but we - * may need to reuse it in a second call to resources_action_create. 
Thus - * we'll make a copy here so that gets freed and the original remains for -@@ -1790,7 +1797,7 @@ crm_exit_t - cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - const char *rsc_action, GHashTable *override_hash, - int timeout_ms, cib_t * cib, pe_working_set_t *data_set, -- int resource_verbose, gboolean force) -+ int resource_verbose, gboolean force, int check_level) - { - pcmk__output_t *out = data_set->priv; - crm_exit_t exit_code = CRM_EX_OK; -@@ -1856,7 +1863,7 @@ cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - - exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, action, - params, override_hash, timeout_ms, -- resource_verbose, force); -+ resource_verbose, force, check_level); - return exit_code; - } - --- -1.8.3.1 - - -From d13ba4bd6defe0dd81fdf8ab39ae5b889513c0c0 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 May 2021 10:59:23 -0400 -Subject: [PATCH 2/2] Fix: include: Bump feature set to 3.10.2. - -This is for the OCF_CHECK_LEVEL environment variable. - -See: rhbz#1955792. ---- - include/crm/crm.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/crm.h b/include/crm/crm.h -index 92a98fa..ee52c36 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -66,7 +66,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.10.1" -+# define CRM_FEATURE_SET "3.10.2" - - /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and - * recipient of a CPG message. 
This imposes an arbitrary limit on cluster node --- -1.8.3.1 - diff --git a/SOURCES/004-systemd-metadata.patch b/SOURCES/004-systemd-metadata.patch new file mode 100644 index 0000000..142ef6a --- /dev/null +++ b/SOURCES/004-systemd-metadata.patch @@ -0,0 +1,73 @@ +From 09ef95a2eed48b4eb7488788a1b655d67eafe783 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 30 Nov 2021 14:47:12 -0500 +Subject: [PATCH] Low: libcrmservice: Handle systemd service templates. + +These unit files (which have an @ sign at the end) expect to be +parameterized by an instance name. Not providing an instance name +causes the dbus lookup to fail, and we fall back to assume this is an +LSB service. If the user doesn't provide an instance name, just add a +fake one. It doesn't seem to matter what name is given for the lookup. + +See: rhbz#2003151 +--- + lib/services/systemd.c | 22 ++++++++++++++++------ + 1 file changed, 16 insertions(+), 6 deletions(-) + +diff --git a/lib/services/systemd.c b/lib/services/systemd.c +index 8e9fff484..27a3b376d 100644 +--- a/lib/services/systemd.c ++++ b/lib/services/systemd.c +@@ -206,17 +206,27 @@ systemd_unit_extension(const char *name) + } + + static char * +-systemd_service_name(const char *name) ++systemd_service_name(const char *name, bool add_instance_name) + { +- if (name == NULL) { ++ if (pcmk__str_empty(name)) { + return NULL; + } + + if (systemd_unit_extension(name)) { + return strdup(name); +- } + +- return crm_strdup_printf("%s.service", name); ++ /* Services that end with an @ sign are systemd templates. They expect an ++ * instance name to follow the service name. If no instance name was ++ * provided, just add "x" to the string as the instance name. It doesn't ++ * seem to matter for purposes of looking up whether a service exists or ++ * not. 
++ */ ++ } else if (add_instance_name && *(name+strlen(name)-1) == '@') { ++ return crm_strdup_printf("%sx.service", name); ++ ++ } else { ++ return crm_strdup_printf("%s.service", name); ++ } + } + + static void +@@ -427,7 +437,7 @@ invoke_unit_by_name(const char *arg_name, svc_action_t *op, char **path) + CRM_ASSERT(msg != NULL); + + // Add the (expanded) unit name as the argument +- name = systemd_service_name(arg_name); ++ name = systemd_service_name(arg_name, op == NULL || pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, + DBUS_TYPE_INVALID)); + free(name); +@@ -944,7 +954,7 @@ invoke_unit_by_path(svc_action_t *op, const char *unit) + /* (ss) */ + { + const char *replace_s = "replace"; +- char *name = systemd_service_name(op->agent); ++ char *name = systemd_service_name(op->agent, pcmk__str_eq(op->action, "meta-data", pcmk__str_none)); + + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &name, DBUS_TYPE_INVALID)); + CRM_LOG_ASSERT(dbus_message_append_args(msg, DBUS_TYPE_STRING, &replace_s, DBUS_TYPE_INVALID)); +-- +2.27.0 + diff --git a/SOURCES/005-crm_resource.patch b/SOURCES/005-crm_resource.patch deleted file mode 100644 index 1683026..0000000 --- a/SOURCES/005-crm_resource.patch +++ /dev/null @@ -1,866 +0,0 @@ -From a5a507d4e1abf242903472719a19977811e6f164 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 20 May 2021 11:59:36 -0400 -Subject: [PATCH 01/10] Feature: libcrmcommon: Add OCF_OUTPUT_FORMAT to - crm_resource environment. 
- -See: rhbz#1644628 ---- - lib/common/output.c | 2 ++ - 1 file changed, 2 insertions(+) - -diff --git a/lib/common/output.c b/lib/common/output.c -index 6cb49b5..58872e0 100644 ---- a/lib/common/output.c -+++ b/lib/common/output.c -@@ -71,6 +71,8 @@ pcmk__output_new(pcmk__output_t **out, const char *fmt_name, const char *filenam - return ENOMEM; - } - -+ setenv("OCF_OUTPUT_FORMAT", (*out)->fmt_name, 1); -+ - return pcmk_rc_ok; - } - --- -1.8.3.1 - - -From acc6ecdbfb797d69794e68f75a734d6252434e01 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 21 May 2021 14:20:30 -0400 -Subject: [PATCH 02/10] Feature: schemas: Copy crm_resource schema in - preparation for changes. - -See: rhbz#1644628 ---- - xml/api/crm_resource-2.11.rng | 238 ++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 238 insertions(+) - create mode 100644 xml/api/crm_resource-2.11.rng - -diff --git a/xml/api/crm_resource-2.11.rng b/xml/api/crm_resource-2.11.rng -new file mode 100644 -index 0000000..8e386db ---- /dev/null -+++ b/xml/api/crm_resource-2.11.rng -@@ -0,0 +1,238 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ promoted -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ ocf -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ true -+ false -+ -+ -+ -+ true -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ Stopped -+ Started -+ Master -+ Slave -+ -+ -+ --- -1.8.3.1 - - -From 1bbdf2149a111e9e19c388834f82001e0d31c427 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 24 May 2021 12:23:55 -0400 -Subject: 
[PATCH 03/10] Feature: xml: Update the crm_resource schema for XML - output. - -See: rhbz#1644628 ---- - xml/api/crm_resource-2.11.rng | 50 +++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 50 insertions(+) - -diff --git a/xml/api/crm_resource-2.11.rng b/xml/api/crm_resource-2.11.rng -index 8e386db..aaa54d6 100644 ---- a/xml/api/crm_resource-2.11.rng -+++ b/xml/api/crm_resource-2.11.rng -@@ -20,6 +20,7 @@ - - - -+ - - - -@@ -227,6 +228,55 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - Stopped --- -1.8.3.1 - - -From d89f5bc7fec856fdcd32fa14edbd0019507d5d15 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 1 Jun 2021 15:26:58 -0400 -Subject: [PATCH 04/10] Low: libcrmcommon: Increase PCMK__API_VERSION for new - crm_resource output. - -See: rhbz#1644628 ---- - include/crm/common/output_internal.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 10b315b..0436cde 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -27,7 +27,7 @@ extern "C" { - # include - # include - --# define PCMK__API_VERSION "2.9" -+# define PCMK__API_VERSION "2.11" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS))) --- -1.8.3.1 - - -From 30bd2ddf43ee2a911681e51f40ed9ba20ec250b0 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 27 May 2021 13:57:12 -0400 -Subject: [PATCH 05/10] Low: tools: Pass NULL to - cli_resource_execute_from_params... - -if no resource name is given. This happens if we are validating based -on the --class/--agent/--provider command line options instead. 
---- - tools/crm_resource.c | 2 +- - tools/crm_resource_runtime.c | 8 ++++---- - 2 files changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 24f1121..37a0bb0 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1840,7 +1840,7 @@ main(int argc, char **argv) - - case cmd_execute_agent: - if (options.cmdline_config) { -- exit_code = cli_resource_execute_from_params(out, "test", -+ exit_code = cli_resource_execute_from_params(out, NULL, - options.v_class, options.v_provider, options.v_agent, - "validate-all", options.cmdline_params, - options.override_params, options.timeout_ms, -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 48a4b40..ebf48bb 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1717,14 +1717,14 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - */ - params_copy = pcmk__str_table_dup(params); - -- op = resources_action_create(rsc_name, rsc_class, rsc_prov, rsc_type, action, 0, -- timeout_ms, params_copy, 0); -+ op = resources_action_create(rsc_name ? rsc_name : "test", rsc_class, rsc_prov, -+ rsc_type, action, 0, timeout_ms, params_copy, 0); - if (op == NULL) { - /* Re-run with stderr enabled so we can display a sane error message */ - crm_enable_stderr(TRUE); - params_copy = pcmk__str_table_dup(params); -- op = resources_action_create(rsc_name, rsc_class, rsc_prov, rsc_type, action, 0, -- timeout_ms, params_copy, 0); -+ op = resources_action_create(rsc_name ? rsc_name : "test", rsc_class, rsc_prov, -+ rsc_type, action, 0, timeout_ms, params_copy, 0); - - /* Callers of cli_resource_execute expect that the params hash table will - * be freed. 
That function uses this one, so for that reason and for --- -1.8.3.1 - - -From ee56efd53d14cfc4f902769540b72b3bb6096a73 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 24 May 2021 12:08:52 -0400 -Subject: [PATCH 06/10] Feature: tools: Add an agent-status message for - crm_resource. - -This moves what was previously only done in an out->info call to its own -output message, which means it will appear in XML output as well. Also, -note that if --class/--agent/--provider are given, the resource name -will be set to "test". In that case, do not display the resource name -in the output. - -This message will be used for --validate and the --force-* command line -options to crm_resource. - -See: rhbz#1644628 ---- - tools/crm_resource_print.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 53 insertions(+) - -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 9d82cf8..88d5878 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -152,6 +152,57 @@ attribute_list_default(pcmk__output_t *out, va_list args) { - return pcmk_rc_ok; - } - -+PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *", -+ "const char *", "const char *", "int") -+static int -+agent_status_default(pcmk__output_t *out, va_list args) { -+ int status = va_arg(args, int); -+ const char *action = va_arg(args, const char *); -+ const char *name = va_arg(args, const char *); -+ const char *class = va_arg(args, const char *); -+ const char *provider = va_arg(args, const char *); -+ const char *type = va_arg(args, const char *); -+ int rc = va_arg(args, int); -+ -+ if (status == PCMK_LRM_OP_DONE) { -+ out->info(out, "Operation %s%s%s (%s%s%s:%s) returned: '%s' (%d)", -+ action, name ? " for " : "", name ? name : "", -+ class, provider ? ":" : "", provider ? 
provider : "", type, -+ services_ocf_exitcode_str(rc), rc); -+ } else { -+ out->err(out, "Operation %s%s%s (%s%s%s:%s) failed: '%s' (%d)", -+ action, name ? " for " : "", name ? name : "", -+ class, provider ? ":" : "", provider ? provider : "", type, -+ services_lrm_status_str(status), status); -+ } -+ -+ return pcmk_rc_ok; -+} -+ -+PCMK__OUTPUT_ARGS("agent-status", "int", "const char *", "const char *", "const char *", -+ "const char *", "const char *", "int") -+static int -+agent_status_xml(pcmk__output_t *out, va_list args) { -+ int status G_GNUC_UNUSED = va_arg(args, int); -+ const char *action G_GNUC_UNUSED = va_arg(args, const char *); -+ const char *name G_GNUC_UNUSED = va_arg(args, const char *); -+ const char *class G_GNUC_UNUSED = va_arg(args, const char *); -+ const char *provider G_GNUC_UNUSED = va_arg(args, const char *); -+ const char *type G_GNUC_UNUSED = va_arg(args, const char *); -+ int rc = va_arg(args, int); -+ -+ char *status_str = pcmk__itoa(rc); -+ -+ pcmk__output_create_xml_node(out, "agent-status", -+ "code", status_str, -+ "message", services_ocf_exitcode_str(rc), -+ NULL); -+ -+ free(status_str); -+ -+ return pcmk_rc_ok; -+} -+ - PCMK__OUTPUT_ARGS("attribute-list", "pe_resource_t *", "char *", "GHashTable *") - static int - attribute_list_text(pcmk__output_t *out, va_list args) { -@@ -562,6 +613,8 @@ resource_names(pcmk__output_t *out, va_list args) { - } - - static pcmk__message_entry_t fmt_functions[] = { -+ { "agent-status", "default", agent_status_default }, -+ { "agent-status", "xml", agent_status_xml }, - { "attribute-list", "default", attribute_list_default }, - { "attribute-list", "text", attribute_list_text }, - { "property-list", "default", property_list_default }, --- -1.8.3.1 - - -From 85cb6b6bff96b18c5174d11e4de4d49cbfb20bb7 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 1 Jun 2021 14:47:30 -0400 -Subject: [PATCH 07/10] Feature: tools: Add an overridden params output - message. 
- -This also replaces what was previously being done in an out->info call -with an output message. This means it shows up in XML output as well. -Also, note that if --class/--agent/--provider are given, the resource -name will be set to "test". In that case, do not display the resource -name in the output. - -See: rhbz#1644628 ---- - tools/crm_resource_print.c | 39 +++++++++++++++++++++++++++++++++++++++ - 1 file changed, 39 insertions(+) - -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 88d5878..119d83f 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -224,6 +224,43 @@ attribute_list_text(pcmk__output_t *out, va_list args) { - return pcmk_rc_ok; - } - -+PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *") -+static int -+override_default(pcmk__output_t *out, va_list args) { -+ const char *rsc_name = va_arg(args, const char *); -+ const char *name = va_arg(args, const char *); -+ const char *value = va_arg(args, const char *); -+ -+ if (rsc_name == NULL) { -+ out->list_item(out, NULL, "Overriding the cluster configuration with '%s' = '%s'", -+ name, value); -+ } else { -+ out->list_item(out, NULL, "Overriding the cluster configuration for '%s' with '%s' = '%s'", -+ rsc_name, name, value); -+ } -+ -+ return pcmk_rc_ok; -+} -+ -+PCMK__OUTPUT_ARGS("override", "const char *", "const char *", "const char *") -+static int -+override_xml(pcmk__output_t *out, va_list args) { -+ const char *rsc_name = va_arg(args, const char *); -+ const char *name = va_arg(args, const char *); -+ const char *value = va_arg(args, const char *); -+ -+ xmlNodePtr node = pcmk__output_create_xml_node(out, "override", -+ "name", name, -+ "value", value, -+ NULL); -+ -+ if (rsc_name != NULL) { -+ crm_xml_add(node, "rsc", rsc_name); -+ } -+ -+ return pcmk_rc_ok; -+} -+ - PCMK__OUTPUT_ARGS("property-list", "pe_resource_t *", "char *") - static int - property_list_default(pcmk__output_t *out, va_list args) { -@@ 
-617,6 +654,8 @@ static pcmk__message_entry_t fmt_functions[] = { - { "agent-status", "xml", agent_status_xml }, - { "attribute-list", "default", attribute_list_default }, - { "attribute-list", "text", attribute_list_text }, -+ { "override", "default", override_default }, -+ { "override", "xml", override_xml }, - { "property-list", "default", property_list_default }, - { "property-list", "text", property_list_text }, - { "resource-check-list", "default", resource_check_list_default }, --- -1.8.3.1 - - -From e5e24592c7c3231c619fb5253e7925ffbc634a99 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 4 Jun 2021 10:24:51 -0400 -Subject: [PATCH 08/10] Low: tools: Use simple XML lists for resource actions - as well. - -See: rhbz#1644628 ---- - tools/crm_resource.c | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 37a0bb0..e957011 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1643,6 +1643,7 @@ main(int argc, char **argv) - * saves from having to write custom messages to build the lists around all these things - */ - switch (options.rsc_cmd) { -+ case cmd_execute_agent: - case cmd_list_resources: - case cmd_query_xml: - case cmd_query_raw_xml: --- -1.8.3.1 - - -From 3e75174d0bc31b261adb1994214a5878b79da85b Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 4 Jun 2021 10:30:10 -0400 -Subject: [PATCH 09/10] Feature: tools: Add an output message for resource - actions. - -This wraps up the override and agent-status messages into a single -message, along with any stdout/stderr from the resource action. This -message should be called after taking the action. - -This also implements handling XML output from resource actions. Check -to see if the validate-all action returns XML. If so, output it as a -CDATA block under a "command" element. If not, treat it as plain text -and output it as stdout/stderr from a command. 
- -See: rhbz#1644628 ---- - tools/crm_resource_print.c | 122 +++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 122 insertions(+) - -diff --git a/tools/crm_resource_print.c b/tools/crm_resource_print.c -index 119d83f..19a366d 100644 ---- a/tools/crm_resource_print.c -+++ b/tools/crm_resource_print.c -@@ -293,6 +293,126 @@ property_list_text(pcmk__output_t *out, va_list args) { - return pcmk_rc_ok; - } - -+PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *", -+ "const char *", "const char *", "const char *", "GHashTable *", -+ "int", "int", "char *", "char *") -+static int -+resource_agent_action_default(pcmk__output_t *out, va_list args) { -+ int verbose = va_arg(args, int); -+ -+ const char *class = va_arg(args, const char *); -+ const char *provider = va_arg(args, const char *); -+ const char *type = va_arg(args, const char *); -+ const char *rsc_name = va_arg(args, const char *); -+ const char *action = va_arg(args, const char *); -+ GHashTable *overrides = va_arg(args, GHashTable *); -+ int rc = va_arg(args, int); -+ int status = va_arg(args, int); -+ char *stdout_data = va_arg(args, char *); -+ char *stderr_data = va_arg(args, char *); -+ -+ if (overrides) { -+ GHashTableIter iter; -+ char *name = NULL; -+ char *value = NULL; -+ -+ out->begin_list(out, NULL, NULL, "overrides"); -+ -+ g_hash_table_iter_init(&iter, overrides); -+ while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { -+ out->message(out, "override", rsc_name, name, value); -+ } -+ -+ out->end_list(out); -+ } -+ -+ out->message(out, "agent-status", status, action, rsc_name, class, provider, -+ type, rc); -+ -+ /* hide output for validate-all if not in verbose */ -+ if (verbose == 0 && pcmk__str_eq(action, "validate-all", pcmk__str_casei)) { -+ return pcmk_rc_ok; -+ } -+ -+ if (stdout_data || stderr_data) { -+ xmlNodePtr doc = string2xml(stdout_data); -+ -+ if (doc != NULL) { -+ out->output_xml(out, "command", stdout_data); -+ 
xmlFreeNode(doc); -+ } else { -+ out->subprocess_output(out, rc, stdout_data, stderr_data); -+ } -+ } -+ -+ return pcmk_rc_ok; -+} -+ -+PCMK__OUTPUT_ARGS("resource-agent-action", "int", "const char *", "const char *", -+ "const char *", "const char *", "const char *", "GHashTable *", -+ "int", "int", "char *", "char *") -+static int -+resource_agent_action_xml(pcmk__output_t *out, va_list args) { -+ int verbose G_GNUC_UNUSED = va_arg(args, int); -+ -+ const char *class = va_arg(args, const char *); -+ const char *provider = va_arg(args, const char *); -+ const char *type = va_arg(args, const char *); -+ const char *rsc_name = va_arg(args, const char *); -+ const char *action = va_arg(args, const char *); -+ GHashTable *overrides = va_arg(args, GHashTable *); -+ int rc = va_arg(args, int); -+ int status = va_arg(args, int); -+ char *stdout_data = va_arg(args, char *); -+ char *stderr_data = va_arg(args, char *); -+ -+ xmlNodePtr node = pcmk__output_xml_create_parent(out, "resource-agent-action", -+ "action", action, -+ "class", class, -+ "type", type, -+ NULL); -+ -+ if (rsc_name) { -+ crm_xml_add(node, "rsc", rsc_name); -+ } -+ -+ if (provider) { -+ crm_xml_add(node, "provider", provider); -+ } -+ -+ if (overrides) { -+ GHashTableIter iter; -+ char *name = NULL; -+ char *value = NULL; -+ -+ out->begin_list(out, NULL, NULL, "overrides"); -+ -+ g_hash_table_iter_init(&iter, overrides); -+ while (g_hash_table_iter_next(&iter, (gpointer *) &name, (gpointer *) &value)) { -+ out->message(out, "override", rsc_name, name, value); -+ } -+ -+ out->end_list(out); -+ } -+ -+ out->message(out, "agent-status", status, action, rsc_name, class, provider, -+ type, rc); -+ -+ if (stdout_data || stderr_data) { -+ xmlNodePtr doc = string2xml(stdout_data); -+ -+ if (doc != NULL) { -+ out->output_xml(out, "command", stdout_data); -+ xmlFreeNode(doc); -+ } else { -+ out->subprocess_output(out, rc, stdout_data, stderr_data); -+ } -+ } -+ -+ pcmk__output_xml_pop_parent(out); -+ return 
pcmk_rc_ok; -+} -+ - PCMK__OUTPUT_ARGS("resource-check-list", "resource_checks_t *") - static int - resource_check_list_default(pcmk__output_t *out, va_list args) { -@@ -658,6 +778,8 @@ static pcmk__message_entry_t fmt_functions[] = { - { "override", "xml", override_xml }, - { "property-list", "default", property_list_default }, - { "property-list", "text", property_list_text }, -+ { "resource-agent-action", "default", resource_agent_action_default }, -+ { "resource-agent-action", "xml", resource_agent_action_xml }, - { "resource-check-list", "default", resource_check_list_default }, - { "resource-check-list", "xml", resource_check_list_xml }, - { "resource-search-list", "default", resource_search_list_default }, --- -1.8.3.1 - - -From b50b2418e1e997b42f5370b4672a3f105d74634f Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 4 Jun 2021 10:40:16 -0400 -Subject: [PATCH 10/10] Feature: tools: Use the new resource-agent-action - message. - -See: rhbz#1644628 ---- - tools/crm_resource_runtime.c | 21 +++------------------ - 1 file changed, 3 insertions(+), 18 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index ebf48bb..755be9f 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1765,28 +1765,13 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - if (services_action_sync(op)) { - exit_code = op->rc; - -- if (op->status == PCMK_LRM_OP_DONE) { -- out->info(out, "Operation %s for %s (%s:%s:%s) returned: '%s' (%d)", -- action, rsc_name, rsc_class, rsc_prov ? rsc_prov : "", rsc_type, -- services_ocf_exitcode_str(op->rc), op->rc); -- } else { -- out->err(out, "Operation %s for %s (%s:%s:%s) failed: '%s' (%d)", -- action, rsc_name, rsc_class, rsc_prov ? 
rsc_prov : "", rsc_type, -- services_lrm_status_str(op->status), op->status); -- } -- -- /* hide output for validate-all if not in verbose */ -- if (resource_verbose == 0 && pcmk__str_eq(action, "validate-all", pcmk__str_casei)) -- goto done; -- -- if (op->stdout_data || op->stderr_data) { -- out->subprocess_output(out, op->rc, op->stdout_data, op->stderr_data); -- } -+ out->message(out, "resource-agent-action", resource_verbose, rsc_class, -+ rsc_prov, rsc_type, rsc_name, action, override_hash, op->rc, -+ op->status, op->stdout_data, op->stderr_data); - } else { - exit_code = op->rc == 0 ? CRM_EX_ERROR : op->rc; - } - --done: - services_action_free(op); - /* See comment above about why we free params here. */ - g_hash_table_destroy(params); --- -1.8.3.1 - diff --git a/SOURCES/005-fencing-reasons.patch b/SOURCES/005-fencing-reasons.patch new file mode 100644 index 0000000..e0772c6 --- /dev/null +++ b/SOURCES/005-fencing-reasons.patch @@ -0,0 +1,2200 @@ +From 3d10dad9a555aae040d8473edfe31a4e4279c066 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:34:03 -0600 +Subject: [PATCH 01/19] Refactor: libcrmcommon: add internal API for checking + for fencing action + +The naming is a little awkward -- "fencing action" has multiple meanings +depending on the context. It can refer to fencer API requests, fence device +actions, fence agent actions, or just those actions that fence a node (off and +reboot). + +This new function pcmk__is_fencing_action() uses the last meaning, so it does +*not* return true for unfencing ("on" actions). 
+--- + include/crm/common/internal.h | 1 + + lib/common/operations.c | 14 ++++++++++++++ + 2 files changed, 15 insertions(+) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index a35c5769a..694fc6cd4 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -218,6 +218,7 @@ char *pcmk__notify_key(const char *rsc_id, const char *notify_type, + char *pcmk__transition_key(int transition_id, int action_id, int target_rc, + const char *node); + void pcmk__filter_op_for_digest(xmlNode *param_set); ++bool pcmk__is_fencing_action(const char *action); + + + // bitwise arithmetic utilities +diff --git a/lib/common/operations.c b/lib/common/operations.c +index aa7106ce6..366c18970 100644 +--- a/lib/common/operations.c ++++ b/lib/common/operations.c +@@ -523,3 +523,17 @@ crm_op_needs_metadata(const char *rsc_class, const char *op) + CRMD_ACTION_MIGRATE, CRMD_ACTION_MIGRATED, + CRMD_ACTION_NOTIFY, NULL); + } ++ ++/*! ++ * \internal ++ * \brief Check whether an action name is for a fencing action ++ * ++ * \param[in] action Action name to check ++ * ++ * \return true if \p action is "off", "reboot", or "poweroff", otherwise false ++ */ ++bool ++pcmk__is_fencing_action(const char *action) ++{ ++ return pcmk__str_any_of(action, "off", "reboot", "poweroff", NULL); ++} +-- +2.27.0 + + +From 86ac00fb3e99d79ca2c442ae1670fe850146f734 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:38:58 -0600 +Subject: [PATCH 02/19] Low: fencer,scheduler: compare fence action names + case-sensitively + +Use the new convenience function pcmk__is_fencing_action() to check whether +an action name is a fencing action ("off", "reboot", or "poweroff"). This +changes the behavior from case-insensitive to case-sensitive, which is more +appropriate (the case-insensitivity was inherited from lazy use of the old +safe_str_eq() function which was always case-insensitive). 
+--- + daemons/fenced/fenced_commands.c | 6 +++--- + daemons/fenced/fenced_remote.c | 2 +- + lib/pacemaker/pcmk_graph_producer.c | 2 +- + lib/pengine/common.c | 8 +------- + 4 files changed, 6 insertions(+), 12 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 63bfad3a9..46c840f2a 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -128,7 +128,7 @@ get_action_delay_max(stonith_device_t * device, const char * action) + const char *value = NULL; + int delay_max = 0; + +- if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { ++ if (!pcmk__is_fencing_action(action)) { + return 0; + } + +@@ -146,7 +146,7 @@ get_action_delay_base(stonith_device_t *device, const char *action, const char * + char *hash_value = NULL; + int delay_base = 0; + +- if (!pcmk__strcase_any_of(action, "off", "reboot", NULL)) { ++ if (!pcmk__is_fencing_action(action)) { + return 0; + } + +@@ -448,7 +448,7 @@ stonith_device_execute(stonith_device_t * device) + + if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, + STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { +- if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { ++ if (pcmk__is_fencing_action(cmd->action)) { + if (node_does_watchdog_fencing(stonith_our_uname)) { + pcmk__panic(__func__); + goto done; +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 963433bf3..358ea3aa7 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1758,7 +1758,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + if (!tp) { + return FALSE; + } +- if (pcmk__strcase_any_of(op->action, "off", "reboot", NULL)) { ++ if (pcmk__is_fencing_action(op->action)) { + /* Don't count the devices on the target node if we are killing + * the target node. 
*/ + skip_target = TRUE; +diff --git a/lib/pacemaker/pcmk_graph_producer.c b/lib/pacemaker/pcmk_graph_producer.c +index ffcbd1274..5bec9d8ce 100644 +--- a/lib/pacemaker/pcmk_graph_producer.c ++++ b/lib/pacemaker/pcmk_graph_producer.c +@@ -721,7 +721,7 @@ add_downed_nodes(xmlNode *xml, const pe_action_t *action, + /* Fencing makes the action's node and any hosted guest nodes down */ + const char *fence = g_hash_table_lookup(action->meta, "stonith_action"); + +- if (pcmk__strcase_any_of(fence, "off", "reboot", NULL)) { ++ if (pcmk__is_fencing_action(fence)) { + xmlNode *downed = create_xml_node(xml, XML_GRAPH_TAG_DOWNED); + add_node_to_xml_by_id(action->node->details->id, downed); + pe_foreach_guest_node(data_set, action->node, add_node_to_xml, downed); +diff --git a/lib/pengine/common.c b/lib/pengine/common.c +index 236fc26b1..fe4223816 100644 +--- a/lib/pengine/common.c ++++ b/lib/pengine/common.c +@@ -27,12 +27,6 @@ check_health(const char *value) + "migrate-on-red", NULL); + } + +-static bool +-check_stonith_action(const char *value) +-{ +- return pcmk__strcase_any_of(value, "reboot", "poweroff", "off", NULL); +-} +- + static bool + check_placement_strategy(const char *value) + { +@@ -114,7 +108,7 @@ static pcmk__cluster_option_t pe_opts[] = { + }, + { + "stonith-action", NULL, "select", "reboot, off, poweroff", +- "reboot", check_stonith_action, ++ "reboot", pcmk__is_fencing_action, + "Action to send to fence device when a node needs to be fenced " + "(\"poweroff\" is a deprecated alias for \"off\")", + NULL +-- +2.27.0 + + +From c8f6e8a04c4fa4271db817af0a23aa941c9d7689 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 12 Nov 2021 17:42:21 -0600 +Subject: [PATCH 03/19] Refactor: fencing: rename type for peer query replies + +st_query_result_t contains the device information parsed from a peer's query +reply, but the name could easily be confused with the actual success/failure +result of the query action itself. Rename it to peer_device_info_t. 
+--- + daemons/fenced/fenced_remote.c | 103 +++++++++++++++++---------------- + 1 file changed, 52 insertions(+), 51 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 358ea3aa7..9e2f62804 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -41,7 +41,7 @@ + + /* When one fencer queries its peers for devices able to handle a fencing + * request, each peer will reply with a list of such devices available to it. +- * Each reply will be parsed into a st_query_result_t, with each device's ++ * Each reply will be parsed into a peer_device_info_t, with each device's + * information kept in a device_properties_t. + */ + +@@ -72,18 +72,19 @@ typedef struct st_query_result_s { + int ndevices; + /* Devices available to this host that are capable of fencing the target */ + GHashTable *devices; +-} st_query_result_t; ++} peer_device_info_t; + + GHashTable *stonith_remote_op_list = NULL; + +-void call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc); ++void call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, ++ int rc); + static void remote_op_done(remote_fencing_op_t * op, xmlNode * data, int rc, int dup); + extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op, xmlNode * data, + int call_options); + + static void report_timeout_period(remote_fencing_op_t * op, int op_timeout); + static int get_op_total_timeout(const remote_fencing_op_t *op, +- const st_query_result_t *chosen_peer); ++ const peer_device_info_t *chosen_peer); + + static gint + sort_strings(gconstpointer a, gconstpointer b) +@@ -95,7 +96,7 @@ static void + free_remote_query(gpointer data) + { + if (data) { +- st_query_result_t *query = data; ++ peer_device_info_t *query = data; + + crm_trace("Free'ing query result from %s", query->host); + g_hash_table_destroy(query->devices); +@@ -150,8 +151,8 @@ count_peer_device(gpointer key, gpointer value, gpointer 
user_data) + * \return Number of devices available to peer that were not already executed + */ + static int +-count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, +- gboolean verified_only) ++count_peer_devices(const remote_fencing_op_t *op, ++ const peer_device_info_t *peer, gboolean verified_only) + { + struct peer_count_data data; + +@@ -175,7 +176,7 @@ count_peer_devices(const remote_fencing_op_t *op, const st_query_result_t *peer, + * \return Device properties if found, NULL otherwise + */ + static device_properties_t * +-find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, ++find_peer_device(const remote_fencing_op_t *op, const peer_device_info_t *peer, + const char *device) + { + device_properties_t *props = g_hash_table_lookup(peer->devices, device); +@@ -196,7 +197,7 @@ find_peer_device(const remote_fencing_op_t *op, const st_query_result_t *peer, + * \return TRUE if device was found and marked, FALSE otherwise + */ + static gboolean +-grab_peer_device(const remote_fencing_op_t *op, st_query_result_t *peer, ++grab_peer_device(const remote_fencing_op_t *op, peer_device_info_t *peer, + const char *device, gboolean verified_devices_only) + { + device_properties_t *props = find_peer_device(op, peer, device); +@@ -1216,7 +1217,7 @@ enum find_best_peer_options { + FIND_PEER_VERIFIED_ONLY = 0x0004, + }; + +-static st_query_result_t * ++static peer_device_info_t * + find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer_options options) + { + GList *iter = NULL; +@@ -1227,7 +1228,7 @@ find_best_peer(const char *device, remote_fencing_op_t * op, enum find_best_peer + } + + for (iter = op->query_results; iter != NULL; iter = iter->next) { +- st_query_result_t *peer = iter->data; ++ peer_device_info_t *peer = iter->data; + + crm_trace("Testing result from %s targeting %s with %d device%s: %d %x", + peer->host, op->target, peer->ndevices, +@@ -1257,11 +1258,11 @@ find_best_peer(const char 
*device, remote_fencing_op_t * op, enum find_best_peer + return NULL; + } + +-static st_query_result_t * ++static peer_device_info_t * + stonith_choose_peer(remote_fencing_op_t * op) + { + const char *device = NULL; +- st_query_result_t *peer = NULL; ++ peer_device_info_t *peer = NULL; + uint32_t active = fencing_active_peers(); + + do { +@@ -1317,8 +1318,8 @@ stonith_choose_peer(remote_fencing_op_t * op) + } + + static int +-get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, +- const char *device) ++get_device_timeout(const remote_fencing_op_t *op, ++ const peer_device_info_t *peer, const char *device) + { + device_properties_t *props; + +@@ -1338,7 +1339,7 @@ get_device_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer, + + struct timeout_data { + const remote_fencing_op_t *op; +- const st_query_result_t *peer; ++ const peer_device_info_t *peer; + int total_timeout; + }; + +@@ -1365,7 +1366,7 @@ add_device_timeout(gpointer key, gpointer value, gpointer user_data) + } + + static int +-get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) ++get_peer_timeout(const remote_fencing_op_t *op, const peer_device_info_t *peer) + { + struct timeout_data timeout; + +@@ -1380,7 +1381,7 @@ get_peer_timeout(const remote_fencing_op_t *op, const st_query_result_t *peer) + + static int + get_op_total_timeout(const remote_fencing_op_t *op, +- const st_query_result_t *chosen_peer) ++ const peer_device_info_t *chosen_peer) + { + int total_timeout = 0; + stonith_topology_t *tp = find_topology_for_host(op->target); +@@ -1403,7 +1404,7 @@ get_op_total_timeout(const remote_fencing_op_t *op, + } + for (device_list = tp->levels[i]; device_list; device_list = device_list->next) { + for (iter = op->query_results; iter != NULL; iter = iter->next) { +- const st_query_result_t *peer = iter->data; ++ const peer_device_info_t *peer = iter->data; + + if (find_peer_device(op, peer, device_list->data)) { + total_timeout += 
get_device_timeout(op, peer, +@@ -1555,7 +1556,7 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op) + } + + void +-call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) ++call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc) + { + const char *device = NULL; + int timeout = op->base_timeout; +@@ -1734,8 +1735,8 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) + static gint + sort_peers(gconstpointer a, gconstpointer b) + { +- const st_query_result_t *peer_a = a; +- const st_query_result_t *peer_b = b; ++ const peer_device_info_t *peer_a = a; ++ const peer_device_info_t *peer_b = b; + + return (peer_b->ndevices - peer_a->ndevices); + } +@@ -1768,7 +1769,7 @@ all_topology_devices_found(remote_fencing_op_t * op) + for (device = tp->levels[i]; device; device = device->next) { + match = NULL; + for (iter = op->query_results; iter && !match; iter = iter->next) { +- st_query_result_t *peer = iter->data; ++ peer_device_info_t *peer = iter->data; + + if (skip_target && pcmk__str_eq(peer->host, op->target, pcmk__str_casei)) { + continue; +@@ -1850,31 +1851,31 @@ parse_action_specific(xmlNode *xml, const char *peer, const char *device, + * + * \param[in] xml XML node containing device properties + * \param[in,out] op Operation that query and reply relate to +- * \param[in,out] result Peer's results ++ * \param[in,out] peer Peer's device information + * \param[in] device ID of device being parsed + */ + static void + add_device_properties(xmlNode *xml, remote_fencing_op_t *op, +- st_query_result_t *result, const char *device) ++ peer_device_info_t *peer, const char *device) + { + xmlNode *child; + int verified = 0; + device_properties_t *props = calloc(1, sizeof(device_properties_t)); + +- /* Add a new entry to this result's devices list */ ++ /* Add a new entry to this peer's devices list */ + CRM_ASSERT(props != NULL); +- g_hash_table_insert(result->devices, strdup(device), props); 
++ g_hash_table_insert(peer->devices, strdup(device), props); + + /* Peers with verified (monitored) access will be preferred */ + crm_element_value_int(xml, F_STONITH_DEVICE_VERIFIED, &verified); + if (verified) { + crm_trace("Peer %s has confirmed a verified device %s", +- result->host, device); ++ peer->host, device); + props->verified = TRUE; + } + + /* Parse action-specific device properties */ +- parse_action_specific(xml, result->host, device, op_requested_action(op), ++ parse_action_specific(xml, peer->host, device, op_requested_action(op), + op, st_phase_requested, props); + for (child = pcmk__xml_first_child(xml); child != NULL; + child = pcmk__xml_next(child)) { +@@ -1883,10 +1884,10 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op, + * winds up getting remapped. + */ + if (pcmk__str_eq(ID(child), "off", pcmk__str_casei)) { +- parse_action_specific(child, result->host, device, "off", ++ parse_action_specific(child, peer->host, device, "off", + op, st_phase_off, props); + } else if (pcmk__str_eq(ID(child), "on", pcmk__str_casei)) { +- parse_action_specific(child, result->host, device, "on", ++ parse_action_specific(child, peer->host, device, "on", + op, st_phase_on, props); + } + } +@@ -1903,17 +1904,17 @@ add_device_properties(xmlNode *xml, remote_fencing_op_t *op, + * + * \return Newly allocated result structure with parsed reply + */ +-static st_query_result_t * ++static peer_device_info_t * + add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml) + { +- st_query_result_t *result = calloc(1, sizeof(st_query_result_t)); ++ peer_device_info_t *peer = calloc(1, sizeof(peer_device_info_t)); + xmlNode *child; + + // cppcheck seems not to understand the abort logic in CRM_CHECK + // cppcheck-suppress memleak +- CRM_CHECK(result != NULL, return NULL); +- result->host = strdup(host); +- result->devices = pcmk__strkey_table(free, free); ++ CRM_CHECK(peer != NULL, return NULL); ++ peer->host = strdup(host); ++ 
peer->devices = pcmk__strkey_table(free, free); + + /* Each child element describes one capable device available to the peer */ + for (child = pcmk__xml_first_child(xml); child != NULL; +@@ -1921,17 +1922,17 @@ add_result(remote_fencing_op_t *op, const char *host, int ndevices, xmlNode *xml + const char *device = ID(child); + + if (device) { +- add_device_properties(child, op, result, device); ++ add_device_properties(child, op, peer, device); + } + } + +- result->ndevices = g_hash_table_size(result->devices); +- CRM_CHECK(ndevices == result->ndevices, ++ peer->ndevices = g_hash_table_size(peer->devices); ++ CRM_CHECK(ndevices == peer->ndevices, + crm_err("Query claimed to have %d device%s but %d found", +- ndevices, pcmk__plural_s(ndevices), result->ndevices)); ++ ndevices, pcmk__plural_s(ndevices), peer->ndevices)); + +- op->query_results = g_list_insert_sorted(op->query_results, result, sort_peers); +- return result; ++ op->query_results = g_list_insert_sorted(op->query_results, peer, sort_peers); ++ return peer; + } + + /*! +@@ -1957,7 +1958,7 @@ process_remote_stonith_query(xmlNode * msg) + const char *id = NULL; + const char *host = NULL; + remote_fencing_op_t *op = NULL; +- st_query_result_t *result = NULL; ++ peer_device_info_t *peer = NULL; + uint32_t replies_expected; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + +@@ -1991,7 +1992,7 @@ process_remote_stonith_query(xmlNode * msg) + op->replies, replies_expected, host, + op->target, op->action, ndevices, pcmk__plural_s(ndevices), id); + if (ndevices > 0) { +- result = add_result(op, host, ndevices, dev); ++ peer = add_result(op, host, ndevices, dev); + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { +@@ -2001,7 +2002,7 @@ process_remote_stonith_query(xmlNode * msg) + if (op->state == st_query && all_topology_devices_found(op)) { + /* All the query results are in for the topology, start the fencing ops. 
*/ + crm_trace("All topology devices found"); +- call_remote_stonith(op, result, pcmk_ok); ++ call_remote_stonith(op, peer, pcmk_ok); + + } else if (have_all_replies) { + crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ", +@@ -2010,15 +2011,15 @@ process_remote_stonith_query(xmlNode * msg) + } + + } else if (op->state == st_query) { +- int nverified = count_peer_devices(op, result, TRUE); ++ int nverified = count_peer_devices(op, peer, TRUE); + + /* We have a result for a non-topology fencing op that looks promising, + * go ahead and start fencing before query timeout */ +- if (result && (host_is_target == FALSE) && nverified) { ++ if ((peer != NULL) && !host_is_target && nverified) { + /* we have a verified device living on a peer that is not the target */ + crm_trace("Found %d verified device%s", + nverified, pcmk__plural_s(nverified)); +- call_remote_stonith(op, result, pcmk_ok); ++ call_remote_stonith(op, peer, pcmk_ok); + + } else if (have_all_replies) { + crm_info("All query replies have arrived, continuing (%d expected/%d received) ", +@@ -2029,10 +2030,10 @@ process_remote_stonith_query(xmlNode * msg) + crm_trace("Waiting for more peer results before launching fencing operation"); + } + +- } else if (result && (op->state == st_done)) { ++ } else if ((peer != NULL) && (op->state == st_done)) { + crm_info("Discarding query result from %s (%d device%s): " +- "Operation is %s", result->host, +- result->ndevices, pcmk__plural_s(result->ndevices), ++ "Operation is %s", peer->host, ++ peer->ndevices, pcmk__plural_s(peer->ndevices), + stonith_op_state_str(op->state)); + } + +-- +2.27.0 + + +From 913e0620310089d2250e9ecde383df757f8e8063 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 12:46:37 -0600 +Subject: [PATCH 04/19] Low: fencer: improve broadcasting replies for fenced + originators + +If the target of a fencing action was also the originator, the executioner +broadcasts the result on their behalf. 
+ +Previously, it would check if the action was not in a list of actions that are +never broadcasted. However we really only want to broadcast off/reboot results +so just check for that instead. + +This also rearranges reply creation slightly so we don't trace-log the reply +until it is fully created. +--- + daemons/fenced/fenced_commands.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 46c840f2a..e4185f6e1 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2385,32 +2385,31 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + int pid, bool merged) + { + xmlNode *reply = NULL; +- gboolean bcast = FALSE; ++ bool bcast = false; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + + reply = construct_async_reply(cmd, result); + +- // Only replies for certain actions are broadcast +- if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status", +- NULL)) { +- crm_trace("Never broadcast '%s' replies", cmd->action); ++ // If target was also the originator, broadcast fencing results for it ++ if (!stand_alone && pcmk__is_fencing_action(cmd->action) ++ && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) { + +- } else if (!stand_alone && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei) && !pcmk__str_eq(cmd->action, "on", pcmk__str_casei)) { +- crm_trace("Broadcast '%s' reply for %s", cmd->action, cmd->victim); ++ crm_trace("Broadcast '%s' result for %s (target was also originator)", ++ cmd->action, cmd->victim); + crm_xml_add(reply, F_SUBTYPE, "broadcast"); +- bcast = TRUE; ++ crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); ++ bcast = true; + } + + log_async_result(cmd, result, pid, NULL, merged); +- crm_log_xml_trace(reply, "Reply"); + + if (merged) { + crm_xml_add(reply, F_STONITH_MERGED, "true"); + } ++ crm_log_xml_trace(reply, "Reply"); + + 
if (bcast) { +- crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); + + } else if (cmd->origin) { +-- +2.27.0 + + +From 8b8f94fd9ca5e61922cb81e32c8a3d0f1d75fb0b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 11 Nov 2021 14:40:49 -0600 +Subject: [PATCH 05/19] Refactor: fencer: avoid code duplication when sending + async reply + +... and clean up reply function +--- + daemons/fenced/fenced_commands.c | 33 ++++++++++++++++++-------------- + 1 file changed, 19 insertions(+), 14 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index e4185f6e1..4ea0a337a 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2411,15 +2411,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + + if (bcast) { + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); +- +- } else if (cmd->origin) { +- crm_trace("Directed reply to %s", cmd->origin); +- send_cluster_message(crm_get_peer(0, cmd->origin), crm_msg_stonith_ng, reply, FALSE); +- + } else { +- crm_trace("Directed local %ssync reply to %s", +- (cmd->options & st_opt_sync_call) ? "" : "a-", cmd->client_name); +- do_local_reply(reply, cmd->client, cmd->options & st_opt_sync_call, FALSE); ++ stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client); + } + + if (stand_alone) { +@@ -2814,16 +2807,28 @@ check_alternate_host(const char *target) + return alternate_host; + } + ++/*! 
++ * \internal ++ * \brief Send a reply to a CPG peer or IPC client ++ * ++ * \param[in] reply XML reply to send ++ * \param[in] call_options Send synchronously if st_opt_sync_call is set here ++ * \param[in] remote_peer If not NULL, name of peer node to send CPG reply ++ * \param[in] client_id If not NULL, name of client to send IPC reply ++ */ + static void +-stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer, ++stonith_send_reply(xmlNode *reply, int call_options, const char *remote_peer, + const char *client_id) + { +- if (remote_peer) { +- send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, reply, FALSE); +- } else { ++ CRM_CHECK((reply != NULL) && ((remote_peer != NULL) || (client_id != NULL)), ++ return); ++ ++ if (remote_peer == NULL) { + do_local_reply(reply, client_id, +- pcmk_is_set(call_options, st_opt_sync_call), +- (remote_peer != NULL)); ++ pcmk_is_set(call_options, st_opt_sync_call), FALSE); ++ } else { ++ send_cluster_message(crm_get_peer(0, remote_peer), crm_msg_stonith_ng, ++ reply, FALSE); + } + } + +-- +2.27.0 + + +From 2cdbda58f0e9f38a0e302506107fd933cb415144 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 17:24:09 -0600 +Subject: [PATCH 06/19] Refactor: fencer: ensure all requests get clean-up + +handle_request() has if-else blocks for each type of request. Previously, if a +request didn't need a reply, the function would do any clean-up needed and +return immediately. Now, we track whether a reply is needed, and all request +types flow to the end of the function for consistent clean-up. + +This doesn't change any behavior at this point, but allows us to do more at the +end of request handling. 
+--- + daemons/fenced/fenced_commands.c | 46 ++++++++++++++++++-------------- + 1 file changed, 26 insertions(+), 20 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 4ea0a337a..19477b49b 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2892,6 +2892,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + xmlNode *data = NULL; + xmlNode *reply = NULL; ++ bool need_reply = true; + + char *output = NULL; + const char *op = crm_element_value(request, F_STONITH_OPERATION); +@@ -2921,10 +2922,12 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + pcmk__ipc_send_xml(client, id, reply, flags); + client->request_id = 0; + free_xml(reply); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_EXEC, pcmk__str_none)) { + rc = stonith_device_action(request, &output); ++ need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_TIMEOUT_UPDATE, pcmk__str_none)) { + const char *call_id = crm_element_value(request, F_STONITH_CALLID); +@@ -2933,7 +2936,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + crm_element_value_int(request, F_STONITH_TIMEOUT, &op_timeout); + do_stonith_async_timeout_update(client_id, call_id, op_timeout); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + if (remote_peer) { +@@ -2944,7 +2948,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + remove_relay_op(request); + + stonith_query(request, remote_peer, client_id, call_options); +- return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { + const char *flag_name = NULL; +@@ -2965,7 +2970,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } + + pcmk__ipc_send_ack(client, id, flags, "ack", CRM_EX_OK); +- 
return 0; ++ rc = pcmk_ok; ++ need_reply = false; + + } else if (pcmk__str_eq(op, STONITH_OP_RELAY, pcmk__str_none)) { + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); +@@ -2977,8 +2983,11 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value(dev, F_STONITH_ACTION), + crm_element_value(dev, F_STONITH_TARGET)); + +- if (initiate_remote_stonith_op(NULL, request, FALSE) != NULL) { ++ if (initiate_remote_stonith_op(NULL, request, FALSE) == NULL) { ++ rc = -EPROTO; ++ } else { + rc = -EINPROGRESS; ++ need_reply = false; + } + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { +@@ -3012,7 +3021,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value_int(dev, F_STONITH_TOLERANCE, &tolerance); + + if (stonith_check_fence_tolerance(tolerance, target, action)) { +- rc = 0; ++ rc = pcmk_ok; + goto done; + } + +@@ -3047,10 +3056,13 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + FALSE); + rc = -EINPROGRESS; + +- } else if (initiate_remote_stonith_op(client, request, FALSE) != NULL) { ++ } else if (initiate_remote_stonith_op(client, request, FALSE) == NULL) { ++ rc = -EPROTO; ++ } else { + rc = -EINPROGRESS; + } + } ++ need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { + rc = stonith_fence_history(request, &data, remote_peer, call_options); +@@ -3058,8 +3070,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + /* we don't expect answers to the broadcast + * we might have sent out + */ +- free_xml(data); +- return pcmk_ok; ++ rc = pcmk_ok; ++ need_reply = false; + } + + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { +@@ -3111,8 +3123,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + crm_element_value_int(request, XML_ATTR_ID, &node_id); + name = crm_element_value(request, XML_ATTR_UNAME); + 
reap_crm_member(node_id, name); +- +- return pcmk_ok; ++ rc = pcmk_ok; ++ need_reply = false; + + } else { + crm_err("Unknown IPC request %s from %s %s", op, +@@ -3120,20 +3132,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + ((client == NULL)? remote_peer : pcmk__client_name(client))); + } + +- done: +- ++done: + if (rc == -EACCES) { + crm_warn("Rejecting IPC request '%s' from unprivileged client %s", + crm_str(op), pcmk__client_name(client)); + } + +- /* Always reply unless the request is in process still. +- * If in progress, a reply will happen async after the request +- * processing is finished */ +- if (rc != -EINPROGRESS) { +- crm_trace("Reply handling: %p %u %u %d %d %s", client, client?client->request_id:0, +- id, pcmk_is_set(call_options, st_opt_sync_call), call_options, +- crm_element_value(request, F_STONITH_CALLOPTS)); ++ // Reply if result is known ++ if (need_reply) { + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); +-- +2.27.0 + + +From 067d655ebd3fbb0ed27f4e7426db4c3b661ba777 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 23 Nov 2021 17:26:32 -0600 +Subject: [PATCH 07/19] Log: fencer: improve debug logs when processing CPG/IPC + messages + +By moving the result log messages from stonith_command() to handle_reply() and +handle_request(), we can simplify stonith_command() and give slightly better +messages. 
+--- + daemons/fenced/fenced_commands.c | 80 +++++++++++++++----------------- + 1 file changed, 38 insertions(+), 42 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 19477b49b..98af0e04f 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2883,7 +2883,7 @@ remove_relay_op(xmlNode * request) + } + } + +-static int ++static void + handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *request, const char *remote_peer) + { +@@ -3152,73 +3152,69 @@ done: + free_xml(data); + free_xml(reply); + +- return rc; ++ crm_debug("Processed %s request from %s %s: %s (rc=%d)", ++ op, ((client == NULL)? "peer" : "client"), ++ ((client == NULL)? remote_peer : pcmk__client_name(client)), ++ ((rc > 0)? "" : pcmk_strerror(rc)), rc); + } + + static void + handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) + { +- const char *op = crm_element_value(request, F_STONITH_OPERATION); ++ // Copy, because request might be freed before we want to log this ++ char *op = crm_element_value_copy(request, F_STONITH_OPERATION); + + if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + process_remote_stonith_query(request); +- } else if (pcmk__str_eq(op, T_STONITH_NOTIFY, pcmk__str_none)) { +- process_remote_stonith_exec(request); +- } else if (pcmk__str_eq(op, STONITH_OP_FENCE, pcmk__str_none)) { +- /* Reply to a complex fencing op */ ++ } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { + process_remote_stonith_exec(request); + } else { +- crm_err("Unknown %s reply from %s %s", op, +- ((client == NULL)? "peer" : "client"), ++ crm_err("Ignoring unknown %s reply from %s %s", ++ crm_str(op), ((client == NULL)? "peer" : "client"), + ((client == NULL)? 
remote_peer : pcmk__client_name(client))); + crm_log_xml_warn(request, "UnknownOp"); ++ free(op); ++ return; + } ++ crm_debug("Processed %s reply from %s %s", ++ op, ((client == NULL)? "peer" : "client"), ++ ((client == NULL)? remote_peer : pcmk__client_name(client))); ++ free(op); + } + ++/*! ++ * \internal ++ * \brief Handle a message from an IPC client or CPG peer ++ * ++ * \param[in] client If not NULL, IPC client that sent message ++ * \param[in] id If from IPC client, IPC message ID ++ * \param[in] flags Message flags ++ * \param[in] message Message XML ++ * \param[in] remote_peer If not NULL, CPG peer that sent message ++ */ + void + stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, +- xmlNode *request, const char *remote_peer) ++ xmlNode *message, const char *remote_peer) + { +- int call_options = 0; +- int rc = 0; +- gboolean is_reply = FALSE; +- +- /* Copy op for reporting. The original might get freed by handle_reply() +- * before we use it in crm_debug(): +- * handle_reply() +- * |- process_remote_stonith_exec() +- * |-- remote_op_done() +- * |--- handle_local_reply_and_notify() +- * |---- crm_xml_add(...F_STONITH_OPERATION...) +- * |--- free_xml(op->request) +- */ +- char *op = crm_element_value_copy(request, F_STONITH_OPERATION); +- +- if (get_xpath_object("//" T_STONITH_REPLY, request, LOG_NEVER)) { +- is_reply = TRUE; +- } ++ int call_options = st_opt_none; ++ bool is_reply = get_xpath_object("//" T_STONITH_REPLY, message, ++ LOG_NEVER) != NULL; + +- crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); +- crm_debug("Processing %s%s %u from %s %s with call options 0x%08x", +- op, (is_reply? " reply" : ""), id, ++ crm_element_value_int(message, F_STONITH_CALLOPTS, &call_options); ++ crm_debug("Processing %ssynchronous %s %s %u from %s %s", ++ pcmk_is_set(call_options, st_opt_sync_call)? "" : "a", ++ crm_element_value(message, F_STONITH_OPERATION), ++ (is_reply? "reply" : "request"), id, + ((client == NULL)? 
"peer" : "client"), +- ((client == NULL)? remote_peer : pcmk__client_name(client)), +- call_options); ++ ((client == NULL)? remote_peer : pcmk__client_name(client))); + + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); + } + + if (is_reply) { +- handle_reply(client, request, remote_peer); ++ handle_reply(client, message, remote_peer); + } else { +- rc = handle_request(client, id, flags, request, remote_peer); ++ handle_request(client, id, flags, message, remote_peer); + } +- +- crm_debug("Processed %s%s from %s %s: %s (rc=%d)", +- op, (is_reply? " reply" : ""), +- ((client == NULL)? "peer" : "client"), +- ((client == NULL)? remote_peer : pcmk__client_name(client)), +- ((rc > 0)? "" : pcmk_strerror(rc)), rc); +- free(op); + } +-- +2.27.0 + + +From 44cb340c11b4652f452a47eb2b0050b4a459382b Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 15 Nov 2021 16:29:09 -0600 +Subject: [PATCH 08/19] Refactor: fencer: drop unused argument from + notification functions + +--- + daemons/fenced/fenced_commands.c | 12 ++++++------ + daemons/fenced/fenced_history.c | 6 +++--- + daemons/fenced/fenced_remote.c | 6 +++--- + daemons/fenced/pacemaker-fenced.c | 18 +++++++++--------- + daemons/fenced/pacemaker-fenced.h | 6 +++--- + 5 files changed, 24 insertions(+), 24 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 98af0e04f..946ce4042 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2428,8 +2428,8 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + crm_xml_add(notify_data, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id); + crm_xml_add(notify_data, F_STONITH_ORIGIN, cmd->client); + +- do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); ++ 
do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + free_xml(reply); +@@ -3082,7 +3082,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_device(call_options, op, rc, device_id); ++ do_stonith_notify_device(op, rc, device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_DEL, pcmk__str_none)) { + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); +@@ -3093,7 +3093,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_device(call_options, op, rc, device_id); ++ do_stonith_notify_device(op, rc, device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { + char *device_id = NULL; +@@ -3103,7 +3103,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_level(call_options, op, rc, device_id); ++ do_stonith_notify_level(op, rc, device_id); + free(device_id); + + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { +@@ -3114,7 +3114,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else { + rc = -EACCES; + } +- do_stonith_notify_level(call_options, op, rc, device_id); ++ do_stonith_notify_level(op, rc, device_id); + + } else if(pcmk__str_eq(op, CRM_OP_RM_NODE_CACHE, pcmk__str_casei)) { + int node_id = 0; +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 1ba034ba9..7127593b6 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -100,7 +100,7 @@ stonith_fence_history_cleanup(const char *target, + g_hash_table_foreach_remove(stonith_remote_op_list, + stonith_remove_history_entry, + (gpointer) target); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + } + +@@ -396,7 +396,7 @@ 
stonith_local_history_diff_and_merge(GHashTable *remote_history, + + if (updated) { + stonith_fence_history_trim(); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + if (cnt == 0) { +@@ -470,7 +470,7 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + is done so send a notification for anything + that smells like history-sync + */ +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY_SYNCED, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY_SYNCED, pcmk_ok, NULL); + if (crm_element_value(msg, F_STONITH_CALLID)) { + /* this is coming from the stonith-API + * +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 9e2f62804..c907cd120 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -423,8 +423,8 @@ handle_local_reply_and_notify(remote_fencing_op_t * op, xmlNode * data, int rc) + do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE); + + /* bcast to all local clients that the fencing operation happend */ +- do_stonith_notify(0, T_STONITH_NOTIFY_FENCE, rc, notify_data); +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + + /* mark this op as having notify's already sent */ + op->notify_sent = TRUE; +@@ -1119,7 +1119,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer) + + if (op->state != st_duplicate) { + /* kick history readers */ +- do_stonith_notify(0, T_STONITH_NOTIFY_HISTORY, 0, NULL); ++ do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } + + /* safe to trim as long as that doesn't touch pending ops */ +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index a64004ce1..a290e1670 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ 
-357,7 +357,7 @@ do_stonith_async_timeout_update(const char *client_id, const char *call_id, int + } + + void +-do_stonith_notify(int options, const char *type, int result, xmlNode * data) ++do_stonith_notify(const char *type, int result, xmlNode *data) + { + /* TODO: Standardize the contents of data */ + xmlNode *update_msg = create_xml_node(NULL, "notify"); +@@ -380,7 +380,7 @@ do_stonith_notify(int options, const char *type, int result, xmlNode * data) + } + + static void +-do_stonith_notify_config(int options, const char *op, int rc, ++do_stonith_notify_config(const char *op, int rc, + const char *desc, int active) + { + xmlNode *notify_data = create_xml_node(NULL, op); +@@ -390,20 +390,20 @@ do_stonith_notify_config(int options, const char *op, int rc, + crm_xml_add(notify_data, F_STONITH_DEVICE, desc); + crm_xml_add_int(notify_data, F_STONITH_ACTIVE, active); + +- do_stonith_notify(options, op, rc, notify_data); ++ do_stonith_notify(op, rc, notify_data); + free_xml(notify_data); + } + + void +-do_stonith_notify_device(int options, const char *op, int rc, const char *desc) ++do_stonith_notify_device(const char *op, int rc, const char *desc) + { +- do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(device_list)); ++ do_stonith_notify_config(op, rc, desc, g_hash_table_size(device_list)); + } + + void +-do_stonith_notify_level(int options, const char *op, int rc, const char *desc) ++do_stonith_notify_level(const char *op, int rc, const char *desc) + { +- do_stonith_notify_config(options, op, rc, desc, g_hash_table_size(topology)); ++ do_stonith_notify_config(op, rc, desc, g_hash_table_size(topology)); + } + + static void +@@ -418,7 +418,7 @@ topology_remove_helper(const char *node, int level) + crm_xml_add(data, XML_ATTR_STONITH_TARGET, node); + + rc = stonith_level_remove(data, &desc); +- do_stonith_notify_level(0, STONITH_OP_LEVEL_DEL, rc, desc); ++ do_stonith_notify_level(STONITH_OP_LEVEL_DEL, rc, desc); + + free_xml(data); + free(desc); +@@ 
-468,7 +468,7 @@ handle_topology_change(xmlNode *match, bool remove) + } + + rc = stonith_level_register(match, &desc); +- do_stonith_notify_level(0, STONITH_OP_LEVEL_ADD, rc, desc); ++ do_stonith_notify_level(STONITH_OP_LEVEL_ADD, rc, desc); + + free(desc); + } +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index a64b57693..3e41d867e 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -233,9 +233,9 @@ xmlNode *stonith_construct_reply(xmlNode * request, const char *output, xmlNode + void + do_stonith_async_timeout_update(const char *client, const char *call_id, int timeout); + +-void do_stonith_notify(int options, const char *type, int result, xmlNode * data); +-void do_stonith_notify_device(int options, const char *op, int rc, const char *desc); +-void do_stonith_notify_level(int options, const char *op, int rc, const char *desc); ++void do_stonith_notify(const char *type, int result, xmlNode *data); ++void do_stonith_notify_device(const char *op, int rc, const char *desc); ++void do_stonith_notify_level(const char *op, int rc, const char *desc); + + remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client, + xmlNode *request, +-- +2.27.0 + + +From a49df4901b663b3366634c1d58f04625ecba4005 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 11:57:14 -0600 +Subject: [PATCH 09/19] Refactor: fencer: functionize checking for privileged + client + +... for readability and to make planned changes easier +--- + daemons/fenced/fenced_commands.c | 49 +++++++++++++++++++------------- + 1 file changed, 30 insertions(+), 19 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 946ce4042..34c956f5c 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2883,6 +2883,32 @@ remove_relay_op(xmlNode * request) + } + } + ++/*! 
++ * \internal ++ * \brief Check whether an API request was sent by a privileged user ++ * ++ * API commands related to fencing configuration may be done only by privileged ++ * IPC users (i.e. root or hacluster), because all other users should go through ++ * the CIB to have ACLs applied. If no client was given, this is a peer request, ++ * which is always allowed. ++ * ++ * \param[in] c IPC client that sent request (or NULL if sent by CPG peer) ++ * \param[in] op Requested API operation (for logging only) ++ * ++ * \return true if sender is peer or privileged client, otherwise false ++ */ ++static inline bool ++is_privileged(pcmk__client_t *c, const char *op) ++{ ++ if ((c == NULL) || pcmk_is_set(c->flags, pcmk__client_privileged)) { ++ return true; ++ } else { ++ crm_warn("Rejecting IPC request '%s' from unprivileged client %s", ++ crm_str(op), pcmk__client_name(c)); ++ return false; ++ } ++} ++ + static void + handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *request, const char *remote_peer) +@@ -2898,15 +2924,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + const char *op = crm_element_value(request, F_STONITH_OPERATION); + const char *client_id = crm_element_value(request, F_STONITH_CLIENTID); + +- /* IPC commands related to fencing configuration may be done only by +- * privileged users (i.e. root or hacluster), because all other users should +- * go through the CIB to have ACLs applied. +- * +- * If no client was given, this is a peer request, which is always allowed. 
+- */ +- bool allowed = (client == NULL) +- || pcmk_is_set(client->flags, pcmk__client_privileged); +- + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + + if (pcmk_is_set(call_options, st_opt_sync_call)) { +@@ -3077,7 +3094,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_DEVICE_ADD, pcmk__str_none)) { + const char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_device_register(request, &device_id, FALSE); + } else { + rc = -EACCES; +@@ -3088,7 +3105,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + xmlNode *dev = get_xpath_object("//" F_STONITH_DEVICE, request, LOG_ERR); + const char *device_id = crm_element_value(dev, XML_ATTR_ID); + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_device_remove(device_id, FALSE); + } else { + rc = -EACCES; +@@ -3098,7 +3115,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_ADD, pcmk__str_none)) { + char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_level_register(request, &device_id); + } else { + rc = -EACCES; +@@ -3109,7 +3126,7 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } else if (pcmk__str_eq(op, STONITH_OP_LEVEL_DEL, pcmk__str_none)) { + char *device_id = NULL; + +- if (allowed) { ++ if (is_privileged(client, op)) { + rc = stonith_level_remove(request, &device_id); + } else { + rc = -EACCES; +@@ -3133,14 +3150,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + } + + done: +- if (rc == -EACCES) { +- crm_warn("Rejecting IPC request '%s' from unprivileged client %s", +- crm_str(op), pcmk__client_name(client)); +- } +- + // Reply if result is known + if (need_reply) { +- + if (pcmk_is_set(call_options, st_opt_sync_call)) { + CRM_ASSERT(client == NULL || client->request_id == id); + } +-- 
+2.27.0 + + +From 10ca8a5ef5266159bc3f993802aeae6537ceeb11 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 16:59:03 -0600 +Subject: [PATCH 10/19] Low: fencer: return -ETIME for peer fencing timeouts + +94c55684 set the result as pcmk_ok, but it appears that the intent was just to +keep the delegate from being set, and -ETIME should still do that, while being +more appropriate. +--- + daemons/fenced/fenced_remote.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index c907cd120..dc7b802da 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -608,7 +608,7 @@ remote_op_timeout_one(gpointer userdata) + + crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS + " id=%.8s", op->action, op->target, op->client_name, op->id); +- call_remote_stonith(op, NULL, pcmk_ok); ++ call_remote_stonith(op, NULL, -ETIME); + return FALSE; + } + +-- +2.27.0 + + +From fb2eefeb695cc92e1a2aed6f1f1d2b900d4fb83e Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 16 Nov 2021 17:54:56 -0600 +Subject: [PATCH 11/19] Refactor: fencer: functionize common part of timeout + handling + +Previously, remote_op_timeout() was called from multiple places, but only one +of those places needed the full processing. The common part is now in a new +function finalize_timed_out_op() called from all the places, and +remote_op_timeout() now has just the additional processing needed by the one +place plus a call to the new function. + +This will allow a future change to set a different exit reason depending on +which step timed out. 
+--- + daemons/fenced/fenced_remote.c | 49 +++++++++++++++++++++++----------- + 1 file changed, 34 insertions(+), 15 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index dc7b802da..22c4b0772 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -612,20 +612,18 @@ remote_op_timeout_one(gpointer userdata) + return FALSE; + } + +-static gboolean +-remote_op_timeout(gpointer userdata) ++/*! ++ * \internal ++ * \brief Finalize a remote fencer operation that timed out ++ * ++ * \param[in] op Fencer operation that timed out ++ */ ++static void ++finalize_timed_out_op(remote_fencing_op_t *op) + { +- remote_fencing_op_t *op = userdata; + + op->op_timer_total = 0; + +- if (op->state == st_done) { +- crm_debug("Action '%s' targeting %s for client %s already completed " +- CRM_XS " id=%.8s", +- op->action, op->target, op->client_name, op->id); +- return FALSE; +- } +- + crm_debug("Action '%s' targeting %s for client %s timed out " + CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->id); +@@ -637,14 +635,35 @@ remote_op_timeout(gpointer userdata) + */ + op->state = st_done; + remote_op_done(op, NULL, pcmk_ok, FALSE); +- return FALSE; ++ return; + } + + op->state = st_failed; + + remote_op_done(op, NULL, -ETIME, FALSE); ++} + +- return FALSE; ++/*! 
++ * \internal ++ * \brief Finalize a remote fencer operation that timed out ++ * ++ * \param[in] userdata Fencer operation that timed out ++ * ++ * \return G_SOURCE_REMOVE (which tells glib not to restart timer) ++ */ ++static gboolean ++remote_op_timeout(gpointer userdata) ++{ ++ remote_fencing_op_t *op = userdata; ++ ++ if (op->state == st_done) { ++ crm_debug("Action '%s' targeting %s for client %s already completed " ++ CRM_XS " id=%.8s", ++ op->action, op->target, op->client_name, op->id); ++ } else { ++ finalize_timed_out_op(userdata); ++ } ++ return G_SOURCE_REMOVE; + } + + static gboolean +@@ -670,7 +689,7 @@ remote_op_query_timeout(gpointer data) + g_source_remove(op->op_timer_total); + op->op_timer_total = 0; + } +- remote_op_timeout(op); ++ finalize_timed_out_op(op); + } + + return FALSE; +@@ -1675,8 +1694,8 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, int rc) + crm_info("No remaining peers capable of fencing (%s) %s for client %s " + CRM_XS " state=%s", op->action, op->target, op->client_name, + stonith_op_state_str(op->state)); +- CRM_LOG_ASSERT(op->state < st_done); +- remote_op_timeout(op); ++ CRM_CHECK(op->state < st_done, return); ++ finalize_timed_out_op(op); + + } else if(op->replies >= op->replies_expected || op->replies >= fencing_active_peers()) { + // int rc = -EHOSTUNREACH; +-- +2.27.0 + + +From c047005a112ac7da5ba62084e39c79db739f0923 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 10:05:18 -0600 +Subject: [PATCH 12/19] Low: fencer: handle malformed manual confirmation + requests better + +Rename stonith_manual_ack() to fenced_handle_manual_confirmation(), and move +more of the manual confirmation handling in handle_request() into it, for +better code isolation. This will also make planned changes easier. + +The one behavioral difference is that a failure of initiate_remote_stonith_op() +will now be ignored rather than segmentation fault trying to dereference NULL. 
+--- + daemons/fenced/fenced_commands.c | 20 ++++++++++++-------- + daemons/fenced/fenced_remote.c | 29 ++++++++++++++++++++++++----- + daemons/fenced/pacemaker-fenced.h | 2 +- + 3 files changed, 37 insertions(+), 14 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 34c956f5c..6f325b9e8 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -3012,14 +3012,18 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + if (remote_peer || stand_alone) { + rc = stonith_fence(request); + +- } else if (call_options & st_opt_manual_ack) { +- remote_fencing_op_t *rop = NULL; +- xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, request, LOG_TRACE); +- const char *target = crm_element_value(dev, F_STONITH_TARGET); +- +- crm_notice("Received manual confirmation that %s is fenced", target); +- rop = initiate_remote_stonith_op(client, request, TRUE); +- rc = stonith_manual_ack(request, rop); ++ } else if (pcmk_is_set(call_options, st_opt_manual_ack)) { ++ switch (fenced_handle_manual_confirmation(client, request)) { ++ case pcmk_rc_ok: ++ rc = pcmk_ok; ++ break; ++ case EINPROGRESS: ++ rc = -EINPROGRESS; ++ break; ++ default: ++ rc = -EPROTO; ++ break; ++ } + + } else { + const char *alternate_host = NULL; +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 22c4b0772..60ee5e32e 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1003,22 +1003,41 @@ static uint32_t fencing_active_peers(void) + return count; + } + ++/*! 
++ * \internal ++ * \brief Process a manual confirmation of a pending fence action ++ * ++ * \param[in] client IPC client that sent confirmation ++ * \param[in] msg Request XML with manual confirmation ++ * ++ * \return Standard Pacemaker return code ++ */ + int +-stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op) ++fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg) + { ++ remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_ERR); + ++ CRM_CHECK(dev != NULL, return EPROTO); ++ ++ crm_notice("Received manual confirmation that %s has been fenced", ++ crm_str(crm_element_value(dev, F_STONITH_TARGET))); ++ op = initiate_remote_stonith_op(client, msg, TRUE); ++ if (op == NULL) { ++ return EPROTO; ++ } + op->state = st_done; + set_fencing_completed(op); + op->delegate = strdup("a human"); + +- crm_notice("Injecting manual confirmation that %s is safely off/down", +- crm_element_value(dev, F_STONITH_TARGET)); ++ // For the fencer's purposes, the fencing operation is done + + remote_op_done(op, msg, pcmk_ok, FALSE); + +- // Replies are sent via done_cb -> send_async_reply() -> do_local_reply() +- return -EINPROGRESS; ++ /* For the requester's purposes, the operation is still pending. The ++ * actual result will be sent asynchronously via the operation's done_cb(). ++ */ ++ return EINPROGRESS; + } + + /*! 
+diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index 3e41d867e..cf88644f1 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -256,7 +256,7 @@ bool fencing_peer_active(crm_node_t *peer); + + void set_fencing_completed(remote_fencing_op_t * op); + +-int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); ++int fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg); + + gboolean node_has_attr(const char *node, const char *name, const char *value); + +-- +2.27.0 + + +From ec60f014b5a8f774aa57a26e40a2b1b94a7e3d3a Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 10:35:31 -0600 +Subject: [PATCH 13/19] Low: fencer: handle malformed topology level removal + requests better + +Log the malformed request, and return -EPROTO instead of -EINVAL. If a request +is missing a level number, treat it as malformed instead of as a request to +remove all. +--- + daemons/fenced/fenced_commands.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 6f325b9e8..358844203 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1678,27 +1678,27 @@ stonith_level_register(xmlNode *msg, char **desc) + int + stonith_level_remove(xmlNode *msg, char **desc) + { +- int id = 0; ++ int id = -1; + stonith_topology_t *tp; + char *target; + + /* Unlike additions, removal requests should always have one level tag */ + xmlNode *level = get_xpath_object("//" XML_TAG_FENCING_LEVEL, msg, LOG_ERR); + +- CRM_CHECK(level != NULL, return -EINVAL); ++ CRM_CHECK(level != NULL, return -EPROTO); + + target = stonith_level_key(level, -1); + crm_element_value_int(level, XML_ATTR_STONITH_INDEX, &id); ++ ++ CRM_CHECK((id >= 0) && (id < ST_LEVEL_MAX), ++ crm_log_xml_warn(msg, "invalid level"); ++ free(target); ++ return -EPROTO); ++ + if (desc) { + *desc 
= crm_strdup_printf("%s[%d]", target, id); + } + +- /* Sanity-check arguments */ +- if (id >= ST_LEVEL_MAX) { +- free(target); +- return -EINVAL; +- } +- + tp = g_hash_table_lookup(topology, target); + if (tp == NULL) { + guint nentries = g_hash_table_size(topology); +@@ -1714,7 +1714,7 @@ stonith_level_remove(xmlNode *msg, char **desc) + "(%d active %s remaining)", target, nentries, + pcmk__plural_alt(nentries, "entry", "entries")); + +- } else if (id > 0 && tp->levels[id] != NULL) { ++ } else if (tp->levels[id] != NULL) { + guint nlevels; + + g_list_free_full(tp->levels[id], free); +-- +2.27.0 + + +From ee0cfb6b284c2d6d21f8e77bf6ff286b1364235d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 12:33:05 -0600 +Subject: [PATCH 14/19] Refactor: fencer: avoid obscuring a variable + +handle_request() declared a xmlNode *reply variable, and then one of its "if" +blocks defined another one, obscuring the first. Drop the first declaration, +and instead move it to the one other place that needed it. + +Also remove a redundant assertion. 
+--- + daemons/fenced/fenced_commands.c | 13 +++++-------- + 1 file changed, 5 insertions(+), 8 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index 358844203..af0a92450 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2917,7 +2917,6 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + int rc = -EOPNOTSUPP; + + xmlNode *data = NULL; +- xmlNode *reply = NULL; + bool need_reply = true; + + char *output = NULL; +@@ -2926,8 +2925,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + + crm_element_value_int(request, F_STONITH_CALLOPTS, &call_options); + +- if (pcmk_is_set(call_options, st_opt_sync_call)) { +- CRM_ASSERT(client == NULL || client->request_id == id); ++ if (pcmk_is_set(call_options, st_opt_sync_call) && (client != NULL)) { ++ CRM_ASSERT(client->request_id == id); + } + + if (pcmk__str_eq(op, CRM_OP_REGISTER, pcmk__str_none)) { +@@ -3156,16 +3155,14 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + done: + // Reply if result is known + if (need_reply) { +- if (pcmk_is_set(call_options, st_opt_sync_call)) { +- CRM_ASSERT(client == NULL || client->request_id == id); +- } +- reply = stonith_construct_reply(request, output, data, rc); ++ xmlNode *reply = stonith_construct_reply(request, output, data, rc); ++ + stonith_send_reply(reply, call_options, remote_peer, client_id); ++ free_xml(reply); + } + + free(output); + free_xml(data); +- free_xml(reply); + + crm_debug("Processed %s request from %s %s: %s (rc=%d)", + op, ((client == NULL)? 
"peer" : "client"), +-- +2.27.0 + + +From a5fef7b95b7541860e29c1ff33be38db327208fb Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Thu, 18 Nov 2021 12:37:10 -0600 +Subject: [PATCH 15/19] Refactor: fencer: add convenience function for setting + protocol error result + +The fencer will soon track and return the full result (rather than just a +legacy return code) for fencing actions, for callbacks and notifications. +To simplify that process as well as move away from the legacy codes in general, +all fencer API operations will be modified to return a full result. + +This convenience function will come in handy for that. +--- + daemons/fenced/pacemaker-fenced.h | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h +index cf88644f1..3bc5dc3d1 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -262,6 +262,13 @@ gboolean node_has_attr(const char *node, const char *name, const char *value); + + gboolean node_does_watchdog_fencing(const char *node); + ++static inline void ++fenced_set_protocol_error(pcmk__action_result_t *result) ++{ ++ pcmk__set_result(result, CRM_EX_PROTOCOL, PCMK_EXEC_INVALID, ++ "Fencer API request missing required information (bug?)"); ++} ++ + extern char *stonith_our_uname; + extern gboolean stand_alone; + extern GHashTable *device_list; +-- +2.27.0 + + +From ed770d36fb34dc7b3344cd326830a6c06cc789ce Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 19 Nov 2021 09:59:51 -0600 +Subject: [PATCH 16/19] Refactor: fencer: make a few functions return void + +... to make planned changes easier. The return values were previously ignored. 
+--- + daemons/fenced/fenced_commands.c | 17 ++++++++------- + daemons/fenced/fenced_history.c | 6 +----- + daemons/fenced/fenced_remote.c | 35 ++++++++++++++----------------- + daemons/fenced/pacemaker-fenced.c | 6 +++--- + daemons/fenced/pacemaker-fenced.h | 8 +++---- + 5 files changed, 33 insertions(+), 39 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index af0a92450..ea7d281ce 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -1411,8 +1411,8 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) + return pcmk_ok; + } + +-int +-stonith_device_remove(const char *id, gboolean from_cib) ++void ++stonith_device_remove(const char *id, bool from_cib) + { + stonith_device_t *device = g_hash_table_lookup(device_list, id); + guint ndevices = 0; +@@ -1421,7 +1421,7 @@ stonith_device_remove(const char *id, gboolean from_cib) + ndevices = g_hash_table_size(device_list); + crm_info("Device '%s' not found (%d active device%s)", + id, ndevices, pcmk__plural_s(ndevices)); +- return pcmk_ok; ++ return; + } + + if (from_cib) { +@@ -1443,7 +1443,6 @@ stonith_device_remove(const char *id, gboolean from_cib) + (device->cib_registered? " cib" : ""), + (device->api_registered? " api" : "")); + } +- return pcmk_ok; + } + + /*! 
+@@ -3085,8 +3084,9 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + need_reply = (rc != -EINPROGRESS); + + } else if (pcmk__str_eq(op, STONITH_OP_FENCE_HISTORY, pcmk__str_none)) { +- rc = stonith_fence_history(request, &data, remote_peer, call_options); +- if (call_options & st_opt_discard_reply) { ++ stonith_fence_history(request, &data, remote_peer, call_options); ++ rc = pcmk_ok; ++ if (pcmk_is_set(call_options, st_opt_discard_reply)) { + /* we don't expect answers to the broadcast + * we might have sent out + */ +@@ -3109,7 +3109,8 @@ handle_request(pcmk__client_t *client, uint32_t id, uint32_t flags, + const char *device_id = crm_element_value(dev, XML_ATTR_ID); + + if (is_privileged(client, op)) { +- rc = stonith_device_remove(device_id, FALSE); ++ stonith_device_remove(device_id, false); ++ rc = pcmk_ok; + } else { + rc = -EACCES; + } +@@ -3179,7 +3180,7 @@ handle_reply(pcmk__client_t *client, xmlNode *request, const char *remote_peer) + if (pcmk__str_eq(op, STONITH_OP_QUERY, pcmk__str_none)) { + process_remote_stonith_query(request); + } else if (pcmk__str_any_of(op, T_STONITH_NOTIFY, STONITH_OP_FENCE, NULL)) { +- process_remote_stonith_exec(request); ++ fenced_process_fencing_reply(request); + } else { + crm_err("Ignoring unknown %s reply from %s %s", + crm_str(op), ((client == NULL)? 
"peer" : "client"), +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index 7127593b6..bc159383c 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -433,14 +433,11 @@ stonith_local_history(gboolean add_id, const char *target) + * a reply from + * \param[in] remote_peer + * \param[in] options call-options from the request +- * +- * \return always success as there is actully nothing that can go really wrong + */ +-int ++void + stonith_fence_history(xmlNode *msg, xmlNode **output, + const char *remote_peer, int options) + { +- int rc = 0; + const char *target = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_TARGET, msg, LOG_NEVER); + xmlNode *out_history = NULL; +@@ -525,5 +522,4 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + *output = stonith_local_history(FALSE, target); + } + free_xml(out_history); +- return rc; + } +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 60ee5e32e..6338aebde 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -2086,11 +2086,9 @@ process_remote_stonith_query(xmlNode * msg) + * or attempt another device as appropriate. 
+ * + * \param[in] msg XML reply received +- * +- * \return pcmk_ok on success, -errno on error + */ +-int +-process_remote_stonith_exec(xmlNode * msg) ++void ++fenced_process_fencing_reply(xmlNode *msg) + { + int rc = 0; + const char *id = NULL; +@@ -2098,13 +2096,13 @@ process_remote_stonith_exec(xmlNode * msg) + remote_fencing_op_t *op = NULL; + xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR); + +- CRM_CHECK(dev != NULL, return -EPROTO); ++ CRM_CHECK(dev != NULL, return); + + id = crm_element_value(dev, F_STONITH_REMOTE_OP_ID); +- CRM_CHECK(id != NULL, return -EPROTO); ++ CRM_CHECK(id != NULL, return); + + dev = get_xpath_object("//@" F_STONITH_RC, msg, LOG_ERR); +- CRM_CHECK(dev != NULL, return -EPROTO); ++ CRM_CHECK(dev != NULL, return); + + crm_element_value_int(dev, F_STONITH_RC, &rc); + +@@ -2125,35 +2123,35 @@ process_remote_stonith_exec(xmlNode * msg) + /* Could be for an event that began before we started */ + /* TODO: Record the op for later querying */ + crm_info("Received peer result of unknown or expired operation %s", id); +- return -EOPNOTSUPP; ++ return; + } + + if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) { + crm_err("Received outdated reply for device %s (instead of %s) to " + "fence (%s) %s. 
Operation already timed out at peer level.", + device, (const char *) op->devices->data, op->action, op->target); +- return rc; ++ return; + } + + if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) { + crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s " +- CRM_XS " rc=%d id=%.8s", ++ CRM_XS " id=%.8s", + op->action, op->target, op->client_name, op->originator, +- pcmk_strerror(rc), rc, op->id); ++ pcmk_strerror(rc), op->id); + if (rc == pcmk_ok) { + op->state = st_done; + } else { + op->state = st_failed; + } + remote_op_done(op, msg, rc, FALSE); +- return pcmk_ok; ++ return; + } else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) { + /* If this isn't a remote level broadcast, and we are not the + * originator of the operation, we should not be receiving this msg. */ + crm_err("Received non-broadcast fencing result for operation %.8s " + "we do not own (device %s targeting %s)", + op->id, device, op->target); +- return rc; ++ return; + } + + if (pcmk_is_set(op->call_options, st_opt_topology)) { +@@ -2168,7 +2166,7 @@ process_remote_stonith_exec(xmlNode * msg) + * and notify our local clients. */ + if (op->state == st_done) { + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } + + if ((op->phase == 2) && (rc != pcmk_ok)) { +@@ -2184,14 +2182,14 @@ process_remote_stonith_exec(xmlNode * msg) + /* An operation completed successfully. Try another device if + * necessary, otherwise mark the operation as done. */ + advance_topology_device_in_level(op, device, msg, rc); +- return rc; ++ return; + } else { + /* This device failed, time to try another topology level. If no other + * levels are available, mark this operation as failed and report results. 
*/ + if (advance_topology_level(op, false) != pcmk_rc_ok) { + op->state = st_failed; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } + } + } else if (rc == pcmk_ok && op->devices == NULL) { +@@ -2199,12 +2197,12 @@ process_remote_stonith_exec(xmlNode * msg) + + op->state = st_done; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } else if (rc == -ETIME && op->devices == NULL) { + /* If the operation timed out don't bother retrying other peers. */ + op->state = st_failed; + remote_op_done(op, msg, rc, FALSE); +- return rc; ++ return; + } else { + /* fall-through and attempt other fencing action using another peer */ + } +@@ -2213,7 +2211,6 @@ process_remote_stonith_exec(xmlNode * msg) + crm_trace("Next for %s on behalf of %s@%s (rc was %d)", op->target, op->originator, + op->client_name, rc); + call_remote_stonith(op, NULL, rc); +- return rc; + } + + gboolean +diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c +index a290e1670..0a8b3bf6f 100644 +--- a/daemons/fenced/pacemaker-fenced.c ++++ b/daemons/fenced/pacemaker-fenced.c +@@ -445,7 +445,7 @@ remove_cib_device(xmlXPathObjectPtr xpathObj) + + rsc_id = crm_element_value(match, XML_ATTR_ID); + +- stonith_device_remove(rsc_id, TRUE); ++ stonith_device_remove(rsc_id, true); + } + } + +@@ -610,7 +610,7 @@ watchdog_device_update(void) + } else { + /* be silent if no device - todo parameter to stonith_device_remove */ + if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { +- stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); ++ stonith_device_remove(STONITH_WATCHDOG_ID, true); + } + } + } +@@ -847,7 +847,7 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg) + } + if (search != NULL) { + *search = 0; +- stonith_device_remove(rsc_id, TRUE); ++ stonith_device_remove(rsc_id, true); + /* watchdog_device_update called afterwards + to fall back to implicit definition if needed */ + } else { +diff --git a/daemons/fenced/pacemaker-fenced.h 
b/daemons/fenced/pacemaker-fenced.h +index 3bc5dc3d1..5162ada75 100644 +--- a/daemons/fenced/pacemaker-fenced.h ++++ b/daemons/fenced/pacemaker-fenced.h +@@ -214,7 +214,7 @@ void stonith_command(pcmk__client_t *client, uint32_t id, uint32_t flags, + + int stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib); + +-int stonith_device_remove(const char *id, gboolean from_cib); ++void stonith_device_remove(const char *id, bool from_cib); + + char *stonith_level_key(xmlNode * msg, int mode); + int stonith_level_kind(xmlNode * msg); +@@ -241,14 +241,14 @@ remote_fencing_op_t *initiate_remote_stonith_op(pcmk__client_t *client, + xmlNode *request, + gboolean manual_ack); + +-int process_remote_stonith_exec(xmlNode * msg); ++void fenced_process_fencing_reply(xmlNode *msg); + + int process_remote_stonith_query(xmlNode * msg); + + void *create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer); + +-int stonith_fence_history(xmlNode *msg, xmlNode **output, +- const char *remote_peer, int options); ++void stonith_fence_history(xmlNode *msg, xmlNode **output, ++ const char *remote_peer, int options); + + void stonith_fence_history_trim(void); + +-- +2.27.0 + + +From 27df49460930738e77f5ca42536aff1d3bdfcae7 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Fri, 19 Nov 2021 10:06:43 -0600 +Subject: [PATCH 17/19] Refactor: fencer: drop unnecessary argument when + advancing topology device + +If we're advancing to the next device in a topology level, by necessity that +means any previous device succeeded. 
+--- + daemons/fenced/fenced_remote.c | 19 +++++++++---------- + 1 file changed, 9 insertions(+), 10 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index 6338aebde..d54e6a4ef 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -1519,14 +1519,13 @@ report_timeout_period(remote_fencing_op_t * op, int op_timeout) + * \internal + * \brief Advance an operation to the next device in its topology + * +- * \param[in,out] op Operation to advance +- * \param[in] device ID of device just completed +- * \param[in] msg XML reply that contained device result (if available) +- * \param[in] rc Return code of device's execution ++ * \param[in] op Fencer operation to advance ++ * \param[in] device ID of device that just completed ++ * \param[in] msg If not NULL, XML reply of last delegated fencing operation + */ + static void + advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, +- xmlNode *msg, int rc) ++ xmlNode *msg) + { + /* Advance to the next device at this topology level, if any */ + if (op->devices) { +@@ -1556,8 +1555,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + + if (op->devices) { + /* Necessary devices remain, so execute the next one */ +- crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)", +- op->target, op->client_name, op->originator, rc); ++ crm_trace("Next targeting %s on behalf of %s@%s", ++ op->target, op->client_name, op->originator); + + // The requested delay has been applied for the first device + if (op->delay > 0) { +@@ -1570,7 +1569,7 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, + crm_trace("Marking complex fencing op targeting %s as complete", + op->target); + op->state = st_done; +- remote_op_done(op, msg, rc, FALSE); ++ remote_op_done(op, msg, pcmk_ok, FALSE); + } + } + +@@ -1701,7 +1700,7 @@ call_remote_stonith(remote_fencing_op_t *op, peer_device_info_t *peer, 
int rc) + */ + crm_warn("Ignoring %s 'on' failure (no capable peers) targeting %s " + "after successful 'off'", device, op->target); +- advance_topology_device_in_level(op, device, NULL, pcmk_ok); ++ advance_topology_device_in_level(op, device, NULL); + return; + + } else if (op->owner == FALSE) { +@@ -2181,7 +2180,7 @@ fenced_process_fencing_reply(xmlNode *msg) + if (rc == pcmk_ok) { + /* An operation completed successfully. Try another device if + * necessary, otherwise mark the operation as done. */ +- advance_topology_device_in_level(op, device, msg, rc); ++ advance_topology_device_in_level(op, device, msg); + return; + } else { + /* This device failed, time to try another topology level. If no other +-- +2.27.0 + + +From 05437e1339bc1f9071b43e97d5846a939687951d Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 29 Nov 2021 11:59:17 -0600 +Subject: [PATCH 18/19] Refactor: fencer: minor renames for consistency + +... per review +--- + daemons/fenced/fenced_remote.c | 13 ++++++------- + 1 file changed, 6 insertions(+), 7 deletions(-) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index d54e6a4ef..8feb40147 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -63,7 +63,7 @@ typedef struct device_properties_s { + int delay_base[st_phase_max]; + } device_properties_t; + +-typedef struct st_query_result_s { ++typedef struct { + /* Name of peer that sent this result */ + char *host; + /* Only try peers for non-topology based operations once */ +@@ -95,13 +95,12 @@ sort_strings(gconstpointer a, gconstpointer b) + static void + free_remote_query(gpointer data) + { +- if (data) { +- peer_device_info_t *query = data; ++ if (data != NULL) { ++ peer_device_info_t *peer = data; + +- crm_trace("Free'ing query result from %s", query->host); +- g_hash_table_destroy(query->devices); +- free(query->host); +- free(query); ++ g_hash_table_destroy(peer->devices); ++ free(peer->host); ++ free(peer); + } + } + 
+-- +2.27.0 + + +From 86974d7cef05bafbed540d02e59514292581ae65 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 08:33:41 -0600 +Subject: [PATCH 19/19] Refactor: fencer: simplify send_async_reply() + +... as suggested in review +--- + daemons/fenced/fenced_commands.c | 28 ++++++++++++---------------- + 1 file changed, 12 insertions(+), 16 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index ea7d281ce..f34cb4f13 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2384,36 +2384,34 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + int pid, bool merged) + { + xmlNode *reply = NULL; +- bool bcast = false; + + CRM_CHECK((cmd != NULL) && (result != NULL), return); + ++ log_async_result(cmd, result, pid, NULL, merged); ++ + reply = construct_async_reply(cmd, result); ++ if (merged) { ++ crm_xml_add(reply, F_STONITH_MERGED, "true"); ++ } + +- // If target was also the originator, broadcast fencing results for it + if (!stand_alone && pcmk__is_fencing_action(cmd->action) + && pcmk__str_eq(cmd->origin, cmd->victim, pcmk__str_casei)) { +- ++ /* The target was also the originator, so broadcast the result on its ++ * behalf (since it will be unable to). 
++ */ + crm_trace("Broadcast '%s' result for %s (target was also originator)", + cmd->action, cmd->victim); + crm_xml_add(reply, F_SUBTYPE, "broadcast"); + crm_xml_add(reply, F_STONITH_OPERATION, T_STONITH_NOTIFY); +- bcast = true; +- } +- +- log_async_result(cmd, result, pid, NULL, merged); +- +- if (merged) { +- crm_xml_add(reply, F_STONITH_MERGED, "true"); +- } +- crm_log_xml_trace(reply, "Reply"); +- +- if (bcast) { + send_cluster_message(NULL, crm_msg_stonith_ng, reply, FALSE); + } else { ++ // Reply only to the originator + stonith_send_reply(reply, cmd->options, cmd->origin, cmd->client); + } + ++ crm_log_xml_trace(reply, "Reply"); ++ free_xml(reply); ++ + if (stand_alone) { + /* Do notification with a clean data object */ + xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE); +@@ -2430,8 +2428,6 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result, + do_stonith_notify(T_STONITH_NOTIFY_FENCE, rc, notify_data); + do_stonith_notify(T_STONITH_NOTIFY_HISTORY, pcmk_ok, NULL); + } +- +- free_xml(reply); + } + + static void +-- +2.27.0 + diff --git a/SOURCES/006-crm_simulate.patch b/SOURCES/006-crm_simulate.patch deleted file mode 100644 index c8d4e3f..0000000 --- a/SOURCES/006-crm_simulate.patch +++ /dev/null @@ -1,896 +0,0 @@ -From 97571e6ccc9b7fa339a7e27d9b0b9ab782ff3003 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 16 Jun 2021 13:54:10 -0400 -Subject: [PATCH 1/5] Low: schemas: Copy crm_mon.rng in preparation for - changes. 
- ---- - xml/api/crm_mon-2.12.rng | 243 +++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 243 insertions(+) - create mode 100644 xml/api/crm_mon-2.12.rng - -diff --git a/xml/api/crm_mon-2.12.rng b/xml/api/crm_mon-2.12.rng -new file mode 100644 -index 0000000..ffec923 ---- /dev/null -+++ b/xml/api/crm_mon-2.12.rng -@@ -0,0 +1,243 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ granted -+ revoked -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - - -From da394983f106f974274ddd94675a04c85086010e Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 18 Jun 2021 15:06:34 -0400 -Subject: [PATCH 2/5] Refactor: Split node history out into its own XML schema. - -This allows for sharing it between crm_mon and crm_simulate. 
---- - xml/Makefile.am | 2 +- - xml/api/crm_mon-2.12.rng | 64 +-------------------------------------- - xml/api/node-history-2.12.rng | 70 +++++++++++++++++++++++++++++++++++++++++++ - 3 files changed, 72 insertions(+), 64 deletions(-) - create mode 100644 xml/api/node-history-2.12.rng - -diff --git a/xml/Makefile.am b/xml/Makefile.am -index b9448d4..8e7b6d3 100644 ---- a/xml/Makefile.am -+++ b/xml/Makefile.am -@@ -64,7 +64,7 @@ API_request_base = command-output \ - CIB_cfg_base = options nodes resources constraints fencing acls tags alerts - - # Names of all schemas (including top level and those included by others) --API_base = $(API_request_base) fence-event failure generic-list item node-attrs nodes resources status -+API_base = $(API_request_base) fence-event failure generic-list item node-attrs node-history nodes resources status - CIB_base = cib $(CIB_cfg_base) status score rule nvset - - # Static schema files and transforms (only CIB has transforms) -diff --git a/xml/api/crm_mon-2.12.rng b/xml/api/crm_mon-2.12.rng -index ffec923..be14412 100644 ---- a/xml/api/crm_mon-2.12.rng -+++ b/xml/api/crm_mon-2.12.rng -@@ -20,7 +20,7 @@ - - - -- -+ - - - -@@ -113,14 +113,6 @@ - - - -- -- -- -- -- -- -- -- - - - -@@ -156,60 +148,6 @@ - - - -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- -- - - - -diff --git a/xml/api/node-history-2.12.rng b/xml/api/node-history-2.12.rng -new file mode 100644 -index 0000000..9628000 ---- /dev/null -+++ b/xml/api/node-history-2.12.rng -@@ -0,0 +1,70 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - - -From bf72b2615630eef7876e443d60b34d5a316de847 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Wed, 16 Jun 2021 14:09:31 -0400 -Subject: [PATCH 3/5] Low: 
schemas: Copy crm_simulate.rng in preparation for - changes. - ---- - xml/api/crm_simulate-2.12.rng | 335 ++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 335 insertions(+) - create mode 100644 xml/api/crm_simulate-2.12.rng - -diff --git a/xml/api/crm_simulate-2.12.rng b/xml/api/crm_simulate-2.12.rng -new file mode 100644 -index 0000000..9a7612d ---- /dev/null -+++ b/xml/api/crm_simulate-2.12.rng -@@ -0,0 +1,335 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - - -From c46e07788788acf5669e3f89b9344190a91c7331 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 18 Jun 2021 15:10:19 -0400 -Subject: [PATCH 4/5] Feature: tools: Add the node-summary to crm_simulate - output. - -If --show-failcounts is given to crm_simulate, it should also display -the node-summary message. 
- -See: rhbz#1686426 ---- - tools/crm_simulate.c | 7 +++++-- - xml/api/crm_simulate-2.12.rng | 3 +++ - 2 files changed, 8 insertions(+), 2 deletions(-) - -diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c -index b4aa9d1..2ea292c 100644 ---- a/tools/crm_simulate.c -+++ b/tools/crm_simulate.c -@@ -409,11 +409,14 @@ print_cluster_status(pe_working_set_t * data_set, unsigned int print_opts) - FALSE, FALSE, all, all, FALSE); - - if (options.show_attrs) { -- out->message(out, "node-attribute-list", data_set, -- 0, rc == pcmk_rc_ok, FALSE, FALSE, FALSE, all, all); -+ rc = out->message(out, "node-attribute-list", data_set, -+ 0, rc == pcmk_rc_ok, FALSE, FALSE, FALSE, all, all); - } - - if (options.show_failcounts) { -+ rc = out->message(out, "node-summary", data_set, all, all, -+ 0, print_opts, FALSE, FALSE, FALSE, FALSE, rc == pcmk_rc_ok); -+ - out->message(out, "failed-action-list", data_set, all, all, - rc == pcmk_rc_ok); - } -diff --git a/xml/api/crm_simulate-2.12.rng b/xml/api/crm_simulate-2.12.rng -index 9a7612d..f90bd36 100644 ---- a/xml/api/crm_simulate-2.12.rng -+++ b/xml/api/crm_simulate-2.12.rng -@@ -67,6 +67,9 @@ - - - -+ -+ -+ - - - --- -1.8.3.1 - - -From bac50336e0264604716e5997b87ee7e65311b982 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Fri, 18 Jun 2021 15:21:52 -0400 -Subject: [PATCH 5/5] Low: libcrmcommon: Increase PCMK__API_VERSION for new - crm_resource output. - -See: rhbz#1686426 ---- - include/crm/common/output_internal.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h -index 0436cde..ba9c423 100644 ---- a/include/crm/common/output_internal.h -+++ b/include/crm/common/output_internal.h -@@ -27,7 +27,7 @@ extern "C" { - # include - # include - --# define PCMK__API_VERSION "2.11" -+# define PCMK__API_VERSION "2.12" - - #if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS) - # define PCMK__OUTPUT_ARGS(ARGS...) 
__attribute__((output_args(ARGS))) --- -1.8.3.1 - diff --git a/SOURCES/006-stateful-metadata.patch b/SOURCES/006-stateful-metadata.patch new file mode 100644 index 0000000..a9ea6f4 --- /dev/null +++ b/SOURCES/006-stateful-metadata.patch @@ -0,0 +1,143 @@ +From b52fe799c89637e2a761a5725c2376db5c05f2d1 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 15:51:54 -0600 +Subject: [PATCH 1/2] Low: resources: remove DOCTYPE from OCF 1.1-compliant + agents + +OCF 1.1 replaced the DTD schema with RNG, but DOCTYPE still refers to the DTD. +There's no DOCTYPE for RNG, and DOCTYPE is optional, so just remove it. +--- + extra/resources/Dummy | 3 +-- + extra/resources/HealthIOWait | 3 +-- + extra/resources/Stateful | 3 +-- + extra/resources/attribute | 3 +-- + extra/resources/ping | 3 +-- + extra/resources/remote | 3 +-- + 6 files changed, 6 insertions(+), 12 deletions(-) + +diff --git a/extra/resources/Dummy b/extra/resources/Dummy +index a344deac0..56584e564 100755 +--- a/extra/resources/Dummy ++++ b/extra/resources/Dummy +@@ -58,8 +58,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/HealthIOWait b/extra/resources/HealthIOWait +index 43a8b70c4..5f1483ef7 100755 +--- a/extra/resources/HealthIOWait ++++ b/extra/resources/HealthIOWait +@@ -25,8 +25,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/Stateful b/extra/resources/Stateful +index ae3424bbf..0d2062d51 100755 +--- a/extra/resources/Stateful ++++ b/extra/resources/Stateful +@@ -39,8 +39,7 @@ SCORE_PROMOTED=10 + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/attribute b/extra/resources/attribute +index 1800dff8f..a2bd353e0 100755 +--- a/extra/resources/attribute ++++ b/extra/resources/attribute +@@ -57,8 +57,7 @@ END + meta_data() { + cat < +- +- ++ + 1.1 + Manages a node attribute + +diff --git a/extra/resources/ping b/extra/resources/ping +index 6e296979f..7cc6b802d 100755 +--- a/extra/resources/ping ++++ 
b/extra/resources/ping +@@ -36,8 +36,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + + +diff --git a/extra/resources/remote b/extra/resources/remote +index a53262bb6..f7e40dc81 100755 +--- a/extra/resources/remote ++++ b/extra/resources/remote +@@ -24,8 +24,7 @@ + meta_data() { + cat < +- +- ++ + 1.1 + Pacemaker Remote connection + +-- +2.27.0 + + +From 70f469120f8db6a024c786466ee74a6c7fbd1f43 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 30 Nov 2021 15:53:39 -0600 +Subject: [PATCH 2/2] Fix: resources: use correct syntax in Stateful meta-data + +The OCF standard only allows "0" or "1" for booleans. + +This fixes incorrect ocf:pacemaker:Stateful meta-data syntax introduced by +7024398 as a regression in the 2.1.0 release. +--- + extra/resources/Stateful | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/extra/resources/Stateful b/extra/resources/Stateful +index 0d2062d51..2ebe6725f 100755 +--- a/extra/resources/Stateful ++++ b/extra/resources/Stateful +@@ -57,7 +57,7 @@ Location to store the resource state in + + + +- ++ + + If this is set, the environment will be dumped to this file for every call. + +@@ -65,7 +65,7 @@ If this is set, the environment will be dumped to this file for every call. + + + +- ++ + + The notify action will sleep for this many seconds before returning, + to simulate a long-running notify. 
+-- +2.27.0 + diff --git a/SOURCES/007-memory-leak.patch b/SOURCES/007-memory-leak.patch new file mode 100644 index 0000000..38ad3a2 --- /dev/null +++ b/SOURCES/007-memory-leak.patch @@ -0,0 +1,39 @@ +From f491d9d5a7ed554fed985de356bb085fdec3421c Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Tue, 7 Dec 2021 09:01:00 -0600 +Subject: [PATCH] Fix: fencer: avoid memory leak when broadcasting history + differences + +Regression introduced in 2.1.0 by dbc27b2 +--- + daemons/fenced/fenced_history.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c +index bc159383c..a9c57dc86 100644 +--- a/daemons/fenced/fenced_history.c ++++ b/daemons/fenced/fenced_history.c +@@ -484,8 +484,6 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + !pcmk__str_eq(remote_peer, stonith_our_uname, pcmk__str_casei)) { + xmlNode *history = get_xpath_object("//" F_STONITH_HISTORY_LIST, + msg, LOG_NEVER); +- GHashTable *received_history = +- history?stonith_xml_history_to_list(history):NULL; + + /* either a broadcast created directly upon stonith-API request + * or a diff as response to such a thing +@@ -497,6 +495,11 @@ stonith_fence_history(xmlNode *msg, xmlNode **output, + if (!history || + !crm_is_true(crm_element_value(history, + F_STONITH_DIFFERENTIAL))) { ++ GHashTable *received_history = NULL; ++ ++ if (history != NULL) { ++ received_history = stonith_xml_history_to_list(history); ++ } + out_history = + stonith_local_history_diff_and_merge(received_history, TRUE, NULL); + if (out_history) { +-- +2.27.0 + diff --git a/SOURCES/007-unfencing-loop.patch b/SOURCES/007-unfencing-loop.patch deleted file mode 100644 index d4950c8..0000000 --- a/SOURCES/007-unfencing-loop.patch +++ /dev/null @@ -1,733 +0,0 @@ -From 6dcd6b51d7d3993bc483588d6ed75077518ed600 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 4 Jun 2021 16:30:55 -0500 -Subject: [PATCH 01/11] Low: controller: check whether unfenced 
node was remote - node - -... so the controller can indicate the node is remote (if known at that point, -which is not guaranteed) when setting unfencing-related node attributes. ---- - daemons/controld/controld_fencing.c | 21 ++++++++++++++++++--- - 1 file changed, 18 insertions(+), 3 deletions(-) - -diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c -index 23dff28..0fba661 100644 ---- a/daemons/controld/controld_fencing.c -+++ b/daemons/controld/controld_fencing.c -@@ -757,15 +757,30 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data) - if (pcmk__str_eq("on", op, pcmk__str_casei)) { - const char *value = NULL; - char *now = pcmk__ttoa(time(NULL)); -+ gboolean is_remote_node = FALSE; -+ -+ /* This check is not 100% reliable, since this node is not -+ * guaranteed to have the remote node cached. However, it -+ * doesn't have to be reliable, since the attribute manager can -+ * learn a node's "remoteness" by other means sooner or later. -+ * This allows it to learn more quickly if this node does have -+ * the information. 
-+ */ -+ if (g_hash_table_lookup(crm_remote_peer_cache, uuid) != NULL) { -+ is_remote_node = TRUE; -+ } - -- update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, FALSE); -+ update_attrd(target, CRM_ATTR_UNFENCED, now, NULL, -+ is_remote_node); - free(now); - - value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_ALL); -- update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, FALSE); -+ update_attrd(target, CRM_ATTR_DIGESTS_ALL, value, NULL, -+ is_remote_node); - - value = crm_meta_value(action->params, XML_OP_ATTR_DIGESTS_SECURE); -- update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, FALSE); -+ update_attrd(target, CRM_ATTR_DIGESTS_SECURE, value, NULL, -+ is_remote_node); - - } else if (action->sent_update == FALSE) { - send_stonith_update(action, target, uuid); --- -1.8.3.1 - - -From 3ef6d9403f68ab8559c45cc99f5a8da05ca6420b Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 7 Jun 2021 10:50:36 -0500 -Subject: [PATCH 02/11] Refactor: pacemaker-attrd: functionize adding remote - node to cache - -... for future reuse ---- - daemons/attrd/attrd_commands.c | 34 +++++++++++++++++++++++----------- - 1 file changed, 23 insertions(+), 11 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 731c243..93a165b 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -102,6 +102,28 @@ free_attribute(gpointer data) - } - } - -+/*! -+ * \internal -+ * \brief Ensure a Pacemaker Remote node is in the correct peer cache -+ * -+ * \param[in] -+ */ -+static void -+cache_remote_node(const char *node_name) -+{ -+ /* If we previously assumed this node was an unseen cluster node, -+ * remove its entry from the cluster peer cache. 
-+ */ -+ crm_node_t *dup = pcmk__search_cluster_node_cache(0, node_name); -+ -+ if (dup && (dup->uuid == NULL)) { -+ reap_crm_member(0, node_name); -+ } -+ -+ // Ensure node is in the remote peer cache -+ CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); -+} -+ - static xmlNode * - build_attribute_xml( - xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user, -@@ -709,17 +731,7 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) - - crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote); - if (is_remote) { -- /* If we previously assumed this node was an unseen cluster node, -- * remove its entry from the cluster peer cache. -- */ -- crm_node_t *dup = pcmk__search_cluster_node_cache(0, host); -- -- if (dup && (dup->uuid == NULL)) { -- reap_crm_member(0, host); -- } -- -- /* Ensure this host is in the remote peer cache */ -- CRM_ASSERT(crm_remote_peer_get(host) != NULL); -+ cache_remote_node(host); - } - - if (v == NULL) { --- -1.8.3.1 - - -From 6fac2c71bc2c56870ac828d7cd7b7c799279c47e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 7 Jun 2021 10:39:34 -0500 -Subject: [PATCH 03/11] Refactor: pacemaker-attrd: don't try to remove votes - for remote nodes - -Remote nodes never vote. - -This has no effect in practice since the removal would simply do nothing, -but we might as well not waste time trying. 
---- - daemons/attrd/attrd_commands.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 93a165b..dbe777e 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -976,7 +976,8 @@ attrd_election_cb(gpointer user_data) - void - attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) - { -- bool remove_voter = FALSE; -+ bool gone = false; -+ bool is_remote = pcmk_is_set(peer->flags, crm_remote_node); - - switch (kind) { - case crm_status_uname: -@@ -984,7 +985,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - - case crm_status_processes: - if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { -- remove_voter = TRUE; -+ gone = true; - } - break; - -@@ -1000,13 +1001,13 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - } else { - // Remove all attribute values associated with lost nodes - attrd_peer_remove(peer->uname, FALSE, "loss"); -- remove_voter = TRUE; -+ gone = true; - } - break; - } - -- // In case an election is in progress, remove any vote by the node -- if (remove_voter) { -+ // Remove votes from cluster nodes that leave, in case election in progress -+ if (gone && !is_remote) { - attrd_remove_voter(peer); - } - } --- -1.8.3.1 - - -From 54089fc663d6aaf10ca164c6c94b3b17237788de Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 7 Jun 2021 10:40:06 -0500 -Subject: [PATCH 04/11] Low: pacemaker-attrd: check for remote nodes in peer - update callback - -If a remote node was started before the local cluster node joined the cluster, -the cluster node will assume its node attributes are for a cluster node until -it learns otherwise. Check for remoteness in the peer update callback, to have -another way we can learn it. 
---- - daemons/attrd/attrd_commands.c | 4 ++++ - 1 file changed, 4 insertions(+) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index dbe777e..5f6a754 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -1009,6 +1009,10 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - // Remove votes from cluster nodes that leave, in case election in progress - if (gone && !is_remote) { - attrd_remove_voter(peer); -+ -+ // Ensure remote nodes that come up are in the remote node cache -+ } else if (!gone && is_remote) { -+ cache_remote_node(peer->uname); - } - } - --- -1.8.3.1 - - -From 8c048df0312d0d9c857d87b570a352429a710928 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 7 Jun 2021 11:29:12 -0500 -Subject: [PATCH 05/11] Log: pacemaker-attrd: log peer status changes - ---- - daemons/attrd/attrd_commands.c | 9 +++++++++ - 1 file changed, 9 insertions(+) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 5f6a754..d6d179b 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -972,6 +972,7 @@ attrd_election_cb(gpointer user_data) - return FALSE; - } - -+#define state_text(state) ((state)? (const char *)(state) : "in unknown state") - - void - attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data) -@@ -981,15 +982,23 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da - - switch (kind) { - case crm_status_uname: -+ crm_debug("%s node %s is now %s", -+ (is_remote? "Remote" : "Cluster"), -+ peer->uname, state_text(peer->state)); - break; - - case crm_status_processes: - if (!pcmk_is_set(peer->processes, crm_get_cluster_proc())) { - gone = true; - } -+ crm_debug("Node %s is %s a peer", -+ peer->uname, (gone? "no longer" : "now")); - break; - - case crm_status_nstate: -+ crm_debug("%s node %s is now %s (was %s)", -+ (is_remote? 
"Remote" : "Cluster"), -+ peer->uname, state_text(peer->state), state_text(data)); - if (pcmk__str_eq(peer->state, CRM_NODE_MEMBER, pcmk__str_casei)) { - /* If we're the writer, send new peers a list of all attributes - * (unless it's a remote node, which doesn't run its own attrd) --- -1.8.3.1 - - -From 1dcc8dee4990cf0dbdec0e14db6d9a3ad67a41d5 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 7 Jun 2021 11:13:53 -0500 -Subject: [PATCH 06/11] Low: pacemaker-attrd: ensure node ID is only set for - attributes when known - -In most cases, attribute updates contained the node ID, and the node ID was -used by other code, only if known (i.e. positive). However a couple places did -not check this, so add that. - -I am unsure whether the missing check caused problems in practice, but there -appears to be the possibility that a remote node would wrongly be added to the -cluster node cache. ---- - daemons/attrd/attrd_commands.c | 6 ++++-- - 1 file changed, 4 insertions(+), 2 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index d6d179b..b3f441c 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -136,7 +136,9 @@ build_attribute_xml( - crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid); - crm_xml_add(xml, PCMK__XA_ATTR_USER, user); - crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer); -- crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid); -+ if (peerid > 0) { -+ crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid); -+ } - crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value); - crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000); - crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private); -@@ -937,7 +939,7 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) - /* If this is a cluster node whose node ID we are learning, remember it */ - if ((v->nodeid == 0) && (v->is_remote == FALSE) - && (crm_element_value_int(xml, PCMK__XA_ATTR_NODE_ID, -- (int*)&v->nodeid) == 0)) { 
-+ (int*)&v->nodeid) == 0) && (v->nodeid > 0)) { - - crm_node_t *known_peer = crm_get_peer(v->nodeid, host); - --- -1.8.3.1 - - -From 8d12490e88b558d01db37a38f7d35175c6d2d69a Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Thu, 10 Jun 2021 17:25:57 -0500 -Subject: [PATCH 07/11] Refactor: pacemaker-attrd: functionize processing a - sync response - -... for code isolation, and because we need to add more to it ---- - daemons/attrd/attrd_commands.c | 59 ++++++++++++++++++++++++++++-------------- - 1 file changed, 39 insertions(+), 20 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index b3f441c..d02d3e6 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -572,6 +572,43 @@ attrd_peer_clear_failure(crm_node_t *peer, xmlNode *xml) - } - - /*! -+ * \internal -+ * \brief Load attributes from a peer sync response -+ * -+ * \param[in] peer Peer that sent clear request -+ * \param[in] peer_won Whether peer is the attribute writer -+ * \param[in] xml Request XML -+ */ -+static void -+process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml) -+{ -+ crm_info("Processing " PCMK__ATTRD_CMD_SYNC_RESPONSE " from %s", -+ peer->uname); -+ -+ if (peer_won) { -+ /* Initialize the "seen" flag for all attributes to cleared, so we can -+ * detect attributes that local node has but the writer doesn't. -+ */ -+ clear_attribute_value_seen(); -+ } -+ -+ // Process each attribute update in the sync response -+ for (xmlNode *child = pcmk__xml_first_child(xml); child != NULL; -+ child = pcmk__xml_next(child)) { -+ attrd_peer_update(peer, child, -+ crm_element_value(child, PCMK__XA_ATTR_NODE_NAME), -+ TRUE); -+ } -+ -+ if (peer_won) { -+ /* If any attributes are still not marked as seen, the writer doesn't -+ * know about them, so send all peers an update with them. -+ */ -+ attrd_current_only_attribute_update(peer, xml); -+ } -+} -+ -+/*! 
- \internal - \brief Broadcast private attribute for local node with protocol version - */ -@@ -596,7 +633,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - const char *op = crm_element_value(xml, PCMK__XA_TASK); - const char *election_op = crm_element_value(xml, F_CRM_TASK); - const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME); -- bool peer_won = FALSE; -+ bool peer_won = false; - - if (election_op) { - attrd_handle_election_op(peer, xml); -@@ -631,25 +668,7 @@ attrd_peer_message(crm_node_t *peer, xmlNode *xml) - - } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_SYNC_RESPONSE, pcmk__str_casei) - && !pcmk__str_eq(peer->uname, attrd_cluster->uname, pcmk__str_casei)) { -- xmlNode *child = NULL; -- -- crm_info("Processing %s from %s", op, peer->uname); -- -- /* Clear the seen flag for attribute processing held only in the own node. */ -- if (peer_won) { -- clear_attribute_value_seen(); -- } -- -- for (child = pcmk__xml_first_child(xml); child != NULL; -- child = pcmk__xml_next(child)) { -- host = crm_element_value(child, PCMK__XA_ATTR_NODE_NAME); -- attrd_peer_update(peer, child, host, TRUE); -- } -- -- if (peer_won) { -- /* Synchronize if there is an attribute held only by own node that Writer does not have. */ -- attrd_current_only_attribute_update(peer, xml); -- } -+ process_peer_sync_response(peer, peer_won, xml); - - } else if (pcmk__str_eq(op, PCMK__ATTRD_CMD_FLUSH, pcmk__str_casei)) { - /* Ignore. The flush command was removed in 2.0.0 but may be --- -1.8.3.1 - - -From a890a0e5bbbcabf907f51ed0460868035f72464d Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 11 Jun 2021 14:40:39 -0500 -Subject: [PATCH 08/11] Refactor: pacemaker-attrd: functionize broadcasting - local override - -... 
for code isolation ---- - daemons/attrd/attrd_commands.c | 42 +++++++++++++++++++++++++++++------------- - 1 file changed, 29 insertions(+), 13 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index d02d3e6..4783427 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -804,6 +804,34 @@ attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml) - free_xml(sync); - } - -+/*! -+ * \internal -+ * \brief Override an attribute sync with a local value -+ * -+ * Broadcast the local node's value for an attribute that's different from the -+ * value provided in a peer's attribute synchronization response. This ensures a -+ * node's values for itself take precedence and all peers are kept in sync. -+ * -+ * \param[in] a Attribute entry to override -+ * -+ * \return Local instance of attribute value -+ */ -+static attribute_value_t * -+broadcast_local_value(attribute_t *a) -+{ -+ attribute_value_t *v = g_hash_table_lookup(a->values, attrd_cluster->uname); -+ xmlNode *sync = create_xml_node(NULL, __func__); -+ -+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); -+ build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, -+ a->user, a->is_private, v->nodename, v->nodeid, -+ v->current, FALSE); -+ attrd_xml_add_writer(sync); -+ send_attrd_message(NULL, sync); -+ free_xml(sync); -+ return v; -+} -+ - void - attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) - { -@@ -899,21 +927,9 @@ attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter) - if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei) - && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) { - -- xmlNode *sync = create_xml_node(NULL, __func__); -- - crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s", - attr, host, v->current, value, peer->uname); -- -- crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); -- v 
= g_hash_table_lookup(a->values, host); -- build_attribute_xml(sync, attr, a->set, a->uuid, a->timeout_ms, a->user, -- a->is_private, v->nodename, v->nodeid, v->current, FALSE); -- -- attrd_xml_add_writer(sync); -- -- /* Broadcast in case any other nodes had the inconsistent value */ -- send_attrd_message(NULL, sync); -- free_xml(sync); -+ v = broadcast_local_value(a); - - } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) { - crm_notice("Setting %s[%s]: %s -> %s " CRM_XS " from %s", --- -1.8.3.1 - - -From f6f65e3dab070f1bbdf6d1383f4d6173a8840bc9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 11 Jun 2021 14:50:29 -0500 -Subject: [PATCH 09/11] Log: pacemaker-attrd: improve messages when - broadcasting local-only values - -The traces aren't necessary since build_attribute_xml() already logs the same -info at debug. Also, rename function for clarity, and make static. ---- - daemons/attrd/attrd_commands.c | 35 ++++++++++++++++------------------- - 1 file changed, 16 insertions(+), 19 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 4783427..356defb 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -51,11 +51,12 @@ GHashTable *attributes = NULL; - - void write_attribute(attribute_t *a, bool ignore_delay); - void write_or_elect_attribute(attribute_t *a); --void attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml); - void attrd_peer_update(crm_node_t *peer, xmlNode *xml, const char *host, bool filter); - void attrd_peer_sync(crm_node_t *peer, xmlNode *xml); - void attrd_peer_remove(const char *host, gboolean uncache, const char *source); - -+static void broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml); -+ - static gboolean - send_attrd_message(crm_node_t * node, xmlNode * data) - { -@@ -604,7 +605,7 @@ process_peer_sync_response(crm_node_t *peer, bool peer_won, xmlNode *xml) - /* If any attributes are still not marked as seen, the writer 
doesn't - * know about them, so send all peers an update with them. - */ -- attrd_current_only_attribute_update(peer, xml); -+ broadcast_unseen_local_values(peer, xml); - } - } - -@@ -768,40 +769,36 @@ attrd_lookup_or_create_value(GHashTable *values, const char *host, xmlNode *xml) - return(v); - } - --void --attrd_current_only_attribute_update(crm_node_t *peer, xmlNode *xml) -+void -+broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml) - { - GHashTableIter aIter; - GHashTableIter vIter; -- attribute_t *a; -+ attribute_t *a = NULL; - attribute_value_t *v = NULL; -- xmlNode *sync = create_xml_node(NULL, __func__); -- gboolean build = FALSE; -- -- crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); -+ xmlNode *sync = NULL; - - g_hash_table_iter_init(&aIter, attributes); - while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) { - g_hash_table_iter_init(&vIter, a->values); - while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { -- if (pcmk__str_eq(v->nodename, attrd_cluster->uname, pcmk__str_casei) && v->seen == FALSE) { -- crm_trace("Syncing %s[%s] = %s to everyone.(from local only attributes)", a->id, v->nodename, v->current); -- -- build = TRUE; -+ if (!(v->seen) && pcmk__str_eq(v->nodename, attrd_cluster->uname, -+ pcmk__str_casei)) { -+ if (sync == NULL) { -+ sync = create_xml_node(NULL, __func__); -+ crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); -+ } - build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, - v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? 
TRUE : FALSE)); -- } else { -- crm_trace("Local attribute(%s[%s] = %s) was ignore.(another host) : [%s]", a->id, v->nodename, v->current, attrd_cluster->uname); -- continue; - } - } - } - -- if (build) { -- crm_debug("Syncing values to everyone.(from local only attributes)"); -+ if (sync != NULL) { -+ crm_debug("Broadcasting local-only values"); - send_attrd_message(NULL, sync); -+ free_xml(sync); - } -- free_xml(sync); - } - - /*! --- -1.8.3.1 - - -From ab90ffb785ea018556f216b8f540f8c3429a3947 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 11 Jun 2021 15:04:20 -0500 -Subject: [PATCH 10/11] Refactor: pacemaker-attrd: simplify attribute XML - creation function - -... and rename for clarity ---- - daemons/attrd/attrd_commands.c | 48 ++++++++++++++++++++++++------------------ - 1 file changed, 27 insertions(+), 21 deletions(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 356defb..5b32a77 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -125,25 +125,35 @@ cache_remote_node(const char *node_name) - CRM_ASSERT(crm_remote_peer_get(node_name) != NULL); - } - -+/*! 
-+ * \internal -+ * \brief Create an XML representation of an attribute for use in peer messages -+ * -+ * \param[in] parent Create attribute XML as child element of this element -+ * \param[in] a Attribute to represent -+ * \param[in] v Attribute value to represent -+ * \param[in] force_write If true, value should be written even if unchanged -+ * -+ * \return XML representation of attribute -+ */ - static xmlNode * --build_attribute_xml( -- xmlNode *parent, const char *name, const char *set, const char *uuid, unsigned int timeout_ms, const char *user, -- gboolean is_private, const char *peer, uint32_t peerid, const char *value, gboolean is_force_write) -+add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v, -+ bool force_write) - { - xmlNode *xml = create_xml_node(parent, __func__); - -- crm_xml_add(xml, PCMK__XA_ATTR_NAME, name); -- crm_xml_add(xml, PCMK__XA_ATTR_SET, set); -- crm_xml_add(xml, PCMK__XA_ATTR_UUID, uuid); -- crm_xml_add(xml, PCMK__XA_ATTR_USER, user); -- crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, peer); -- if (peerid > 0) { -- crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, peerid); -+ crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id); -+ crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set); -+ crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid); -+ crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user); -+ crm_xml_add(xml, PCMK__XA_ATTR_NODE_NAME, v->nodename); -+ if (v->nodeid > 0) { -+ crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid); - } -- crm_xml_add(xml, PCMK__XA_ATTR_VALUE, value); -- crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, timeout_ms/1000); -- crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, is_private); -- crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, is_force_write); -+ crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); -+ crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000); -+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private); -+ crm_xml_add_int(xml, PCMK__XA_ATTR_FORCE, force_write); - - return xml; - } -@@ 
-695,8 +705,7 @@ attrd_peer_sync(crm_node_t *peer, xmlNode *xml) - g_hash_table_iter_init(&vIter, a->values); - while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) { - crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone"); -- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, -- v->nodename, v->nodeid, v->current, FALSE); -+ add_attribute_value_xml(sync, a, v, false); - } - } - -@@ -788,8 +797,7 @@ broadcast_unseen_local_values(crm_node_t *peer, xmlNode *xml) - sync = create_xml_node(NULL, __func__); - crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); - } -- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, a->user, a->is_private, -- v->nodename, v->nodeid, v->current, (a->timeout_ms && a->timer ? TRUE : FALSE)); -+ add_attribute_value_xml(sync, a, v, a->timeout_ms && a->timer); - } - } - } -@@ -820,9 +828,7 @@ broadcast_local_value(attribute_t *a) - xmlNode *sync = create_xml_node(NULL, __func__); - - crm_xml_add(sync, PCMK__XA_TASK, PCMK__ATTRD_CMD_SYNC_RESPONSE); -- build_attribute_xml(sync, a->id, a->set, a->uuid, a->timeout_ms, -- a->user, a->is_private, v->nodename, v->nodeid, -- v->current, FALSE); -+ add_attribute_value_xml(sync, a, v, false); - attrd_xml_add_writer(sync); - send_attrd_message(NULL, sync); - free_xml(sync); --- -1.8.3.1 - - -From 540d74130c5c8d9c626d6c50475e4dc4f64234e7 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 4 Jun 2021 16:34:26 -0500 -Subject: [PATCH 11/11] Fix: pacemaker-attrd: avoid repeated unfencing of - remote nodes - -The attribute manager can't record a remote node's attributes to the CIB until -it knows the node is remote. Normally, this is learned when the remote node -starts, because the controller clears the CRM_OP_PROBED attribute and indicates -that it is for a remote node. 
- -However, if a cluster node is down when a remote node starts, and later comes -up, it learns the remote node's existing attributes as part of the attribute -sync. Previously, this did not include whether each value is for a cluster or -remote node, so the newly joined attribute manager couldn't write out remote -nodes' attributes until it learned that via some other event -- which might not -happen before the node becomes DC, in which case its scheduler will not see any -unfencing-related node attributes and may wrongly schedule unfencing. - -The sync response handling already calls attrd_lookup_or_create_value(), which -checks PCMK__XA_ATTR_IS_REMOTE, so all we need to do is add that to the sync -response. ---- - daemons/attrd/attrd_commands.c | 6 +++++- - 1 file changed, 5 insertions(+), 1 deletion(-) - -diff --git a/daemons/attrd/attrd_commands.c b/daemons/attrd/attrd_commands.c -index 5b32a77..0142383 100644 ---- a/daemons/attrd/attrd_commands.c -+++ b/daemons/attrd/attrd_commands.c -@@ -43,8 +43,9 @@ - * 1 1.1.15 PCMK__ATTRD_CMD_UPDATE_BOTH, - * PCMK__ATTRD_CMD_UPDATE_DELAY - * 2 1.1.17 PCMK__ATTRD_CMD_CLEAR_FAILURE -+ * 3 2.1.1 PCMK__ATTRD_CMD_SYNC_RESPONSE indicates remote nodes - */ --#define ATTRD_PROTOCOL_VERSION "2" -+#define ATTRD_PROTOCOL_VERSION "3" - - int last_cib_op_done = 0; - GHashTable *attributes = NULL; -@@ -150,6 +151,9 @@ add_attribute_value_xml(xmlNode *parent, attribute_t *a, attribute_value_t *v, - if (v->nodeid > 0) { - crm_xml_add_int(xml, PCMK__XA_ATTR_NODE_ID, v->nodeid); - } -+ if (v->is_remote != 0) { -+ crm_xml_add_int(xml, PCMK__XA_ATTR_IS_REMOTE, 1); -+ } - crm_xml_add(xml, PCMK__XA_ATTR_VALUE, v->current); - crm_xml_add_int(xml, PCMK__XA_ATTR_DAMPENING, a->timeout_ms / 1000); - crm_xml_add_int(xml, PCMK__XA_ATTR_IS_PRIVATE, a->is_private); --- -1.8.3.1 - diff --git a/SOURCES/008-dynamic-list-fencing.patch b/SOURCES/008-dynamic-list-fencing.patch deleted file mode 100644 index 4a56117..0000000 --- 
a/SOURCES/008-dynamic-list-fencing.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 2d15fb37525f88ec8d5acb689b698044c4bb69b1 Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Thu, 17 Jun 2021 22:39:12 +0900 -Subject: [PATCH 1/2] Low: fenced: Low: fenced: Remove unnecessary release. - ---- - daemons/fenced/fenced_commands.c | 3 --- - 1 file changed, 3 deletions(-) - -diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c -index fee55a7..35aec06 100644 ---- a/daemons/fenced/fenced_commands.c -+++ b/daemons/fenced/fenced_commands.c -@@ -1104,9 +1104,6 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - /* Fall back to status */ - g_hash_table_replace(dev->params, - strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); -- -- g_list_free_full(dev->targets, free); -- dev->targets = NULL; - } else if (!rc) { - crm_info("Refreshing port list for %s", dev->id); - g_list_free_full(dev->targets, free); --- -1.8.3.1 - - -From a29f88f6020aac5f1ac32072942eb5713d7be50d Mon Sep 17 00:00:00 2001 -From: Hideo Yamauchi -Date: Thu, 17 Jun 2021 22:40:40 +0900 -Subject: [PATCH 2/2] High: fenced: Wrong device may be selected when - "dynamic-list" is specified. 
- ---- - daemons/fenced/fenced_commands.c | 67 +++++++++++++++++++++++----------------- - 1 file changed, 38 insertions(+), 29 deletions(-) - -diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c -index 35aec06..da076fb 100644 ---- a/daemons/fenced/fenced_commands.c -+++ b/daemons/fenced/fenced_commands.c -@@ -904,6 +904,31 @@ xml2device_params(const char *name, xmlNode *dev) - return params; - } - -+static const char * -+target_list_type(stonith_device_t * dev) -+{ -+ const char *check_type = NULL; -+ -+ check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); -+ -+ if (check_type == NULL) { -+ -+ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { -+ check_type = "static-list"; -+ } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { -+ check_type = "static-list"; -+ } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { -+ check_type = "dynamic-list"; -+ } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { -+ check_type = "status"; -+ } else { -+ check_type = "none"; -+ } -+ } -+ -+ return check_type; -+} -+ - static stonith_device_t * - build_device_from_xml(xmlNode * msg) - { -@@ -931,6 +956,12 @@ build_device_from_xml(xmlNode * msg) - value = g_hash_table_lookup(device->params, PCMK_STONITH_HOST_MAP); - device->aliases = build_port_aliases(value, &(device->targets)); - -+ value = target_list_type(device); -+ if (!pcmk__str_eq(value, "static-list", pcmk__str_casei) && device->targets) { -+ /* Other than "static-list", dev-> targets is unnecessary. 
*/ -+ g_list_free_full(device->targets, free); -+ device->targets = NULL; -+ } - device->agent_metadata = get_agent_metadata(device->agent); - if (device->agent_metadata) { - read_action_metadata(device); -@@ -971,31 +1002,6 @@ build_device_from_xml(xmlNode * msg) - return device; - } - --static const char * --target_list_type(stonith_device_t * dev) --{ -- const char *check_type = NULL; -- -- check_type = g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK); -- -- if (check_type == NULL) { -- -- if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_LIST)) { -- check_type = "static-list"; -- } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP)) { -- check_type = "static-list"; -- } else if (pcmk_is_set(dev->flags, st_device_supports_list)) { -- check_type = "dynamic-list"; -- } else if (pcmk_is_set(dev->flags, st_device_supports_status)) { -- check_type = "status"; -- } else { -- check_type = "none"; -- } -- } -- -- return check_type; --} -- - static void - schedule_internal_command(const char *origin, - stonith_device_t * device, -@@ -1099,11 +1105,14 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - - /* If we successfully got the targets earlier, don't disable. */ - if (rc != 0 && !dev->targets) { -- crm_notice("Disabling port list queries for %s: %s " -- CRM_XS " rc=%d", dev->id, output, rc); -- /* Fall back to status */ -- g_hash_table_replace(dev->params, -+ if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_CHECK) == NULL) { -+ /* -+ If the operation fails if the user does not explicitly specify "dynamic-list", it will fall back to "status". 
-+ */ -+ crm_notice("Disabling port list queries for %s (%d): %s", dev->id, rc, output); -+ g_hash_table_replace(dev->params, - strdup(PCMK_STONITH_HOST_CHECK), strdup("status")); -+ } - } else if (!rc) { - crm_info("Refreshing port list for %s", dev->id); - g_list_free_full(dev->targets, free); --- -1.8.3.1 - diff --git a/SOURCES/008-fencing-history.patch b/SOURCES/008-fencing-history.patch new file mode 100644 index 0000000..1ea9ac7 --- /dev/null +++ b/SOURCES/008-fencing-history.patch @@ -0,0 +1,43 @@ +From 0339e89f3238b31df78b864dae8684b82c370741 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 13 Dec 2021 15:22:40 -0600 +Subject: [PATCH] Fix: fencer: get current time correctly + +f52bc8e1ce (2.1.2) introduced a regression by using clock_gettime() with +CLOCK_MONOTONIC to get the current time. Use qb_util_timespec_from_epoch_get() +instead (which as of this writing uses clock_gettime() with CLOCK_REALTIME if +available, and falls back to gettimeofday() if not). +--- + daemons/fenced/fenced_commands.c | 11 +++-------- + 1 file changed, 3 insertions(+), 8 deletions(-) + +diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c +index f34cb4f13..7685cb8c3 100644 +--- a/daemons/fenced/fenced_commands.c ++++ b/daemons/fenced/fenced_commands.c +@@ -2746,19 +2746,14 @@ bool fencing_peer_active(crm_node_t *peer) + return FALSE; + } + +-void set_fencing_completed(remote_fencing_op_t * op) ++void ++set_fencing_completed(remote_fencing_op_t *op) + { +-#ifdef CLOCK_MONOTONIC + struct timespec tv; + +- clock_gettime(CLOCK_MONOTONIC, &tv); +- ++ qb_util_timespec_from_epoch_get(&tv); + op->completed = tv.tv_sec; + op->completed_nsec = tv.tv_nsec; +-#else +- op->completed = time(NULL); +- op->completed_nsec = 0L; +-#endif + } + + /*! 
+-- +2.27.0 + diff --git a/SOURCES/009-crm_resource-messages.patch b/SOURCES/009-crm_resource-messages.patch deleted file mode 100644 index bdbcf03..0000000 --- a/SOURCES/009-crm_resource-messages.patch +++ /dev/null @@ -1,229 +0,0 @@ -From 5bcab230ad4c647ca78b18bd4a66e30a4bb4417f Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Wed, 16 Jun 2021 11:19:03 +0200 -Subject: [PATCH 1/2] Feature: crm_resource: report not supported for --force-* - w/systemd, upstart, nagios and bundled resources - ---- - tools/crm_resource.c | 21 ++++---------- - tools/crm_resource_runtime.c | 67 +++++++++++++++++++++++++++++--------------- - 2 files changed, 51 insertions(+), 37 deletions(-) - -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index 4abdd03..fa7902c 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -660,21 +660,12 @@ attr_set_type_cb(const gchar *option_name, const gchar *optarg, gpointer data, G - - gboolean - class_cb(const gchar *option_name, const gchar *optarg, gpointer data, GError **error) { -- if (!(pcmk_get_ra_caps(optarg) & pcmk_ra_cap_params)) { -- if (!args->quiet) { -- g_set_error(error, G_OPTION_ERROR, CRM_EX_INVALID_PARAM, -- "Standard %s does not support parameters\n", optarg); -- } -- return FALSE; -- -- } else { -- if (options.v_class != NULL) { -- free(options.v_class); -- } -- -- options.v_class = strdup(optarg); -+ if (options.v_class != NULL) { -+ free(options.v_class); - } - -+ options.v_class = strdup(optarg); -+ - options.cmdline_config = TRUE; - options.require_resource = FALSE; - return TRUE; -@@ -1422,7 +1413,7 @@ validate_cmdline_config(void) - } else if (options.rsc_cmd != cmd_execute_agent) { - g_set_error(&error, PCMK__EXITC_ERROR, CRM_EX_USAGE, - "--class, --agent, and --provider can only be used with " -- "--validate"); -+ "--validate and --force-*"); - - // Not all of --class, --agent, and --provider need to be given. Not all - // classes support the concept of a provider. 
Check that what we were given -@@ -1841,7 +1832,7 @@ main(int argc, char **argv) - if (options.cmdline_config) { - exit_code = cli_resource_execute_from_params(out, NULL, - options.v_class, options.v_provider, options.v_agent, -- "validate-all", options.cmdline_params, -+ options.operation, options.cmdline_params, - options.override_params, options.timeout_ms, - args->verbosity, options.force, options.check_level); - } else { -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index fe42e60..59e6df5 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1674,24 +1674,59 @@ wait_till_stable(pcmk__output_t *out, int timeout_ms, cib_t * cib) - return rc; - } - -+static const char * -+get_action(const char *rsc_action) { -+ const char *action = NULL; -+ -+ if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) { -+ action = "validate-all"; -+ -+ } else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { -+ action = "monitor"; -+ -+ } else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-stop", -+ "force-demote", "force-promote", NULL)) { -+ action = rsc_action+6; -+ } else { -+ action = rsc_action; -+ } -+ -+ return action; -+} -+ - crm_exit_t - cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - const char *rsc_class, const char *rsc_prov, -- const char *rsc_type, const char *action, -+ const char *rsc_type, const char *rsc_action, - GHashTable *params, GHashTable *override_hash, - int timeout_ms, int resource_verbose, gboolean force, - int check_level) - { -+ const char *action = NULL; - GHashTable *params_copy = NULL; - crm_exit_t exit_code = CRM_EX_OK; - svc_action_t *op = NULL; - - if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - out->err(out, "Sorry, the %s option doesn't support %s resources yet", -- action, rsc_class); -+ rsc_action, rsc_class); -+ crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); -+ } else if 
(pcmk__strcase_any_of(rsc_class, PCMK_RESOURCE_CLASS_SYSTEMD, -+ PCMK_RESOURCE_CLASS_UPSTART, PCMK_RESOURCE_CLASS_NAGIOS, NULL)) { -+ out->err(out, "Sorry, the %s option doesn't support %s resources", -+ rsc_action, rsc_class); -+ crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); -+ } else if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, -+ pcmk__str_casei) && !pcmk__str_eq( -+ resources_find_service_class(rsc_name), PCMK_RESOURCE_CLASS_LSB, -+ pcmk__str_casei)) { -+ out->err(out, "Sorry, the %s option doesn't support %s resources", -+ rsc_action, resources_find_service_class(rsc_name)); - crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); - } - -+ action = get_action(rsc_action); -+ - /* If no timeout was provided, grab the default. */ - if (timeout_ms == 0) { - timeout_ms = crm_get_msec(CRM_DEFAULT_OP_TIMEOUT_S); -@@ -1766,7 +1801,7 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - exit_code = op->rc; - - out->message(out, "resource-agent-action", resource_verbose, rsc_class, -- rsc_prov, rsc_type, rsc_name, action, override_hash, op->rc, -+ rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, op->rc, - op->status, op->stdout_data, op->stderr_data); - } else { - exit_code = op->rc == 0 ? 
CRM_EX_ERROR : op->rc; -@@ -1790,27 +1825,15 @@ cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - const char *rtype = NULL; - const char *rprov = NULL; - const char *rclass = NULL; -- const char *action = NULL; - GHashTable *params = NULL; - -- if (pcmk__str_eq(rsc_action, "validate", pcmk__str_casei)) { -- action = "validate-all"; -- -- } else if (pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { -- action = "monitor"; -- -- } else if (pcmk__str_eq(rsc_action, "force-stop", pcmk__str_casei)) { -- action = rsc_action+6; -- -- } else if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote", -+ if (pcmk__strcase_any_of(rsc_action, "force-start", "force-demote", - "force-promote", NULL)) { -- action = rsc_action+6; -- - if(pe_rsc_is_clone(rsc)) { - GList *nodes = cli_resource_search(rsc, requested_name, data_set); - if(nodes != NULL && force == FALSE) { - out->err(out, "It is not safe to %s %s here: the cluster claims it is already active", -- action, rsc->id); -+ rsc_action, rsc->id); - out->err(out, "Try setting target-role=Stopped first or specifying " - "the force option"); - return CRM_EX_UNSAFE; -@@ -1818,9 +1841,6 @@ cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - - g_list_free_full(nodes, free); - } -- -- } else { -- action = rsc_action; - } - - if(pe_rsc_is_clone(rsc)) { -@@ -1831,6 +1851,9 @@ cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - if(rsc->variant == pe_group) { - out->err(out, "Sorry, the %s option doesn't support group resources", rsc_action); - return CRM_EX_UNIMPLEMENT_FEATURE; -+ } else if (rsc->variant == pe_container || pe_rsc_is_bundled(rsc)) { -+ out->err(out, "Sorry, the %s option doesn't support bundled resources", rsc_action); -+ return CRM_EX_UNIMPLEMENT_FEATURE; - } - - rclass = crm_element_value(rsc->xml, XML_AGENT_ATTR_CLASS); -@@ -1841,12 +1864,12 @@ cli_resource_execute(pe_resource_t *rsc, const char *requested_name, - data_set); - - if 
(timeout_ms == 0) { -- timeout_ms = pe_get_configured_timeout(rsc, action, data_set); -+ timeout_ms = pe_get_configured_timeout(rsc, get_action(rsc_action), data_set); - } - - rid = pe_rsc_is_anon_clone(rsc->parent)? requested_name : rsc->id; - -- exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, action, -+ exit_code = cli_resource_execute_from_params(out, rid, rclass, rprov, rtype, rsc_action, - params, override_hash, timeout_ms, - resource_verbose, force, check_level); - return exit_code; --- -1.8.3.1 - - -From 289cd231186755d99c1262eb9f968dc852409588 Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Fri, 16 Jul 2021 13:20:55 +0200 -Subject: [PATCH 2/2] Refactor: crm_resource: remove duplicate Overriding - message that's handled elsewhere - ---- - tools/crm_resource_runtime.c | 2 -- - 1 file changed, 2 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index 59e6df5..ce037c5 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1791,8 +1791,6 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - - g_hash_table_iter_init(&iter, override_hash); - while (g_hash_table_iter_next(&iter, (gpointer *) & name, (gpointer *) & value)) { -- out->info(out, "Overriding the cluster configuration for '%s' with '%s' = '%s'", -- rsc_name, name, value); - g_hash_table_replace(op->params, strdup(name), strdup(value)); - } - } --- -1.8.3.1 - diff --git a/SOURCES/010-probe-pending.patch b/SOURCES/010-probe-pending.patch deleted file mode 100644 index 336c33e..0000000 --- a/SOURCES/010-probe-pending.patch +++ /dev/null @@ -1,715 +0,0 @@ -From b0347f7b8e609420a7055d5fe537cc40ac0d1bb2 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Fri, 16 Jul 2021 11:08:05 -0500 -Subject: [PATCH 1/3] Fix: scheduler: don't schedule probes of unmanaged - resources on pending nodes - -Previously, custom_action() would set an action's optional or runnable flag in -the same, 
exclusive if-else sequence. This means that if an action should be -optional *and* runnable, only one would be set. In particular, this meant that -if a resource is unmanaged *and* its allocated node is pending, any probe would -be set to optional, but not unrunnable, and the controller could wrongly -attempt the probe before the join completed. - -Now, optional is checked separately. ---- - lib/pengine/utils.c | 22 ++++++++++++++-------- - 1 file changed, 14 insertions(+), 8 deletions(-) - -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 5ef742e..965824b 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -541,6 +541,20 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, - FALSE, data_set); - } - -+ // Make the action optional if its resource is unmanaged -+ if (!pcmk_is_set(action->flags, pe_action_pseudo) -+ && (action->node != NULL) -+ && !pcmk_is_set(action->rsc->flags, pe_rsc_managed) -+ && (g_hash_table_lookup(action->meta, -+ XML_LRM_ATTR_INTERVAL_MS) == NULL)) { -+ pe_rsc_debug(rsc, "%s on %s is optional (%s is unmanaged)", -+ action->uuid, action->node->details->uname, -+ action->rsc->id); -+ pe__set_action_flags(action, pe_action_optional); -+ // We shouldn't clear runnable here because ... 
something -+ } -+ -+ // Make the action runnable or unrunnable as appropriate - if (pcmk_is_set(action->flags, pe_action_pseudo)) { - /* leave untouched */ - -@@ -549,14 +563,6 @@ custom_action(pe_resource_t * rsc, char *key, const char *task, - action->uuid); - pe__clear_action_flags(action, pe_action_runnable); - -- } else if (!pcmk_is_set(rsc->flags, pe_rsc_managed) -- && g_hash_table_lookup(action->meta, -- XML_LRM_ATTR_INTERVAL_MS) == NULL) { -- pe_rsc_debug(rsc, "%s on %s is optional (%s is unmanaged)", -- action->uuid, action->node->details->uname, rsc->id); -- pe__set_action_flags(action, pe_action_optional); -- //pe__clear_action_flags(action, pe_action_runnable); -- - } else if (!pcmk_is_set(action->flags, pe_action_dc) - && !(action->node->details->online) - && (!pe__is_guest_node(action->node) --- -1.8.3.1 - - -From 520303b90eb707f5b7a9afa9b106e4a38b90f0f9 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Jul 2021 17:18:44 -0500 -Subject: [PATCH 2/3] Test: scheduler: update existing tests for probe - scheduling change - -This is an improvement. Looking at bundle-probe-order-2 for example, -the bundle's first instance has this status to start: - - * Replica[0] - * galera (ocf::heartbeat:galera): Stopped (unmanaged) - * galera-bundle-docker-0 (ocf::heartbeat:docker): Started centos2 (unmanaged) - * galera-bundle-0 (ocf::pacemaker:remote): Started centos2 (unmanaged) - -After the changes, we now schedule recurring monitors for -galera-bundle-docker-0 and galera-bundle-0 on centos2, and a probe of galera:0 -on galera-bundle-0, all of which are possible. 
---- - cts/scheduler/dot/bundle-probe-order-2.dot | 3 ++ - cts/scheduler/dot/bundle-probe-order-3.dot | 1 + - cts/scheduler/exp/bundle-probe-order-2.exp | 33 ++++++++++++++++++++-- - cts/scheduler/exp/bundle-probe-order-3.exp | 21 ++++++++++---- - cts/scheduler/summary/bundle-probe-order-2.summary | 3 ++ - cts/scheduler/summary/bundle-probe-order-3.summary | 1 + - 6 files changed, 53 insertions(+), 9 deletions(-) - -diff --git a/cts/scheduler/dot/bundle-probe-order-2.dot b/cts/scheduler/dot/bundle-probe-order-2.dot -index 0cce3fd..7706195 100644 ---- a/cts/scheduler/dot/bundle-probe-order-2.dot -+++ b/cts/scheduler/dot/bundle-probe-order-2.dot -@@ -1,6 +1,9 @@ - digraph "g" { -+"galera-bundle-0_monitor_30000 centos2" [ style=bold color="green" fontcolor="black"] -+"galera-bundle-docker-0_monitor_60000 centos2" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-1_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-2_monitor_0 centos1" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-2_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-2_monitor_0 centos3" [ style=bold color="green" fontcolor="black"] -+"galera:0_monitor_0 galera-bundle-0" [ style=bold color="green" fontcolor="black"] - } -diff --git a/cts/scheduler/dot/bundle-probe-order-3.dot b/cts/scheduler/dot/bundle-probe-order-3.dot -index a4b109f..53a384b 100644 ---- a/cts/scheduler/dot/bundle-probe-order-3.dot -+++ b/cts/scheduler/dot/bundle-probe-order-3.dot -@@ -2,6 +2,7 @@ - "galera-bundle-0_monitor_0 centos1" [ style=bold color="green" fontcolor="black"] - "galera-bundle-0_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] - "galera-bundle-0_monitor_0 centos3" [ style=bold color="green" fontcolor="black"] -+"galera-bundle-docker-0_monitor_60000 centos2" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-1_monitor_0 centos2" [ style=bold color="green" 
fontcolor="black"] - "galera-bundle-docker-2_monitor_0 centos1" [ style=bold color="green" fontcolor="black"] - "galera-bundle-docker-2_monitor_0 centos2" [ style=bold color="green" fontcolor="black"] -diff --git a/cts/scheduler/exp/bundle-probe-order-2.exp b/cts/scheduler/exp/bundle-probe-order-2.exp -index d6174e7..5b28050 100644 ---- a/cts/scheduler/exp/bundle-probe-order-2.exp -+++ b/cts/scheduler/exp/bundle-probe-order-2.exp -@@ -1,6 +1,33 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -8,7 +35,7 @@ - - - -- -+ - - - -@@ -17,7 +44,7 @@ - - - -- -+ - - - -@@ -26,7 +53,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/exp/bundle-probe-order-3.exp b/cts/scheduler/exp/bundle-probe-order-3.exp -index e1f60e7..69140a4 100644 ---- a/cts/scheduler/exp/bundle-probe-order-3.exp -+++ b/cts/scheduler/exp/bundle-probe-order-3.exp -@@ -1,6 +1,15 @@ - - - -+ -+ -+ -+ -+ -+ -+ -+ -+ - - - -@@ -8,7 +17,7 @@ - - - -- -+ - - - -@@ -17,7 +26,7 @@ - - - -- -+ - - - -@@ -26,7 +35,7 @@ - - - -- -+ - - - -@@ -35,7 +44,7 @@ - - - -- -+ - - - -@@ -44,7 +53,7 @@ - - - -- -+ - - - -@@ -53,7 +62,7 @@ - - - -- -+ - - - -diff --git a/cts/scheduler/summary/bundle-probe-order-2.summary b/cts/scheduler/summary/bundle-probe-order-2.summary -index 681d607..024c472 100644 ---- a/cts/scheduler/summary/bundle-probe-order-2.summary -+++ b/cts/scheduler/summary/bundle-probe-order-2.summary -@@ -13,6 +13,9 @@ Current cluster status: - Transition Summary: - - Executing Cluster Transition: -+ * Resource action: galera:0 monitor on galera-bundle-0 -+ * Resource action: galera-bundle-docker-0 monitor=60000 on centos2 -+ * Resource action: galera-bundle-0 monitor=30000 on centos2 - * Resource action: galera-bundle-docker-1 monitor on centos2 - * Resource action: galera-bundle-docker-2 monitor on centos3 - * Resource action: galera-bundle-docker-2 monitor on centos2 -diff --git a/cts/scheduler/summary/bundle-probe-order-3.summary 
b/cts/scheduler/summary/bundle-probe-order-3.summary -index f089618..331bd87 100644 ---- a/cts/scheduler/summary/bundle-probe-order-3.summary -+++ b/cts/scheduler/summary/bundle-probe-order-3.summary -@@ -12,6 +12,7 @@ Current cluster status: - Transition Summary: - - Executing Cluster Transition: -+ * Resource action: galera-bundle-docker-0 monitor=60000 on centos2 - * Resource action: galera-bundle-0 monitor on centos3 - * Resource action: galera-bundle-0 monitor on centos2 - * Resource action: galera-bundle-0 monitor on centos1 --- -1.8.3.1 - - -From cb9c294a7ef22916866e0e42e51e88c2b1a61c2e Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 14 Jul 2021 17:23:11 -0500 -Subject: [PATCH 3/3] Test: scheduler: add test for probe of unmanaged resource - on pending node - -No probes should be scheduled in this case ---- - cts/cts-scheduler.in | 1 + - cts/scheduler/dot/probe-pending-node.dot | 2 + - cts/scheduler/exp/probe-pending-node.exp | 1 + - cts/scheduler/scores/probe-pending-node.scores | 61 ++++++ - cts/scheduler/summary/probe-pending-node.summary | 55 +++++ - cts/scheduler/xml/probe-pending-node.xml | 247 +++++++++++++++++++++++ - 6 files changed, 367 insertions(+) - create mode 100644 cts/scheduler/dot/probe-pending-node.dot - create mode 100644 cts/scheduler/exp/probe-pending-node.exp - create mode 100644 cts/scheduler/scores/probe-pending-node.scores - create mode 100644 cts/scheduler/summary/probe-pending-node.summary - create mode 100644 cts/scheduler/xml/probe-pending-node.xml - -diff --git a/cts/cts-scheduler.in b/cts/cts-scheduler.in -index fc9790b..7ba2415 100644 ---- a/cts/cts-scheduler.in -+++ b/cts/cts-scheduler.in -@@ -110,6 +110,7 @@ TESTS = [ - [ "probe-2", "Correctly re-probe cloned groups" ], - [ "probe-3", "Probe (pending node)" ], - [ "probe-4", "Probe (pending node + stopped resource)" ], -+ [ "probe-pending-node", "Probe (pending node + unmanaged resource)" ], - [ "standby", "Standby" ], - [ "comments", "Comments" ], - ], -diff --git 
a/cts/scheduler/dot/probe-pending-node.dot b/cts/scheduler/dot/probe-pending-node.dot -new file mode 100644 -index 0000000..d8f1c9f ---- /dev/null -+++ b/cts/scheduler/dot/probe-pending-node.dot -@@ -0,0 +1,2 @@ -+ digraph "g" { -+} -diff --git a/cts/scheduler/exp/probe-pending-node.exp b/cts/scheduler/exp/probe-pending-node.exp -new file mode 100644 -index 0000000..56e315f ---- /dev/null -+++ b/cts/scheduler/exp/probe-pending-node.exp -@@ -0,0 +1 @@ -+ -diff --git a/cts/scheduler/scores/probe-pending-node.scores b/cts/scheduler/scores/probe-pending-node.scores -new file mode 100644 -index 0000000..020a1a0 ---- /dev/null -+++ b/cts/scheduler/scores/probe-pending-node.scores -@@ -0,0 +1,61 @@ -+ -+pcmk__clone_allocate: fs_UC5_SAPMNT-clone allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SAPMNT-clone allocation score on gcdoubwap02: 0 -+pcmk__clone_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap02: 0 -+pcmk__clone_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap02: 0 -+pcmk__clone_allocate: fs_UC5_SYS-clone allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SYS-clone allocation score on gcdoubwap02: 0 -+pcmk__clone_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap02: 0 -+pcmk__clone_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap01: 0 -+pcmk__clone_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: fs_UC5_ascs allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: fs_UC5_ascs allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: fs_UC5_ers allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: fs_UC5_ers allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: grp_UC5_ascs allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: grp_UC5_ascs 
allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: grp_UC5_ers allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: grp_UC5_ers allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap01: INFINITY -+pcmk__group_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_vip_init_ers allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: rsc_vip_init_ers allocation score on gcdoubwap02: 0 -+pcmk__group_allocate: rsc_vip_int_ascs allocation score on gcdoubwap01: 0 -+pcmk__group_allocate: rsc_vip_int_ascs allocation score on gcdoubwap02: 0 -+pcmk__native_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: fs_UC5_SAPMNT:0 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: fs_UC5_SAPMNT:1 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: fs_UC5_SYS:0 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: fs_UC5_SYS:1 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: fs_UC5_ascs allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: fs_UC5_ascs allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: fs_UC5_ers allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: fs_UC5_ers 
allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: rsc_sap_UC5_ASCS11 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: rsc_sap_UC5_ERS12 allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: rsc_vip_gcp_ascs allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: rsc_vip_gcp_ers allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_vip_init_ers allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: rsc_vip_init_ers allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: rsc_vip_int_ascs allocation score on gcdoubwap01: INFINITY -+pcmk__native_allocate: rsc_vip_int_ascs allocation score on gcdoubwap02: -INFINITY -+pcmk__native_allocate: stonith_gcdoubwap01 allocation score on gcdoubwap01: -INFINITY -+pcmk__native_allocate: stonith_gcdoubwap01 allocation score on gcdoubwap02: 0 -+pcmk__native_allocate: stonith_gcdoubwap02 allocation score on gcdoubwap01: 0 -+pcmk__native_allocate: stonith_gcdoubwap02 allocation score on gcdoubwap02: -INFINITY -diff --git a/cts/scheduler/summary/probe-pending-node.summary b/cts/scheduler/summary/probe-pending-node.summary -new file mode 100644 -index 0000000..208186b ---- /dev/null -+++ b/cts/scheduler/summary/probe-pending-node.summary -@@ -0,0 +1,55 @@ -+Using the original execution date of: 2021-06-11 13:55:24Z -+ -+ *** Resource management is DISABLED *** -+ The cluster will not attempt to start, stop or recover services -+ -+Current cluster status: -+ * Node List: -+ * Node gcdoubwap02: pending -+ * Online: [ gcdoubwap01 ] -+ -+ * Full List of Resources: -+ * stonith_gcdoubwap01 (stonith:fence_gce): 
Stopped (unmanaged) -+ * stonith_gcdoubwap02 (stonith:fence_gce): Stopped (unmanaged) -+ * Clone Set: fs_UC5_SAPMNT-clone [fs_UC5_SAPMNT] (unmanaged): -+ * Stopped: [ gcdoubwap01 gcdoubwap02 ] -+ * Clone Set: fs_UC5_SYS-clone [fs_UC5_SYS] (unmanaged): -+ * Stopped: [ gcdoubwap01 gcdoubwap02 ] -+ * Resource Group: grp_UC5_ascs (unmanaged): -+ * rsc_vip_int_ascs (ocf:heartbeat:IPaddr2): Stopped (unmanaged) -+ * rsc_vip_gcp_ascs (ocf:heartbeat:gcp-vpc-move-vip): Started gcdoubwap01 (unmanaged) -+ * fs_UC5_ascs (ocf:heartbeat:Filesystem): Stopped (unmanaged) -+ * rsc_sap_UC5_ASCS11 (ocf:heartbeat:SAPInstance): Stopped (unmanaged) -+ * Resource Group: grp_UC5_ers (unmanaged): -+ * rsc_vip_init_ers (ocf:heartbeat:IPaddr2): Stopped (unmanaged) -+ * rsc_vip_gcp_ers (ocf:heartbeat:gcp-vpc-move-vip): Stopped (unmanaged) -+ * fs_UC5_ers (ocf:heartbeat:Filesystem): Stopped (unmanaged) -+ * rsc_sap_UC5_ERS12 (ocf:heartbeat:SAPInstance): Stopped (unmanaged) -+ -+Transition Summary: -+ -+Executing Cluster Transition: -+Using the original execution date of: 2021-06-11 13:55:24Z -+ -+Revised Cluster Status: -+ * Node List: -+ * Node gcdoubwap02: pending -+ * Online: [ gcdoubwap01 ] -+ -+ * Full List of Resources: -+ * stonith_gcdoubwap01 (stonith:fence_gce): Stopped (unmanaged) -+ * stonith_gcdoubwap02 (stonith:fence_gce): Stopped (unmanaged) -+ * Clone Set: fs_UC5_SAPMNT-clone [fs_UC5_SAPMNT] (unmanaged): -+ * Stopped: [ gcdoubwap01 gcdoubwap02 ] -+ * Clone Set: fs_UC5_SYS-clone [fs_UC5_SYS] (unmanaged): -+ * Stopped: [ gcdoubwap01 gcdoubwap02 ] -+ * Resource Group: grp_UC5_ascs (unmanaged): -+ * rsc_vip_int_ascs (ocf:heartbeat:IPaddr2): Stopped (unmanaged) -+ * rsc_vip_gcp_ascs (ocf:heartbeat:gcp-vpc-move-vip): Started gcdoubwap01 (unmanaged) -+ * fs_UC5_ascs (ocf:heartbeat:Filesystem): Stopped (unmanaged) -+ * rsc_sap_UC5_ASCS11 (ocf:heartbeat:SAPInstance): Stopped (unmanaged) -+ * Resource Group: grp_UC5_ers (unmanaged): -+ * rsc_vip_init_ers (ocf:heartbeat:IPaddr2): Stopped 
(unmanaged) -+ * rsc_vip_gcp_ers (ocf:heartbeat:gcp-vpc-move-vip): Stopped (unmanaged) -+ * fs_UC5_ers (ocf:heartbeat:Filesystem): Stopped (unmanaged) -+ * rsc_sap_UC5_ERS12 (ocf:heartbeat:SAPInstance): Stopped (unmanaged) -diff --git a/cts/scheduler/xml/probe-pending-node.xml b/cts/scheduler/xml/probe-pending-node.xml -new file mode 100644 -index 0000000..9f55c92 ---- /dev/null -+++ b/cts/scheduler/xml/probe-pending-node.xml -@@ -0,0 +1,247 @@ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ -+ --- -1.8.3.1 - diff --git a/SOURCES/011-crm_attribute-regression.patch b/SOURCES/011-crm_attribute-regression.patch deleted file mode 100644 index 7263313..0000000 --- a/SOURCES/011-crm_attribute-regression.patch +++ /dev/null @@ -1,150 +0,0 @@ -From ea5510dd979bb6d375324cda26925d9e7c4362f5 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 19 Jul 2021 10:04:16 -0400 -Subject: [PATCH 1/2] Low: tools: The --get-value option does not require an - arg. - -Regression in 2.1.0 introduced by 15f5c2901. 
---- - tools/crm_attribute.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 2cc8d26..8a5b4e4 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -242,7 +242,7 @@ static GOptionEntry deprecated_entries[] = { - NULL, NULL - }, - -- { "get-value", 0, G_OPTION_FLAG_HIDDEN, G_OPTION_ARG_CALLBACK, value_cb, -+ { "get-value", 0, G_OPTION_FLAG_HIDDEN|G_OPTION_FLAG_NO_ARG, G_OPTION_ARG_CALLBACK, value_cb, - NULL, NULL - }, - --- -1.8.3.1 - - -From ef054d943afe8e60017f6adc4e25f88a59ac91a4 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Mon, 19 Jul 2021 11:37:04 -0400 -Subject: [PATCH 2/2] Low: libcrmcommon: Allow negative numbers as cmdline - options. - -The bug here is that negative numbers (for instance, negative scores) -are not supported as command line arguments. Because we break up a -string that starts with a single dash into multiple arguments, "-1000" -becomes "-1", "-0", "-0", and "-0". - -Because we don't have enough information about what is happening on the -command line, the best we can do here is recognize something as a -negative number and pass it on. Any errors will have to be detected at -a later step. - -Also note that we only recognize negative numbers if they start with -1-9. Starting with 0 will be recognized as some sort of string. - -Regression in 2.1.0 caused by a long-standing bug in -pcmk__cmdline_preproc_test. 
---- - lib/common/cmdline.c | 29 ++++++++++++++++++++++ - .../tests/cmdline/pcmk__cmdline_preproc_test.c | 24 +++++++++++++++++- - 2 files changed, 52 insertions(+), 1 deletion(-) - -diff --git a/lib/common/cmdline.c b/lib/common/cmdline.c -index 7c95d02..9c1b810 100644 ---- a/lib/common/cmdline.c -+++ b/lib/common/cmdline.c -@@ -9,6 +9,7 @@ - - #include - -+#include - #include - - #include -@@ -189,6 +190,34 @@ pcmk__cmdline_preproc(char **argv, const char *special) { - /* Skip over leading dash */ - char *ch = argv[i]+1; - -+ /* This looks like the start of a number, which means it is a negative -+ * number. It's probably the argument to the preceeding option, but -+ * we can't know that here. Copy it over and let whatever handles -+ * arguments next figure it out. -+ */ -+ if (*ch != '\0' && *ch >= '1' && *ch <= '9') { -+ bool is_numeric = true; -+ -+ while (*ch != '\0') { -+ if (!isdigit(*ch)) { -+ is_numeric = false; -+ break; -+ } -+ -+ ch++; -+ } -+ -+ if (is_numeric) { -+ g_ptr_array_add(arr, g_strdup_printf("%s", argv[i])); -+ continue; -+ } else { -+ /* This argument wasn't entirely numeric. Reset ch to the -+ * beginning so we can process it one character at a time. -+ */ -+ ch = argv[i]+1; -+ } -+ } -+ - while (*ch != '\0') { - /* This is a special short argument that takes an option. getopt - * allows values to be interspersed with a list of arguments, but -diff --git a/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c b/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -index b8506c6..9a752ef 100644 ---- a/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -+++ b/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -@@ -1,5 +1,5 @@ - /* -- * Copyright 2020 the Pacemaker project contributors -+ * Copyright 2020-2021 the Pacemaker project contributors - * - * The version control history for this file may have further details. 
- * -@@ -86,6 +86,26 @@ long_arg(void) { - g_strfreev(processed); - } - -+static void -+negative_score(void) { -+ const char *argv[] = { "-v", "-1000", NULL }; -+ const gchar *expected[] = { "-v", "-1000", NULL }; -+ -+ gchar **processed = pcmk__cmdline_preproc((char **) argv, "v"); -+ LISTS_EQ(processed, expected); -+ g_strfreev(processed); -+} -+ -+static void -+negative_score_2(void) { -+ const char *argv[] = { "-1i3", NULL }; -+ const gchar *expected[] = { "-1", "-i", "-3", NULL }; -+ -+ gchar **processed = pcmk__cmdline_preproc((char **) argv, NULL); -+ LISTS_EQ(processed, expected); -+ g_strfreev(processed); -+} -+ - int - main(int argc, char **argv) - { -@@ -98,5 +118,7 @@ main(int argc, char **argv) - g_test_add_func("/common/cmdline/preproc/special_args", special_args); - g_test_add_func("/common/cmdline/preproc/special_arg_at_end", special_arg_at_end); - g_test_add_func("/common/cmdline/preproc/long_arg", long_arg); -+ g_test_add_func("/common/cmdline/preproc/negative_score", negative_score); -+ g_test_add_func("/common/cmdline/preproc/negative_score_2", negative_score_2); - return g_test_run(); - } --- -1.8.3.1 - diff --git a/SOURCES/012-string-arguments.patch b/SOURCES/012-string-arguments.patch deleted file mode 100644 index 6419117..0000000 --- a/SOURCES/012-string-arguments.patch +++ /dev/null @@ -1,221 +0,0 @@ -From 2eee93e8f9ea2daa81769bc69843d63ced1a7112 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 20 Jul 2021 16:39:07 -0400 -Subject: [PATCH 1/2] Low: tools: Audit command line options. - -This just goes through and makes sure the command line options that take -arguments are in the special parameter to pcmk__cmdline_preproc, and -that options that do not take arguments are not. 
---- - tools/crm_attribute.c | 2 +- - tools/crm_error.c | 2 +- - tools/crm_resource.c | 2 +- - tools/crm_rule.c | 2 +- - tools/crm_simulate.c | 2 +- - tools/crmadmin.c | 2 +- - tools/stonith_admin.c | 2 +- - 7 files changed, 7 insertions(+), 7 deletions(-) - -diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c -index 8a5b4e4..6bd4e2a 100644 ---- a/tools/crm_attribute.c -+++ b/tools/crm_attribute.c -@@ -312,7 +312,7 @@ main(int argc, char **argv) - - GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "DGNPdilnpstv"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "NPUdilnpstv"); - GOptionContext *context = build_arg_context(args, &output_group); - - if (!g_option_context_parse_strv(context, &processed_args, &error)) { -diff --git a/tools/crm_error.c b/tools/crm_error.c -index b4328ce..923f393 100644 ---- a/tools/crm_error.c -+++ b/tools/crm_error.c -@@ -79,7 +79,7 @@ main(int argc, char **argv) - - GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "lrnX"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, NULL); - GOptionContext *context = build_arg_context(args, &output_group); - - if (!g_option_context_parse_strv(context, &processed_args, &error)) { -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index fa7902c..d8e140f 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1530,7 +1530,7 @@ main(int argc, char **argv) - */ - - args = pcmk__new_common_args(SUMMARY); -- processed_args = pcmk__cmdline_preproc(argv, "GINSTdginpstuv"); -+ processed_args = pcmk__cmdline_preproc(argv, "GHINSTdginpstuvx"); - context = build_arg_context(args, &output_group); - - pcmk__register_formats(output_group, formats); -diff --git a/tools/crm_rule.c b/tools/crm_rule.c -index 8b19bcd..30c5155 100644 ---- a/tools/crm_rule.c -+++ 
b/tools/crm_rule.c -@@ -239,7 +239,7 @@ main(int argc, char **argv) - - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); - GOptionContext *context = build_arg_context(args); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "nopNO"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "drX"); - - if (!g_option_context_parse_strv(context, &processed_args, &error)) { - exit_code = CRM_EX_USAGE; -diff --git a/tools/crm_simulate.c b/tools/crm_simulate.c -index 0406bff..c83b1b1 100644 ---- a/tools/crm_simulate.c -+++ b/tools/crm_simulate.c -@@ -865,7 +865,7 @@ main(int argc, char **argv) - - GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "bdefgiqrtuwxDFGINO"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "bdefgiqrtuwxDFGINOP"); - GOptionContext *context = build_arg_context(args, &output_group); - - /* This must come before g_option_context_parse_strv. */ -diff --git a/tools/crmadmin.c b/tools/crmadmin.c -index 5cbde1b..b98f282 100644 ---- a/tools/crmadmin.c -+++ b/tools/crmadmin.c -@@ -188,7 +188,7 @@ main(int argc, char **argv) - - GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "itBDEHKNPS"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "itKNS"); - GOptionContext *context = build_arg_context(args, &output_group); - - pcmk__register_formats(output_group, formats); -diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c -index 6773cea..2d48326 100644 ---- a/tools/stonith_admin.c -+++ b/tools/stonith_admin.c -@@ -349,7 +349,7 @@ main(int argc, char **argv) - - GOptionGroup *output_group = NULL; - pcmk__common_args_t *args = pcmk__new_common_args(SUMMARY); -- gchar **processed_args = pcmk__cmdline_preproc(argv, "adehilorstvBCDFHQRTU"); -+ gchar **processed_args = pcmk__cmdline_preproc(argv, "adehilorstvyBCDFHQRTU"); - 
GOptionContext *context = build_arg_context(args, &output_group); - - pcmk__register_formats(output_group, formats); --- -1.8.3.1 - - -From 8301678ad1162450814d2fea5288aefe47a67a74 Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Tue, 20 Jul 2021 16:40:58 -0400 -Subject: [PATCH 2/2] Low: libcrmcommon: Also allow string arguments that start - with a dash. - -There's various places where an option to a command line argument could -itself be a valid command line argument. For instance: - - crm_attribute -n crm_mon_options -v "-1i3" - -The previous patching to pcmk__cmdline_preproc did not take this into -account. With this patch, options that are last in a string (or by -themselves) and take an argument will have the next command line option -grabbed and copied straight through without processing. - -Regression in 2.1.0 caused by a long-standing bug in pcmk__cmdline_preproc. ---- - lib/common/cmdline.c | 8 ++++++ - .../tests/cmdline/pcmk__cmdline_preproc_test.c | 33 ++++++++++++++++++++++ - 2 files changed, 41 insertions(+) - -diff --git a/lib/common/cmdline.c b/lib/common/cmdline.c -index 9c1b810..1ca6147 100644 ---- a/lib/common/cmdline.c -+++ b/lib/common/cmdline.c -@@ -146,6 +146,7 @@ gchar ** - pcmk__cmdline_preproc(char **argv, const char *special) { - GPtrArray *arr = NULL; - bool saw_dash_dash = false; -+ bool copy_option = false; - - if (argv == NULL) { - return NULL; -@@ -175,6 +176,12 @@ pcmk__cmdline_preproc(char **argv, const char *special) { - continue; - } - -+ if (copy_option == true) { -+ g_ptr_array_add(arr, g_strdup(argv[i])); -+ copy_option = false; -+ continue; -+ } -+ - /* This is just a dash by itself. That could indicate stdin/stdout, or - * it could be user error. Copy it over and let glib figure it out. 
- */ -@@ -239,6 +246,7 @@ pcmk__cmdline_preproc(char **argv, const char *special) { - */ - } else { - g_ptr_array_add(arr, g_strdup_printf("-%c", *ch)); -+ copy_option = true; - ch++; - } - -diff --git a/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c b/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -index 9a752ef..edc5640 100644 ---- a/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -+++ b/lib/common/tests/cmdline/pcmk__cmdline_preproc_test.c -@@ -106,6 +106,36 @@ negative_score_2(void) { - g_strfreev(processed); - } - -+static void -+string_arg_with_dash(void) { -+ const char *argv[] = { "-n", "crm_mon_options", "-v", "--opt1 --opt2", NULL }; -+ const gchar *expected[] = { "-n", "crm_mon_options", "-v", "--opt1 --opt2", NULL }; -+ -+ gchar **processed = pcmk__cmdline_preproc((char **) argv, "v"); -+ LISTS_EQ(processed, expected); -+ g_strfreev(processed); -+} -+ -+static void -+string_arg_with_dash_2(void) { -+ const char *argv[] = { "-n", "crm_mon_options", "-v", "-1i3", NULL }; -+ const gchar *expected[] = { "-n", "crm_mon_options", "-v", "-1i3", NULL }; -+ -+ gchar **processed = pcmk__cmdline_preproc((char **) argv, "v"); -+ LISTS_EQ(processed, expected); -+ g_strfreev(processed); -+} -+ -+static void -+string_arg_with_dash_3(void) { -+ const char *argv[] = { "-abc", "-1i3", NULL }; -+ const gchar *expected[] = { "-a", "-b", "-c", "-1i3", NULL }; -+ -+ gchar **processed = pcmk__cmdline_preproc((char **) argv, "c"); -+ LISTS_EQ(processed, expected); -+ g_strfreev(processed); -+} -+ - int - main(int argc, char **argv) - { -@@ -120,5 +150,8 @@ main(int argc, char **argv) - g_test_add_func("/common/cmdline/preproc/long_arg", long_arg); - g_test_add_func("/common/cmdline/preproc/negative_score", negative_score); - g_test_add_func("/common/cmdline/preproc/negative_score_2", negative_score_2); -+ g_test_add_func("/common/cmdline/preproc/string_arg_with_dash", string_arg_with_dash); -+ 
g_test_add_func("/common/cmdline/preproc/string_arg_with_dash_2", string_arg_with_dash_2); -+ g_test_add_func("/common/cmdline/preproc/string_arg_with_dash_3", string_arg_with_dash_3); - return g_test_run(); - } --- -1.8.3.1 - diff --git a/SOURCES/013-leaks.patch b/SOURCES/013-leaks.patch deleted file mode 100644 index daa42b8..0000000 --- a/SOURCES/013-leaks.patch +++ /dev/null @@ -1,241 +0,0 @@ -From bee54eba4d9c28d3a7907a3e13a5deeee6bc0916 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 27 Jul 2021 11:01:04 -0500 -Subject: [PATCH 1/2] Low: tools: avoid (insignificant) memory leaks - -detected by valgrind ---- - lib/pacemaker/pcmk_cluster_queries.c | 2 ++ - tools/crm_diff.c | 2 +- - tools/crm_resource.c | 33 ++++++++++++++++++++------------- - tools/crm_resource_ban.c | 2 +- - 4 files changed, 24 insertions(+), 15 deletions(-) - -diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c -index c68cf9d..46e5538 100644 ---- a/lib/pacemaker/pcmk_cluster_queries.c -+++ b/lib/pacemaker/pcmk_cluster_queries.c -@@ -440,6 +440,7 @@ pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT) - } - rc = the_cib->cmds->signon(the_cib, crm_system_name, cib_command); - if (rc != pcmk_ok) { -+ cib_delete(the_cib); - return pcmk_legacy2rc(rc); - } - -@@ -488,6 +489,7 @@ pcmk__list_nodes(pcmk__output_t *out, char *node_types, gboolean BASH_EXPORT) - free_xml(xml_node); - } - the_cib->cmds->signoff(the_cib); -+ cib_delete(the_cib); - return pcmk_legacy2rc(rc); - } - -diff --git a/tools/crm_diff.c b/tools/crm_diff.c -index b37f0ea..9890c10 100644 ---- a/tools/crm_diff.c -+++ b/tools/crm_diff.c -@@ -383,5 +383,5 @@ done: - free_xml(object_2); - - pcmk__output_and_clear_error(error, NULL); -- return exit_code; -+ crm_exit(exit_code); - } -diff --git a/tools/crm_resource.c b/tools/crm_resource.c -index d8e140f..8ca90cb 100644 ---- a/tools/crm_resource.c -+++ b/tools/crm_resource.c -@@ -1081,6 +1081,8 @@ 
clear_constraints(pcmk__output_t *out, xmlNodePtr *cib_xml_copy) - g_set_error(&error, PCMK__RC_ERROR, rc, - "Could not get modified CIB: %s\n", pcmk_strerror(rc)); - g_list_free(before); -+ free_xml(*cib_xml_copy); -+ *cib_xml_copy = NULL; - return rc; - } - -@@ -1232,29 +1234,34 @@ populate_working_set(xmlNodePtr *cib_xml_copy) - - if (options.xml_file != NULL) { - *cib_xml_copy = filename2xml(options.xml_file); -+ if (*cib_xml_copy == NULL) { -+ rc = pcmk_rc_cib_corrupt; -+ } - } else { - rc = cib_conn->cmds->query(cib_conn, NULL, cib_xml_copy, cib_scope_local | cib_sync_call); - rc = pcmk_legacy2rc(rc); - } - -- if(rc != pcmk_rc_ok) { -- return rc; -+ if (rc == pcmk_rc_ok) { -+ data_set = pe_new_working_set(); -+ if (data_set == NULL) { -+ rc = ENOMEM; -+ } else { -+ pe__set_working_set_flags(data_set, -+ pe_flag_no_counts|pe_flag_no_compat); -+ data_set->priv = out; -+ rc = update_working_set_xml(data_set, cib_xml_copy); -+ } - } - -- /* Populate the working set instance */ -- data_set = pe_new_working_set(); -- if (data_set == NULL) { -- rc = ENOMEM; -+ if (rc != pcmk_rc_ok) { -+ free_xml(*cib_xml_copy); -+ *cib_xml_copy = NULL; - return rc; - } - -- pe__set_working_set_flags(data_set, pe_flag_no_counts|pe_flag_no_compat); -- data_set->priv = out; -- rc = update_working_set_xml(data_set, cib_xml_copy); -- if (rc == pcmk_rc_ok) { -- cluster_status(data_set); -- } -- return rc; -+ cluster_status(data_set); -+ return pcmk_rc_ok; - } - - static int -diff --git a/tools/crm_resource_ban.c b/tools/crm_resource_ban.c -index a297d49..2c4f48d 100644 ---- a/tools/crm_resource_ban.c -+++ b/tools/crm_resource_ban.c -@@ -292,7 +292,7 @@ resource_clear_node_in_location(const char *rsc_id, const char *host, cib_t * ci - rc = pcmk_legacy2rc(rc); - } - -- free(fragment); -+ free_xml(fragment); - return rc; - } - --- -1.8.3.1 - - -From a30ff4a87f291a0c9e03c4efb9c9046d2ac594f1 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Tue, 27 Jul 2021 11:26:59 -0500 -Subject: [PATCH 
2/2] Fix: tools: avoid memory leaks in crm_mon - -could be significant in an interactive session - -regressions introduced in 2.0.4 and 2.0.5 ---- - lib/pengine/bundle.c | 3 ++- - lib/pengine/clone.c | 5 ++--- - lib/pengine/pe_output.c | 3 +++ - 3 files changed, 7 insertions(+), 4 deletions(-) - -diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c -index 6ba786a..7e1d428 100644 ---- a/lib/pengine/bundle.c -+++ b/lib/pengine/bundle.c -@@ -1497,7 +1497,7 @@ pe__bundle_xml(pcmk__output_t *out, va_list args) - for (GList *gIter = bundle_data->replicas; gIter != NULL; - gIter = gIter->next) { - pe__bundle_replica_t *replica = gIter->data; -- char *id = pcmk__itoa(replica->offset); -+ char *id = NULL; - gboolean print_ip, print_child, print_ctnr, print_remote; - - CRM_ASSERT(replica); -@@ -1531,6 +1531,7 @@ pe__bundle_xml(pcmk__output_t *out, va_list args) - CRM_ASSERT(rc == pcmk_rc_ok); - } - -+ id = pcmk__itoa(replica->offset); - rc = pe__name_and_nvpairs_xml(out, true, "replica", 1, "id", id); - free(id); - CRM_ASSERT(rc == pcmk_rc_ok); -diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c -index 6323692..ab91fd1 100644 ---- a/lib/pengine/clone.c -+++ b/lib/pengine/clone.c -@@ -807,10 +807,10 @@ pe__clone_html(pcmk__output_t *out, va_list args) - pcmk__add_word(&list_text, &list_text_len, host->details->uname); - active_instances++; - } -+ g_list_free(promoted_list); - - if (list_text != NULL) { - out->list_item(out, NULL, PROMOTED_INSTANCES ": [ %s ]", list_text); -- g_list_free(promoted_list); - free(list_text); - list_text = NULL; - list_text_len = 0; -@@ -828,6 +828,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) - pcmk__add_word(&list_text, &list_text_len, host->details->uname); - active_instances++; - } -+ g_list_free(started_list); - - if (list_text != NULL) { - if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { -@@ -847,7 +848,6 @@ pe__clone_html(pcmk__output_t *out, va_list args) - out->list_item(out, NULL, "Started: [ %s ]", list_text); - } - -- 
g_list_free(started_list); - free(list_text); - list_text = NULL; - list_text_len = 0; -@@ -1048,10 +1048,10 @@ pe__clone_text(pcmk__output_t *out, va_list args) - pcmk__add_word(&list_text, &list_text_len, host->details->uname); - active_instances++; - } -+ g_list_free(promoted_list); - - if (list_text != NULL) { - out->list_item(out, PROMOTED_INSTANCES, "[ %s ]", list_text); -- g_list_free(promoted_list); - free(list_text); - list_text = NULL; - list_text_len = 0; -@@ -1069,6 +1069,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) - pcmk__add_word(&list_text, &list_text_len, host->details->uname); - active_instances++; - } -+ g_list_free(started_list); - - if (list_text != NULL) { - if (pcmk_is_set(rsc->flags, pe_rsc_promotable)) { -@@ -1084,7 +1085,6 @@ pe__clone_text(pcmk__output_t *out, va_list args) - out->list_item(out, "Started", "[ %s ]", list_text); - } - -- g_list_free(started_list); - free(list_text); - list_text = NULL; - } -diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c -index b8997c4..20bd1a9 100644 ---- a/lib/pengine/pe_output.c -+++ b/lib/pengine/pe_output.c -@@ -1410,6 +1410,8 @@ node_text(pcmk__output_t *out, va_list args) { - - out->end_list(out); - out->end_list(out); -+ -+ g_list_free(rscs); - } - - } else { -@@ -1739,6 +1741,7 @@ node_attribute_list(pcmk__output_t *out, va_list args) { - } - - if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ g_list_free(attr_list); - continue; - } - --- -1.8.3.1 - diff --git a/SOURCES/014-str-list.patch b/SOURCES/014-str-list.patch deleted file mode 100644 index e6993ab..0000000 --- a/SOURCES/014-str-list.patch +++ /dev/null @@ -1,465 +0,0 @@ -From 45813df3eb4c8ad8b1744fa5dd56af86ad0fb3dd Mon Sep 17 00:00:00 2001 -From: Chris Lumens -Date: Thu, 17 Jun 2021 16:07:55 -0400 -Subject: [PATCH] Refactor: libs: pcmk__str_in_list should support pcmk__str_* - flags. 
- ---- - include/crm/common/strings_internal.h | 2 +- - lib/common/strings.c | 34 +++++++++++++++++++++++---- - lib/fencing/st_output.c | 10 ++++---- - lib/pengine/bundle.c | 8 +++---- - lib/pengine/clone.c | 28 +++++++++++----------- - lib/pengine/group.c | 18 +++++++------- - lib/pengine/native.c | 4 ++-- - lib/pengine/pe_output.c | 22 ++++++++--------- - lib/pengine/utils.c | 6 ++--- - 9 files changed, 79 insertions(+), 53 deletions(-) - -diff --git a/include/crm/common/strings_internal.h b/include/crm/common/strings_internal.h -index 94982cb4e..687079814 100644 ---- a/include/crm/common/strings_internal.h -+++ b/include/crm/common/strings_internal.h -@@ -117,7 +117,7 @@ pcmk__intkey_table_remove(GHashTable *hash_table, int key) - return g_hash_table_remove(hash_table, GINT_TO_POINTER(key)); - } - --gboolean pcmk__str_in_list(GList *lst, const gchar *s); -+gboolean pcmk__str_in_list(GList *lst, const gchar *s, uint32_t flags); - - bool pcmk__strcase_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; - bool pcmk__str_any_of(const char *s, ...) G_GNUC_NULL_TERMINATED; -diff --git a/lib/common/strings.c b/lib/common/strings.c -index 3264db5b6..e1e98803b 100644 ---- a/lib/common/strings.c -+++ b/lib/common/strings.c -@@ -872,14 +872,30 @@ pcmk__parse_ll_range(const char *srcstring, long long *start, long long *end) - * Search \p lst for \p s, taking case into account. As a special case, - * if "*" is the only element of \p lst, the search is successful. - * -- * \param[in] lst List to search -- * \param[in] s String to search for -+ * Behavior can be changed with various flags: -+ * -+ * - pcmk__str_casei - By default, comparisons are done taking case into -+ * account. This flag makes comparisons case-insensitive. -+ * - pcmk__str_null_matches - If the input string is NULL, return TRUE. -+ * -+ * \note The special "*" matching rule takes precedence over flags. 
In -+ * particular, "*" will match a NULL input string even without -+ * pcmk__str_null_matches being specified. -+ * -+ * \note No matter what input string or flags are provided, an empty -+ * list will always return FALSE. -+ * -+ * \param[in] lst List to search -+ * \param[in] s String to search for -+ * \param[in] flags A bitfield of pcmk__str_flags to modify operation - * - * \return \c TRUE if \p s is in \p lst, or \c FALSE otherwise - */ - gboolean --pcmk__str_in_list(GList *lst, const gchar *s) -+pcmk__str_in_list(GList *lst, const gchar *s, uint32_t flags) - { -+ GCompareFunc fn; -+ - if (lst == NULL) { - return FALSE; - } -@@ -888,7 +904,17 @@ pcmk__str_in_list(GList *lst, const gchar *s) - return TRUE; - } - -- return g_list_find_custom(lst, s, (GCompareFunc) strcmp) != NULL; -+ if (s == NULL) { -+ return pcmk_is_set(flags, pcmk__str_null_matches); -+ } -+ -+ if (pcmk_is_set(flags, pcmk__str_casei)) { -+ fn = (GCompareFunc) strcasecmp; -+ } else { -+ fn = (GCompareFunc) strcmp; -+ } -+ -+ return g_list_find_custom(lst, s, fn) != NULL; - } - - static bool -diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c -index 568ae46a8..e1ae8ac87 100644 ---- a/lib/fencing/st_output.c -+++ b/lib/fencing/st_output.c -@@ -47,7 +47,7 @@ stonith__failed_history(pcmk__output_t *out, va_list args) { - continue; - } - -- if (!pcmk__str_in_list(only_node, hp->target)) { -+ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { - continue; - } - -@@ -72,7 +72,7 @@ stonith__history(pcmk__output_t *out, va_list args) { - int rc = pcmk_rc_no_output; - - for (stonith_history_t *hp = history; hp; hp = hp->next) { -- if (!pcmk__str_in_list(only_node, hp->target)) { -+ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { - continue; - } - -@@ -101,7 +101,7 @@ stonith__full_history(pcmk__output_t *out, va_list args) { - int rc = pcmk_rc_no_output; - - for (stonith_history_t *hp = history; hp; hp = hp->next) { -- if (!pcmk__str_in_list(only_node, 
hp->target)) { -+ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { - continue; - } - -@@ -129,7 +129,7 @@ full_history_xml(pcmk__output_t *out, va_list args) { - - if (history_rc == 0) { - for (stonith_history_t *hp = history; hp; hp = hp->next) { -- if (!pcmk__str_in_list(only_node, hp->target)) { -+ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { - continue; - } - -@@ -218,7 +218,7 @@ stonith__pending_actions(pcmk__output_t *out, va_list args) { - int rc = pcmk_rc_no_output; - - for (stonith_history_t *hp = history; hp; hp = hp->next) { -- if (!pcmk__str_in_list(only_node, hp->target)) { -+ if (!pcmk__str_in_list(only_node, hp->target, pcmk__str_none)) { - continue; - } - -diff --git a/lib/pengine/bundle.c b/lib/pengine/bundle.c -index 9237392e4..6ba786ae6 100644 ---- a/lib/pengine/bundle.c -+++ b/lib/pengine/bundle.c -@@ -1492,7 +1492,7 @@ pe__bundle_xml(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc->id); -+ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); - - for (GList *gIter = bundle_data->replicas; gIter != NULL; - gIter = gIter->next) { -@@ -1614,7 +1614,7 @@ pe__bundle_html(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc->id); -+ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); - - for (GList *gIter = bundle_data->replicas; gIter != NULL; - gIter = gIter->next) { -@@ -1742,7 +1742,7 @@ pe__bundle_text(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc->id); -+ print_everything = pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none); - - for (GList *gIter = bundle_data->replicas; gIter != NULL; - gIter = gIter->next) { -@@ -2044,7 +2044,7 @@ pe__bundle_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_paren - gboolean passes = FALSE; - pe__bundle_variant_data_t *bundle_data = NULL; - -- if 
(pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { -+ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { - passes = TRUE; - } else { - get_bundle_variant_data(bundle_data, rsc); -diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c -index 5662338f3..5a6bfa61f 100644 ---- a/lib/pengine/clone.c -+++ b/lib/pengine/clone.c -@@ -624,8 +624,8 @@ pe__clone_xml(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - for (; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; -@@ -693,8 +693,8 @@ pe__clone_html(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s%s", - rsc->id, ID(clone_data->xml_obj_child), -@@ -801,7 +801,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) - for (gIter = promoted_list; gIter; gIter = gIter->next) { - pe_node_t *host = gIter->data; - -- if (!pcmk__str_in_list(only_node, host->details->uname)) { -+ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { - continue; - } - -@@ -822,7 +822,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) - for (gIter = started_list; gIter; gIter = gIter->next) { - pe_node_t *host = gIter->data; - -- if (!pcmk__str_in_list(only_node, host->details->uname)) { -+ if 
(!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { - continue; - } - -@@ -884,7 +884,7 @@ pe__clone_html(pcmk__output_t *out, va_list args) - pe_node_t *node = (pe_node_t *)nIter->data; - - if (pe_find_node(rsc->running_on, node->details->uname) == NULL && -- pcmk__str_in_list(only_node, node->details->uname)) { -+ pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - pcmk__add_word(&stopped_list, &stopped_list_len, - node->details->uname); - } -@@ -933,8 +933,8 @@ pe__clone_text(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - out->begin_list(out, NULL, NULL, "Clone Set: %s [%s]%s%s%s%s", - rsc->id, ID(clone_data->xml_obj_child), -@@ -1041,7 +1041,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) - for (gIter = promoted_list; gIter; gIter = gIter->next) { - pe_node_t *host = gIter->data; - -- if (!pcmk__str_in_list(only_node, host->details->uname)) { -+ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { - continue; - } - -@@ -1062,7 +1062,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) - for (gIter = started_list; gIter; gIter = gIter->next) { - pe_node_t *host = gIter->data; - -- if (!pcmk__str_in_list(only_node, host->details->uname)) { -+ if (!pcmk__str_in_list(only_node, host->details->uname, pcmk__str_none)) { - continue; - } - -@@ -1120,7 +1120,7 @@ pe__clone_text(pcmk__output_t *out, va_list args) - pe_node_t *node = (pe_node_t *)nIter->data; - - if (pe_find_node(rsc->running_on, node->details->uname) == NULL && -- pcmk__str_in_list(only_node, node->details->uname)) { -+ pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - 
pcmk__add_word(&stopped_list, &stopped_list_len, - node->details->uname); - } -@@ -1220,11 +1220,11 @@ pe__clone_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent - gboolean passes = FALSE; - clone_variant_data_t *clone_data = NULL; - -- if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { -+ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { - passes = TRUE; - } else { - get_clone_variant_data(clone_data, rsc); -- passes = pcmk__str_in_list(only_rsc, ID(clone_data->xml_obj_child)); -+ passes = pcmk__str_in_list(only_rsc, ID(clone_data->xml_obj_child), pcmk__str_none); - - if (!passes) { - for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { -diff --git a/lib/pengine/group.c b/lib/pengine/group.c -index 23a72cff7..5f9aa83ce 100644 ---- a/lib/pengine/group.c -+++ b/lib/pengine/group.c -@@ -201,8 +201,8 @@ pe__group_xml(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - for (; gIter != NULL; gIter = gIter->next) { - pe_resource_t *child_rsc = (pe_resource_t *) gIter->data; -@@ -248,8 +248,8 @@ pe__group_html(pcmk__output_t *out, va_list args) - return rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - if (options & pe_print_brief) { - GList *rscs = pe__filter_rsc_list(rsc->children, only_rsc); -@@ -303,8 +303,8 @@ pe__group_text(pcmk__output_t *out, va_list args) - return 
rc; - } - -- print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)); -+ print_everything = pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)); - - if (options & pe_print_brief) { - GList *rscs = pe__filter_rsc_list(rsc->children, only_rsc); -@@ -387,11 +387,11 @@ pe__group_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent - { - gboolean passes = FALSE; - -- if (check_parent && pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { -+ if (check_parent && pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)), pcmk__str_none)) { - passes = TRUE; -- } else if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc))) { -+ } else if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none)) { - passes = TRUE; -- } else if (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id)) { -+ } else if (strstr(rsc->id, ":") != NULL && pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)) { - passes = TRUE; - } else { - for (GList *gIter = rsc->children; gIter != NULL; gIter = gIter->next) { -diff --git a/lib/pengine/native.c b/lib/pengine/native.c -index c2333d0d2..56054fc4a 100644 ---- a/lib/pengine/native.c -+++ b/lib/pengine/native.c -@@ -1338,8 +1338,8 @@ pe__rscs_brief_output(pcmk__output_t *out, GList *rsc_list, unsigned int show_op - gboolean - pe__native_is_filtered(pe_resource_t *rsc, GList *only_rsc, gboolean check_parent) - { -- if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) || -- pcmk__str_in_list(only_rsc, rsc->id)) { -+ if (pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) || -+ pcmk__str_in_list(only_rsc, rsc->id, pcmk__str_none)) { - return FALSE; - } else if (check_parent) { - pe_resource_t *up = uber_parent(rsc); -diff --git a/lib/pengine/pe_output.c b/lib/pengine/pe_output.c 
-index 727475735..a6dc4ade8 100644 ---- a/lib/pengine/pe_output.c -+++ b/lib/pengine/pe_output.c -@@ -670,8 +670,8 @@ ban_list(pcmk__output_t *out, va_list args) { - continue; - } - -- if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh)) && -- !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)))) { -+ if (!pcmk__str_in_list(only_rsc, rsc_printable_id(location->rsc_lh), pcmk__str_none) && -+ !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(location->rsc_lh)), pcmk__str_none)) { - continue; - } - -@@ -1254,7 +1254,7 @@ failed_action_list(pcmk__output_t *out, va_list args) { - xml_op = pcmk__xml_next(xml_op)) { - char *rsc = NULL; - -- if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME))) { -+ if (!pcmk__str_in_list(only_node, crm_element_value(xml_op, XML_ATTR_UNAME), pcmk__str_none)) { - continue; - } - -@@ -1263,7 +1263,7 @@ failed_action_list(pcmk__output_t *out, va_list args) { - continue; - } - -- if (!pcmk__str_in_list(only_rsc, rsc)) { -+ if (!pcmk__str_in_list(only_rsc, rsc, pcmk__str_none)) { - free(rsc); - continue; - } -@@ -1738,7 +1738,7 @@ node_attribute_list(pcmk__output_t *out, va_list args) { - continue; - } - -- if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - g_list_free(attr_list); - continue; - } -@@ -1835,8 +1835,8 @@ node_history_list(pcmk__output_t *out, va_list args) { - * For other resource types, is_filtered is okay. 
- */ - if (uber_parent(rsc)->variant == pe_group) { -- if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc)) && -- !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)))) { -+ if (!pcmk__str_in_list(only_rsc, rsc_printable_id(rsc), pcmk__str_none) && -+ !pcmk__str_in_list(only_rsc, rsc_printable_id(uber_parent(rsc)), pcmk__str_none)) { - continue; - } - } else { -@@ -1899,7 +1899,7 @@ node_list_html(pcmk__output_t *out, va_list args) { - for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { - pe_node_t *node = (pe_node_t *) gIter->data; - -- if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - continue; - } - -@@ -1940,7 +1940,7 @@ pe__node_list_text(pcmk__output_t *out, va_list args) { - const char *node_mode = NULL; - char *node_name = pe__node_display_name(node, print_clone_detail); - -- if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - free(node_name); - continue; - } -@@ -2059,7 +2059,7 @@ node_list_xml(pcmk__output_t *out, va_list args) { - for (GList *gIter = nodes; gIter != NULL; gIter = gIter->next) { - pe_node_t *node = (pe_node_t *) gIter->data; - -- if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - continue; - } - -@@ -2097,7 +2097,7 @@ node_summary(pcmk__output_t *out, va_list args) { - continue; - } - -- if (!pcmk__str_in_list(only_node, node->details->uname)) { -+ if (!pcmk__str_in_list(only_node, node->details->uname, pcmk__str_none)) { - continue; - } - -diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c -index 450d8348c..d1be9e4ca 100644 ---- a/lib/pengine/utils.c -+++ b/lib/pengine/utils.c -@@ -2394,7 +2394,7 @@ pe__rsc_running_on_any_node_in_list(pe_resource_t *rsc, GList *node_list) - { - for (GList *ele = rsc->running_on; ele; ele = ele->next) { - pe_node_t 
*node = (pe_node_t *) ele->data; -- if (pcmk__str_in_list(node_list, node->details->uname)) { -+ if (pcmk__str_in_list(node_list, node->details->uname, pcmk__str_none)) { - return true; - } - } -@@ -2419,8 +2419,8 @@ pe__filter_rsc_list(GList *rscs, GList *filter) - /* I think the second condition is safe here for all callers of this - * function. If not, it needs to move into pe__node_text. - */ -- if (pcmk__str_in_list(filter, rsc_printable_id(rsc)) || -- (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent)))) { -+ if (pcmk__str_in_list(filter, rsc_printable_id(rsc), pcmk__str_none) || -+ (rsc->parent && pcmk__str_in_list(filter, rsc_printable_id(rsc->parent), pcmk__str_none))) { - retval = g_list_prepend(retval, rsc); - } - } --- -2.27.0 - diff --git a/SOURCES/015-sbd.patch b/SOURCES/015-sbd.patch deleted file mode 100644 index 9f47c35..0000000 --- a/SOURCES/015-sbd.patch +++ /dev/null @@ -1,1312 +0,0 @@ -From b49f49576ef9d801a48ce7a01a78c72e65be7880 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 30 Jul 2021 18:07:25 +0200 -Subject: [PATCH 1/3] Fix, Refactor: fenced: add return value to - get_agent_metadata - -Used to distinguish between empty metadata per design, -case of failed getting metadata that might succeed on a -retry and fatal failure. -Fixes as well regression that leads to endless retries getting -metadata for #watchdog - not superserious as it happens with -delays in between but still undesirable. 
---- - daemons/fenced/fenced_commands.c | 92 +++++++++++++++++++------------- - 1 file changed, 55 insertions(+), 37 deletions(-) - -diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c -index a778801b1..cd9968f1a 100644 ---- a/daemons/fenced/fenced_commands.c -+++ b/daemons/fenced/fenced_commands.c -@@ -69,7 +69,7 @@ static void stonith_send_reply(xmlNode * reply, int call_options, const char *re - static void search_devices_record_result(struct device_search_s *search, const char *device, - gboolean can_fence); - --static xmlNode * get_agent_metadata(const char *agent); -+static int get_agent_metadata(const char *agent, xmlNode **metadata); - static void read_action_metadata(stonith_device_t *device); - - typedef struct async_command_s { -@@ -323,19 +323,26 @@ fork_cb(GPid pid, gpointer user_data) - static int - get_agent_metadata_cb(gpointer data) { - stonith_device_t *device = data; -+ guint period_ms; - -- device->agent_metadata = get_agent_metadata(device->agent); -- if (device->agent_metadata) { -- read_action_metadata(device); -- stonith__device_parameter_flags(&(device->flags), device->id, -+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) { -+ case pcmk_rc_ok: -+ if (device->agent_metadata) { -+ read_action_metadata(device); -+ stonith__device_parameter_flags(&(device->flags), device->id, - device->agent_metadata); -- return G_SOURCE_REMOVE; -- } else { -- guint period_ms = pcmk__mainloop_timer_get_period(device->timer); -- if (period_ms < 160 * 1000) { -- mainloop_timer_set_period(device->timer, 2 * period_ms); -- } -- return G_SOURCE_CONTINUE; -+ } -+ return G_SOURCE_REMOVE; -+ -+ case EAGAIN: -+ period_ms = pcmk__mainloop_timer_get_period(device->timer); -+ if (period_ms < 160 * 1000) { -+ mainloop_timer_set_period(device->timer, 2 * period_ms); -+ } -+ return G_SOURCE_CONTINUE; -+ -+ default: -+ return G_SOURCE_REMOVE; - } - } - -@@ -700,38 +707,41 @@ init_metadata_cache(void) { - } - } - --static 
xmlNode * --get_agent_metadata(const char *agent) -+int -+get_agent_metadata(const char *agent, xmlNode ** metadata) - { -- xmlNode *xml = NULL; - char *buffer = NULL; - -+ if (metadata == NULL) { -+ return EINVAL; -+ } -+ *metadata = NULL; -+ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { -+ return pcmk_rc_ok; -+ } - init_metadata_cache(); - buffer = g_hash_table_lookup(metadata_cache, agent); -- if(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { -- return NULL; -- -- } else if(buffer == NULL) { -+ if (buffer == NULL) { - stonith_t *st = stonith_api_new(); - int rc; - - if (st == NULL) { - crm_warn("Could not get agent meta-data: " - "API memory allocation failed"); -- return NULL; -+ return EAGAIN; - } -- rc = st->cmds->metadata(st, st_opt_sync_call, agent, NULL, &buffer, 10); -+ rc = st->cmds->metadata(st, st_opt_sync_call, agent, -+ NULL, &buffer, 10); - stonith_api_delete(st); - if (rc || !buffer) { - crm_err("Could not retrieve metadata for fencing agent %s", agent); -- return NULL; -+ return EAGAIN; - } - g_hash_table_replace(metadata_cache, strdup(agent), buffer); - } - -- xml = string2xml(buffer); -- -- return xml; -+ *metadata = string2xml(buffer); -+ return pcmk_rc_ok; - } - - static gboolean -@@ -962,19 +972,27 @@ build_device_from_xml(xmlNode * msg) - g_list_free_full(device->targets, free); - device->targets = NULL; - } -- device->agent_metadata = get_agent_metadata(device->agent); -- if (device->agent_metadata) { -- read_action_metadata(device); -- stonith__device_parameter_flags(&(device->flags), device->id, -- device->agent_metadata); -- } else { -- if (device->timer == NULL) { -- device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, -+ switch (get_agent_metadata(device->agent, &device->agent_metadata)) { -+ case pcmk_rc_ok: -+ if (device->agent_metadata) { -+ read_action_metadata(device); -+ stonith__device_parameter_flags(&(device->flags), device->id, -+ device->agent_metadata); -+ } -+ break; -+ 
-+ case EAGAIN: -+ if (device->timer == NULL) { -+ device->timer = mainloop_timer_add("get_agent_metadata", 10 * 1000, - TRUE, get_agent_metadata_cb, device); -- } -- if (!mainloop_timer_running(device->timer)) { -- mainloop_timer_start(device->timer); -- } -+ } -+ if (!mainloop_timer_running(device->timer)) { -+ mainloop_timer_start(device->timer); -+ } -+ break; -+ -+ default: -+ break; - } - - value = g_hash_table_lookup(device->params, "nodeid"); --- -2.27.0 - - -From 5dd1e4459335764e0adf5fa78d81c875ae2332e9 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Fri, 30 Jul 2021 18:15:10 +0200 -Subject: [PATCH 2/3] feature: watchdog-fencing: allow restriction to certain - nodes - -Bump CRM_FEATURE_SET to 3.11.0 to encourage cluster being -fully upgraded to a version that supports the feature -before explicitly adding a watchdog-fence-device. ---- - configure.ac | 1 + - daemons/controld/controld_control.c | 2 +- - daemons/controld/controld_fencing.c | 14 ++ - daemons/controld/controld_fencing.h | 1 + - daemons/fenced/Makefile.am | 2 +- - daemons/fenced/fence_watchdog.in | 283 ++++++++++++++++++++++++++++ - daemons/fenced/fenced_commands.c | 141 +++++++++++--- - daemons/fenced/fenced_remote.c | 71 ++++--- - daemons/fenced/pacemaker-fenced.c | 131 +++++++++---- - daemons/fenced/pacemaker-fenced.h | 5 +- - include/crm/crm.h | 2 +- - include/crm/fencing/internal.h | 8 +- - lib/fencing/st_client.c | 61 ++++++ - lib/lrmd/lrmd_client.c | 6 +- - rpm/pacemaker.spec.in | 3 + - 16 files changed, 635 insertions(+), 97 deletions(-) - create mode 100755 daemons/fenced/fence_watchdog.in - -diff --git a/configure.ac b/configure.ac -index 436100c81..013562e46 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -1972,6 +1972,7 @@ CONFIG_FILES_EXEC([cts/cts-cli], - [cts/support/fence_dummy], - [cts/support/pacemaker-cts-dummyd], - [daemons/fenced/fence_legacy], -+ [daemons/fenced/fence_watchdog], - [doc/abi-check], - [extra/resources/ClusterMon], - [extra/resources/HealthSMART], 
-diff --git a/daemons/controld/controld_control.c b/daemons/controld/controld_control.c -index 45a70bb92..b5da6a46c 100644 ---- a/daemons/controld/controld_control.c -+++ b/daemons/controld/controld_control.c -@@ -615,7 +615,7 @@ static pcmk__cluster_option_t crmd_opts[] = { - }, - { - "stonith-watchdog-timeout", NULL, "time", NULL, -- "0", pcmk__valid_sbd_timeout, -+ "0", controld_verify_stonith_watchdog_timeout, - "How long to wait before we can assume nodes are safely down " - "when watchdog-based self-fencing via SBD is in use", - "If nonzero, along with `have-watchdog=true` automatically set by the " -diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c -index 0fba6613b..6c2a6c550 100644 ---- a/daemons/controld/controld_fencing.c -+++ b/daemons/controld/controld_fencing.c -@@ -11,6 +11,7 @@ - #include - #include - #include -+#include - #include - - #include -@@ -886,6 +887,19 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action) - return TRUE; - } - -+bool -+controld_verify_stonith_watchdog_timeout(const char *value) -+{ -+ gboolean rv = TRUE; -+ -+ if (stonith_api && (stonith_api->state != stonith_disconnected) && -+ stonith__watchdog_fencing_enabled_for_node_api(stonith_api, -+ fsa_our_uname)) { -+ rv = pcmk__valid_sbd_timeout(value); -+ } -+ return rv; -+} -+ - /* end stonith API client functions */ - - -diff --git a/daemons/controld/controld_fencing.h b/daemons/controld/controld_fencing.h -index d0ecc8234..ef68a0c83 100644 ---- a/daemons/controld/controld_fencing.h -+++ b/daemons/controld/controld_fencing.h -@@ -24,6 +24,7 @@ void update_stonith_max_attempts(const char* value); - void controld_trigger_fencer_connect(void); - void controld_disconnect_fencer(bool destroy); - gboolean te_fence_node(crm_graph_t *graph, crm_action_t *action); -+bool controld_verify_stonith_watchdog_timeout(const char *value); - - // stonith cleanup list - void add_stonith_cleanup(const char *target); -diff --git 
a/daemons/fenced/Makefile.am b/daemons/fenced/Makefile.am -index 43413e11d..2923d7c9b 100644 ---- a/daemons/fenced/Makefile.am -+++ b/daemons/fenced/Makefile.am -@@ -15,7 +15,7 @@ halibdir = $(CRM_DAEMON_DIR) - - halib_PROGRAMS = pacemaker-fenced cts-fence-helper - --sbin_SCRIPTS = fence_legacy -+sbin_SCRIPTS = fence_legacy fence_watchdog - - noinst_HEADERS = pacemaker-fenced.h - -diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in -new file mode 100755 -index 000000000..c83304f1d ---- /dev/null -+++ b/daemons/fenced/fence_watchdog.in -@@ -0,0 +1,283 @@ -+#!@PYTHON@ -+"""Dummy watchdog fence agent for providing meta-data for the pacemaker internal agent -+""" -+ -+__copyright__ = "Copyright 2012-2021 the Pacemaker project contributors" -+__license__ = "GNU General Public License version 2 or later (GPLv2+) WITHOUT ANY WARRANTY" -+ -+import io -+import os -+import re -+import sys -+import atexit -+import getopt -+ -+SHORT_DESC = "Dummy watchdog fence agent" -+LONG_DESC = """fence_watchdog just provides -+meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" -+ -+ALL_OPT = { -+ "version" : { -+ "getopt" : "V", -+ "longopt" : "version", -+ "help" : "-V, --version Display version information and exit", -+ "required" : "0", -+ "shortdesc" : "Display version information and exit", -+ "order" : 53 -+ }, -+ "help" : { -+ "getopt" : "h", -+ "longopt" : "help", -+ "help" : "-h, --help Display this help and exit", -+ "required" : "0", -+ "shortdesc" : "Display help and exit", -+ "order" : 54 -+ }, -+ "action" : { -+ "getopt" : "o:", -+ "longopt" : "action", -+ "help" : "-o, --action=[action] Action: metadata", -+ "required" : "1", -+ "shortdesc" : "Fencing Action", -+ "default" : "metadata", -+ "order" : 1 -+ }, -+ "nodename" : { -+ "getopt" : "N:", -+ "longopt" : "nodename", -+ "help" : "-N, --nodename Node name of fence victim (ignored)", -+ "required" : "0", -+ "shortdesc" : "Ignored", -+ "order" : 2 -+ }, -+ "plug" : 
{ -+ "getopt" : "n:", -+ "longopt" : "plug", -+ "help" : "-n, --plug=[id] Physical plug number on device (ignored)", -+ "required" : "1", -+ "shortdesc" : "Ignored", -+ "order" : 4 -+ } -+} -+ -+ -+def agent(): -+ """ Return name this file was run as. """ -+ -+ return os.path.basename(sys.argv[0]) -+ -+ -+def fail_usage(message): -+ """ Print a usage message and exit. """ -+ -+ sys.exit("%s\nPlease use '-h' for usage" % message) -+ -+ -+def show_docs(options): -+ """ Handle informational options (display info and exit). """ -+ -+ device_opt = options["device_opt"] -+ -+ if "-h" in options: -+ usage(device_opt) -+ sys.exit(0) -+ -+ if "-o" in options and options["-o"].lower() == "metadata": -+ metadata(device_opt, options) -+ sys.exit(0) -+ -+ if "-V" in options: -+ print(AGENT_VERSION) -+ sys.exit(0) -+ -+ -+def sorted_options(avail_opt): -+ """ Return a list of all options, in their internally specified order. """ -+ -+ sorted_list = [(key, ALL_OPT[key]) for key in avail_opt] -+ sorted_list.sort(key=lambda x: x[1]["order"]) -+ return sorted_list -+ -+ -+def usage(avail_opt): -+ """ Print a usage message. """ -+ print(LONG_DESC) -+ print() -+ print("Usage:") -+ print("\t" + agent() + " [options]") -+ print("Options:") -+ -+ for dummy, value in sorted_options(avail_opt): -+ if len(value["help"]) != 0: -+ print(" " + value["help"]) -+ -+ -+def metadata(avail_opt, options): -+ """ Print agent metadata. 
""" -+ -+ print(""" -+ -+%s -+""" % (agent(), SHORT_DESC, LONG_DESC)) -+ -+ for option, dummy in sorted_options(avail_opt): -+ if "shortdesc" in ALL_OPT[option]: -+ print(' ') -+ -+ default = "" -+ default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] -+ default_name_no_arg = "-" + ALL_OPT[option]["getopt"] -+ -+ if "default" in ALL_OPT[option]: -+ default = 'default="%s"' % str(ALL_OPT[option]["default"]) -+ elif default_name_arg in options: -+ if options[default_name_arg]: -+ try: -+ default = 'default="%s"' % options[default_name_arg] -+ except TypeError: -+ ## @todo/@note: Currently there is no clean way how to handle lists -+ ## we can create a string from it but we can't set it on command line -+ default = 'default="%s"' % str(options[default_name_arg]) -+ elif default_name_no_arg in options: -+ default = 'default="true"' -+ -+ mixed = ALL_OPT[option]["help"] -+ ## split it between option and help text -+ res = re.compile(r"^(.*--\S+)\s+", re.IGNORECASE | re.S).search(mixed) -+ if None != res: -+ mixed = res.group(1) -+ mixed = mixed.replace("<", "<").replace(">", ">") -+ print(' ') -+ -+ if ALL_OPT[option]["getopt"].count(":") > 0: -+ print(' ') -+ else: -+ print(' ') -+ -+ print(' ' + ALL_OPT[option]["shortdesc"] + '') -+ print(' ') -+ -+ print(' \n ') -+ print(' ') -+ print(' ') -+ print(' ') -+ print(' ') -+ print(' ') -+ print(' ') -+ print(' ') -+ print('') -+ -+ -+def option_longopt(option): -+ """ Return the getopt-compatible long-option name of the given option. """ -+ -+ if ALL_OPT[option]["getopt"].endswith(":"): -+ return ALL_OPT[option]["longopt"] + "=" -+ else: -+ return ALL_OPT[option]["longopt"] -+ -+ -+def opts_from_command_line(argv, avail_opt): -+ """ Read options from command-line arguments. 
""" -+ -+ # Prepare list of options for getopt -+ getopt_string = "" -+ longopt_list = [] -+ for k in avail_opt: -+ if k in ALL_OPT: -+ getopt_string += ALL_OPT[k]["getopt"] -+ else: -+ fail_usage("Parse error: unknown option '" + k + "'") -+ -+ if k in ALL_OPT and "longopt" in ALL_OPT[k]: -+ longopt_list.append(option_longopt(k)) -+ -+ try: -+ opt, dummy = getopt.gnu_getopt(argv, getopt_string, longopt_list) -+ except getopt.GetoptError as error: -+ fail_usage("Parse error: " + error.msg) -+ -+ # Transform longopt to short one which are used in fencing agents -+ old_opt = opt -+ opt = {} -+ for old_option in dict(old_opt).keys(): -+ if old_option.startswith("--"): -+ for option in ALL_OPT.keys(): -+ if "longopt" in ALL_OPT[option] and "--" + ALL_OPT[option]["longopt"] == old_option: -+ opt["-" + ALL_OPT[option]["getopt"].rstrip(":")] = dict(old_opt)[old_option] -+ else: -+ opt[old_option] = dict(old_opt)[old_option] -+ -+ return opt -+ -+ -+def opts_from_stdin(avail_opt): -+ """ Read options from standard input. """ -+ -+ opt = {} -+ name = "" -+ for line in sys.stdin.readlines(): -+ line = line.strip() -+ if line.startswith("#") or (len(line) == 0): -+ continue -+ -+ (name, value) = (line + "=").split("=", 1) -+ value = value[:-1] -+ -+ if name not in avail_opt: -+ print("Parse error: Ignoring unknown option '%s'" % line, -+ file=sys.stderr) -+ continue -+ -+ if ALL_OPT[name]["getopt"].endswith(":"): -+ opt["-"+ALL_OPT[name]["getopt"].rstrip(":")] = value -+ elif value.lower() in ["1", "yes", "on", "true"]: -+ opt["-"+ALL_OPT[name]["getopt"]] = "1" -+ -+ return opt -+ -+ -+def process_input(avail_opt): -+ """ Set standard environment variables, and parse all options. 
""" -+ -+ # Set standard environment -+ os.putenv("LANG", "C") -+ os.putenv("LC_ALL", "C") -+ -+ # Read options from command line or standard input -+ if len(sys.argv) > 1: -+ return opts_from_command_line(sys.argv[1:], avail_opt) -+ else: -+ return opts_from_stdin(avail_opt) -+ -+ -+def atexit_handler(): -+ """ Close stdout on exit. """ -+ -+ try: -+ sys.stdout.close() -+ os.close(1) -+ except IOError: -+ sys.exit("%s failed to close standard output" % agent()) -+ -+ -+def main(): -+ """ Make it so! """ -+ -+ device_opt = ALL_OPT.keys() -+ -+ ## Defaults for fence agent -+ atexit.register(atexit_handler) -+ options = process_input(device_opt) -+ options["device_opt"] = device_opt -+ show_docs(options) -+ -+ print("Watchdog fencing may be initiated only by the cluster, not this agent.", -+ file=sys.stderr) -+ -+ sys.exit(1) -+ -+ -+if __name__ == "__main__": -+ main() -diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c -index cd9968f1a..9470ea2c1 100644 ---- a/daemons/fenced/fenced_commands.c -+++ b/daemons/fenced/fenced_commands.c -@@ -397,15 +397,13 @@ stonith_device_execute(stonith_device_t * device) - return TRUE; - } - -- if(pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, pcmk__str_casei)) { -- if(pcmk__str_eq(cmd->action, "reboot", pcmk__str_casei)) { -- pcmk__panic(__func__); -- goto done; -- -- } else if(pcmk__str_eq(cmd->action, "off", pcmk__str_casei)) { -- pcmk__panic(__func__); -- goto done; -- -+ if (pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, -+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { -+ if (pcmk__strcase_any_of(cmd->action, "reboot", "off", NULL)) { -+ if (node_does_watchdog_fencing(stonith_our_uname)) { -+ pcmk__panic(__func__); -+ goto done; -+ } - } else { - crm_info("Faking success for %s watchdog operation", cmd->action); - cmd->done_cb(0, 0, NULL, cmd); -@@ -716,7 +714,7 @@ get_agent_metadata(const char *agent, xmlNode ** metadata) - return EINVAL; - } - *metadata = NULL; -- if 
(pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT, pcmk__str_none)) { -+ if (pcmk__str_eq(agent, STONITH_WATCHDOG_AGENT_INTERNAL, pcmk__str_none)) { - return pcmk_rc_ok; - } - init_metadata_cache(); -@@ -1050,24 +1048,6 @@ schedule_internal_command(const char *origin, - schedule_stonith_command(cmd, device); - } - --gboolean --string_in_list(GList *list, const char *item) --{ -- int lpc = 0; -- int max = g_list_length(list); -- -- for (lpc = 0; lpc < max; lpc++) { -- const char *value = g_list_nth_data(list, lpc); -- -- if (pcmk__str_eq(item, value, pcmk__str_casei)) { -- return TRUE; -- } else { -- crm_trace("%d: '%s' != '%s'", lpc, item, value); -- } -- } -- return FALSE; --} -- - static void - status_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - { -@@ -1144,7 +1124,7 @@ dynamic_list_search_cb(GPid pid, int rc, const char *output, gpointer user_data) - if (!alias) { - alias = search->host; - } -- if (string_in_list(dev->targets, alias)) { -+ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) { - can_fence = TRUE; - } - } -@@ -1215,9 +1195,62 @@ stonith_device_register(xmlNode * msg, const char **desc, gboolean from_cib) - stonith_device_t *dup = NULL; - stonith_device_t *device = build_device_from_xml(msg); - guint ndevices = 0; -+ int rv = pcmk_ok; - - CRM_CHECK(device != NULL, return -ENOMEM); - -+ /* do we have a watchdog-device? 
*/ -+ if (pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, pcmk__str_none) || -+ pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, -+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) do { -+ if (stonith_watchdog_timeout_ms <= 0) { -+ crm_err("Ignoring watchdog fence device without " -+ "stonith-watchdog-timeout set."); -+ rv = -ENODEV; -+ /* fall through to cleanup & return */ -+ } else if (!pcmk__str_any_of(device->agent, STONITH_WATCHDOG_AGENT, -+ STONITH_WATCHDOG_AGENT_INTERNAL, NULL)) { -+ crm_err("Ignoring watchdog fence device with unknown " -+ "agent '%s' unequal '" STONITH_WATCHDOG_AGENT "'.", -+ device->agent?device->agent:""); -+ rv = -ENODEV; -+ /* fall through to cleanup & return */ -+ } else if (!pcmk__str_eq(device->id, STONITH_WATCHDOG_ID, -+ pcmk__str_none)) { -+ crm_err("Ignoring watchdog fence device " -+ "named %s !='"STONITH_WATCHDOG_ID"'.", -+ device->id?device->id:""); -+ rv = -ENODEV; -+ /* fall through to cleanup & return */ -+ } else { -+ if (pcmk__str_eq(device->agent, STONITH_WATCHDOG_AGENT, -+ pcmk__str_none)) { -+ /* this either has an empty list or the targets -+ configured for watchdog-fencing -+ */ -+ g_list_free_full(stonith_watchdog_targets, free); -+ stonith_watchdog_targets = device->targets; -+ device->targets = NULL; -+ } -+ if (node_does_watchdog_fencing(stonith_our_uname)) { -+ g_list_free_full(device->targets, free); -+ device->targets = stonith__parse_targets(stonith_our_uname); -+ g_hash_table_replace(device->params, -+ strdup(PCMK_STONITH_HOST_LIST), -+ strdup(stonith_our_uname)); -+ /* proceed as with any other stonith-device */ -+ break; -+ } -+ -+ crm_debug("Skip registration of watchdog fence device on node not in host-list."); -+ /* cleanup and fall through to more cleanup and return */ -+ device->targets = NULL; -+ stonith_device_remove(device->id, from_cib); -+ } -+ free_device(device); -+ return rv; -+ } while (0); -+ - dup = device_has_duplicate(device); - if (dup) { - ndevices = g_hash_table_size(device_list); -@@ 
-1598,6 +1631,39 @@ stonith_level_remove(xmlNode *msg, char **desc) - * (CIB registration is not sufficient), because monitor should not be - * possible unless the device is "started" (API registered). - */ -+ -+static char * -+list_to_string(GList *list, const char *delim, gboolean terminate_with_delim) -+{ -+ int max = g_list_length(list); -+ size_t delim_len = delim?strlen(delim):0; -+ size_t alloc_size = 1 + (max?((max-1+(terminate_with_delim?1:0))*delim_len):0); -+ char *rv; -+ GList *gIter; -+ -+ for (gIter = list; gIter != NULL; gIter = gIter->next) { -+ const char *value = (const char *) gIter->data; -+ -+ alloc_size += strlen(value); -+ } -+ rv = calloc(alloc_size, sizeof(char)); -+ if (rv) { -+ char *pos = rv; -+ const char *lead_delim = ""; -+ -+ for (gIter = list; gIter != NULL; gIter = gIter->next) { -+ const char *value = (const char *) gIter->data; -+ -+ pos = &pos[sprintf(pos, "%s%s", lead_delim, value)]; -+ lead_delim = delim; -+ } -+ if (max && terminate_with_delim) { -+ sprintf(pos, "%s", delim); -+ } -+ } -+ return rv; -+} -+ - static int - stonith_device_action(xmlNode * msg, char **output) - { -@@ -1615,6 +1681,19 @@ stonith_device_action(xmlNode * msg, char **output) - return -EPROTO; - } - -+ if (pcmk__str_eq(id, STONITH_WATCHDOG_ID, pcmk__str_none)) { -+ if (stonith_watchdog_timeout_ms <= 0) { -+ return -ENODEV; -+ } else { -+ if (pcmk__str_eq(action, "list", pcmk__str_casei)) { -+ *output = list_to_string(stonith_watchdog_targets, "\n", TRUE); -+ return pcmk_ok; -+ } else if (pcmk__str_eq(action, "monitor", pcmk__str_casei)) { -+ return pcmk_ok; -+ } -+ } -+ } -+ - device = g_hash_table_lookup(device_list, id); - if ((device == NULL) - || (!device->api_registered && !strcmp(action, "monitor"))) { -@@ -1742,7 +1821,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc - * Only use if all hosts on which the device can be active can always fence all listed hosts - */ - -- if (string_in_list(dev->targets, host)) 
{ -+ if (pcmk__str_in_list(dev->targets, host, pcmk__str_casei)) { - can = TRUE; - } else if (g_hash_table_lookup(dev->params, PCMK_STONITH_HOST_MAP) - && g_hash_table_lookup(dev->aliases, host)) { -@@ -1763,7 +1842,7 @@ can_fence_host_with_device(stonith_device_t * dev, struct device_search_s *searc - return; - } - -- if (string_in_list(dev->targets, alias)) { -+ if (pcmk__str_in_list(dev->targets, alias, pcmk__str_casei)) { - can = TRUE; - } - -diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c -index cf91acaed..224f2baba 100644 ---- a/daemons/fenced/fenced_remote.c -+++ b/daemons/fenced/fenced_remote.c -@@ -1522,6 +1522,25 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device, - } - } - -+static gboolean -+check_watchdog_fencing_and_wait(remote_fencing_op_t * op) -+{ -+ if (node_does_watchdog_fencing(op->target)) { -+ -+ crm_notice("Waiting %lds for %s to self-fence (%s) for " -+ "client %s " CRM_XS " id=%.8s", -+ (stonith_watchdog_timeout_ms / 1000), -+ op->target, op->action, op->client_name, op->id); -+ op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, -+ remote_op_watchdog_done, op); -+ return TRUE; -+ } else { -+ crm_debug("Skipping fallback to watchdog-fencing as %s is " -+ "not in host-list", op->target); -+ } -+ return FALSE; -+} -+ - void - call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) - { -@@ -1592,26 +1611,33 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) - g_source_remove(op->op_timer_one); - } - -- if(stonith_watchdog_timeout_ms > 0 && device && pcmk__str_eq(device, "watchdog", pcmk__str_casei)) { -- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " -- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), -- op->target, op->action, op->client_name, op->id); -- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); -- -- /* TODO check devices to verify watchdog 
will be in use */ -- } else if(stonith_watchdog_timeout_ms > 0 -- && pcmk__str_eq(peer->host, op->target, pcmk__str_casei) -- && !pcmk__str_eq(op->action, "on", pcmk__str_casei)) { -- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " -- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), -- op->target, op->action, op->client_name, op->id); -- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); -- -- } else { -+ if (!(stonith_watchdog_timeout_ms > 0 && ( -+ (pcmk__str_eq(device, STONITH_WATCHDOG_ID, -+ pcmk__str_none)) || -+ (pcmk__str_eq(peer->host, op->target, pcmk__str_casei) -+ && !pcmk__str_eq(op->action, "on", pcmk__str_casei))) && -+ check_watchdog_fencing_and_wait(op))) { -+ -+ /* Some thoughts about self-fencing cases reaching this point: -+ - Actually check in check_watchdog_fencing_and_wait -+ shouldn't fail if STONITH_WATCHDOG_ID is -+ chosen as fencing-device and it being present implies -+ watchdog-fencing is enabled anyway -+ - If watchdog-fencing is disabled either in general or for -+ a specific target - detected in check_watchdog_fencing_and_wait - -+ for some other kind of self-fencing we can't expect -+ a success answer but timeout is fine if the node doesn't -+ come back in between -+ - Delicate might be the case where we have watchdog-fencing -+ enabled for a node but the watchdog-fencing-device isn't -+ explicitly chosen for suicide. Local pe-execution in sbd -+ may detect the node as unclean and lead to timely suicide. -+ Otherwise the selection of stonith-watchdog-timeout at -+ least is questionable. 
-+ */ - op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op); - } - -- - send_cluster_message(crm_get_peer(0, peer->host), crm_msg_stonith_ng, remote_op, FALSE); - peer->tried = TRUE; - free_xml(remote_op); -@@ -1645,12 +1671,11 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer, int rc) - * but we have all the expected replies, then no devices - * are available to execute the fencing operation. */ - -- if(stonith_watchdog_timeout_ms && pcmk__str_eq(device, "watchdog", pcmk__str_null_matches | pcmk__str_casei)) { -- crm_notice("Waiting %lds for %s to self-fence (%s) for client %s " -- CRM_XS " id=%.8s", (stonith_watchdog_timeout_ms / 1000), -- op->target, op->action, op->client_name, op->id); -- op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms, remote_op_watchdog_done, op); -- return; -+ if(stonith_watchdog_timeout_ms > 0 && pcmk__str_eq(device, -+ STONITH_WATCHDOG_ID, pcmk__str_null_matches)) { -+ if (check_watchdog_fencing_and_wait(op)) { -+ return; -+ } - } - - if (op->state == st_query) { -diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c -index 39738d8be..7f8b427d9 100644 ---- a/daemons/fenced/pacemaker-fenced.c -+++ b/daemons/fenced/pacemaker-fenced.c -@@ -42,6 +42,7 @@ - - char *stonith_our_uname = NULL; - long stonith_watchdog_timeout_ms = 0; -+GList *stonith_watchdog_targets = NULL; - - static GMainLoop *mainloop = NULL; - -@@ -578,7 +579,44 @@ our_node_allowed_for(pe_resource_t *rsc) - } - - static void --watchdog_device_update(xmlNode *cib) -+watchdog_device_update(void) -+{ -+ if (stonith_watchdog_timeout_ms > 0) { -+ if (!g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID) && -+ !stonith_watchdog_targets) { -+ /* getting here watchdog-fencing enabled, no device there yet -+ and reason isn't stonith_watchdog_targets preventing that -+ */ -+ int rc; -+ xmlNode *xml; -+ -+ xml = create_device_registration_xml( -+ STONITH_WATCHDOG_ID, -+ 
st_namespace_internal, -+ STONITH_WATCHDOG_AGENT, -+ NULL, /* stonith_device_register will add our -+ own name as PCMK_STONITH_HOST_LIST param -+ so we can skip that here -+ */ -+ NULL); -+ rc = stonith_device_register(xml, NULL, TRUE); -+ free_xml(xml); -+ if (rc != pcmk_ok) { -+ crm_crit("Cannot register watchdog pseudo fence agent"); -+ crm_exit(CRM_EX_FATAL); -+ } -+ } -+ -+ } else { -+ /* be silent if no device - todo parameter to stonith_device_remove */ -+ if (g_hash_table_lookup(device_list, STONITH_WATCHDOG_ID)) { -+ stonith_device_remove(STONITH_WATCHDOG_ID, TRUE); -+ } -+ } -+} -+ -+static void -+update_stonith_watchdog_timeout_ms(xmlNode *cib) - { - xmlNode *stonith_enabled_xml = NULL; - const char *stonith_enabled_s = NULL; -@@ -608,33 +646,7 @@ watchdog_device_update(xmlNode *cib) - } - } - -- if (timeout_ms != stonith_watchdog_timeout_ms) { -- crm_notice("New watchdog timeout %lds (was %lds)", timeout_ms/1000, stonith_watchdog_timeout_ms/1000); -- stonith_watchdog_timeout_ms = timeout_ms; -- -- if (stonith_watchdog_timeout_ms > 0) { -- int rc; -- xmlNode *xml; -- stonith_key_value_t *params = NULL; -- -- params = stonith_key_value_add(params, PCMK_STONITH_HOST_LIST, -- stonith_our_uname); -- -- xml = create_device_registration_xml("watchdog", st_namespace_internal, -- STONITH_WATCHDOG_AGENT, params, -- NULL); -- stonith_key_value_freeall(params, 1, 1); -- rc = stonith_device_register(xml, NULL, FALSE); -- free_xml(xml); -- if (rc != pcmk_ok) { -- crm_crit("Cannot register watchdog pseudo fence agent"); -- crm_exit(CRM_EX_FATAL); -- } -- -- } else { -- stonith_device_remove("watchdog", FALSE); -- } -- } -+ stonith_watchdog_timeout_ms = timeout_ms; - } - - /*! 
-@@ -677,6 +689,16 @@ static void cib_device_update(pe_resource_t *rsc, pe_working_set_t *data_set) - return; - } - -+ /* if watchdog-fencing is disabled handle any watchdog-fence -+ resource as if it was disabled -+ */ -+ if ((stonith_watchdog_timeout_ms <= 0) && -+ pcmk__str_eq(rsc->id, STONITH_WATCHDOG_ID, pcmk__str_none)) { -+ crm_info("Watchdog-fencing disabled thus handling " -+ "device %s as disabled", rsc->id); -+ return; -+ } -+ - /* Check whether our node is allowed for this resource (and its parent if in a group) */ - node = our_node_allowed_for(rsc); - if (rsc->parent && (rsc->parent->variant == pe_group)) { -@@ -772,6 +794,12 @@ cib_devices_update(void) - } - } - -+ /* have list repopulated if cib has a watchdog-fencing-resource -+ TODO: keep a cached list for queries happening while we are refreshing -+ */ -+ g_list_free_full(stonith_watchdog_targets, free); -+ stonith_watchdog_targets = NULL; -+ - for (gIter = fenced_data_set->resources; gIter != NULL; gIter = gIter->next) { - cib_device_update(gIter->data, fenced_data_set); - } -@@ -825,6 +853,8 @@ update_cib_stonith_devices_v2(const char *event, xmlNode * msg) - if (search != NULL) { - *search = 0; - stonith_device_remove(rsc_id, TRUE); -+ /* watchdog_device_update called afterwards -+ to fall back to implicit definition if needed */ - } else { - crm_warn("Ignoring malformed CIB update (resource deletion)"); - } -@@ -968,6 +998,24 @@ node_has_attr(const char *node, const char *name, const char *value) - return (match != NULL); - } - -+/*! 
-+ * \internal -+ * \brief Check whether a node does watchdog-fencing -+ * -+ * \param[in] node Name of node to check -+ * -+ * \return TRUE if node found in stonith_watchdog_targets -+ * or stonith_watchdog_targets is empty indicating -+ * all nodes are doing watchdog-fencing -+ */ -+gboolean -+node_does_watchdog_fencing(const char *node) -+{ -+ return ((stonith_watchdog_targets == NULL) || -+ pcmk__str_in_list(stonith_watchdog_targets, node, pcmk__str_casei)); -+} -+ -+ - static void - update_fencing_topology(const char *event, xmlNode * msg) - { -@@ -1073,6 +1121,8 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - xmlNode *stonith_enabled_xml = NULL; - const char *stonith_enabled_s = NULL; - static gboolean stonith_enabled_saved = TRUE; -+ long timeout_ms_saved = stonith_watchdog_timeout_ms; -+ gboolean need_full_refresh = FALSE; - - if(!have_cib_devices) { - crm_trace("Skipping updates until we get a full dump"); -@@ -1127,6 +1177,7 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - } - - pcmk__refresh_node_caches_from_cib(local_cib); -+ update_stonith_watchdog_timeout_ms(local_cib); - - stonith_enabled_xml = get_xpath_object("//nvpair[@name='stonith-enabled']", - local_cib, LOG_NEVER); -@@ -1134,23 +1185,30 @@ update_cib_cache_cb(const char *event, xmlNode * msg) - stonith_enabled_s = crm_element_value(stonith_enabled_xml, XML_NVPAIR_ATTR_VALUE); - } - -- watchdog_device_update(local_cib); -- - if (stonith_enabled_s && crm_is_true(stonith_enabled_s) == FALSE) { - crm_trace("Ignoring CIB updates while fencing is disabled"); - stonith_enabled_saved = FALSE; -- return; - - } else if (stonith_enabled_saved == FALSE) { - crm_info("Updating fencing device and topology lists " - "now that fencing is enabled"); - stonith_enabled_saved = TRUE; -- fencing_topology_init(); -- cib_devices_update(); -+ need_full_refresh = TRUE; - - } else { -- update_fencing_topology(event, msg); -- update_cib_stonith_devices(event, msg); -+ if (timeout_ms_saved != 
stonith_watchdog_timeout_ms) { -+ need_full_refresh = TRUE; -+ } else { -+ update_fencing_topology(event, msg); -+ update_cib_stonith_devices(event, msg); -+ watchdog_device_update(); -+ } -+ } -+ -+ if (need_full_refresh) { -+ fencing_topology_init(); -+ cib_devices_update(); -+ watchdog_device_update(); - } - } - -@@ -1162,10 +1220,11 @@ init_cib_cache_cb(xmlNode * msg, int call_id, int rc, xmlNode * output, void *us - local_cib = copy_xml(output); - - pcmk__refresh_node_caches_from_cib(local_cib); -+ update_stonith_watchdog_timeout_ms(local_cib); - - fencing_topology_init(); -- watchdog_device_update(local_cib); - cib_devices_update(); -+ watchdog_device_update(); - } - - static void -diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h -index d330fda4d..14e085e98 100644 ---- a/daemons/fenced/pacemaker-fenced.h -+++ b/daemons/fenced/pacemaker-fenced.h -@@ -260,14 +260,15 @@ bool fencing_peer_active(crm_node_t *peer); - - int stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op); - --gboolean string_in_list(GList *list, const char *item); -- - gboolean node_has_attr(const char *node, const char *name, const char *value); - -+gboolean node_does_watchdog_fencing(const char *node); -+ - extern char *stonith_our_uname; - extern gboolean stand_alone; - extern GHashTable *device_list; - extern GHashTable *topology; - extern long stonith_watchdog_timeout_ms; -+extern GList *stonith_watchdog_targets; - - extern GHashTable *stonith_remote_op_list; -diff --git a/include/crm/crm.h b/include/crm/crm.h -index ee52c3630..7861c160e 100644 ---- a/include/crm/crm.h -+++ b/include/crm/crm.h -@@ -66,7 +66,7 @@ extern "C" { - * >=3.0.13: Fail counts include operation name and interval - * >=3.2.0: DC supports PCMK_LRM_OP_INVALID and PCMK_LRM_OP_NOT_CONNECTED - */ --# define CRM_FEATURE_SET "3.10.2" -+# define CRM_FEATURE_SET "3.11.0" - - /* Pacemaker's CPG protocols use fixed-width binary fields for the sender and - * recipient of a CPG message. 
This imposes an arbitrary limit on cluster node -diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h -index 8bcb544d8..f222edba3 100644 ---- a/include/crm/fencing/internal.h -+++ b/include/crm/fencing/internal.h -@@ -164,7 +164,10 @@ void stonith__device_parameter_flags(uint32_t *device_flags, - # define STONITH_OP_LEVEL_ADD "st_level_add" - # define STONITH_OP_LEVEL_DEL "st_level_remove" - --# define STONITH_WATCHDOG_AGENT "#watchdog" -+# define STONITH_WATCHDOG_AGENT "fence_watchdog" -+/* Don't change 2 below as it would break rolling upgrade */ -+# define STONITH_WATCHDOG_AGENT_INTERNAL "#watchdog" -+# define STONITH_WATCHDOG_ID "watchdog" - - # ifdef HAVE_STONITH_STONITH_H - // utilities from st_lha.c -@@ -211,4 +214,7 @@ stonith__op_state_pending(enum op_state state) - return state != st_failed && state != st_done; - } - -+gboolean stonith__watchdog_fencing_enabled_for_node(const char *node); -+gboolean stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node); -+ - #endif -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index e285f51e2..0ff98157b 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -195,6 +195,67 @@ stonith_get_namespace(const char *agent, const char *namespace_s) - return st_namespace_invalid; - } - -+gboolean -+stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) -+{ -+ gboolean rv = FALSE; -+ stonith_t *stonith_api = st?st:stonith_api_new(); -+ char *list = NULL; -+ -+ if(stonith_api) { -+ if (stonith_api->state == stonith_disconnected) { -+ int rc = stonith_api->cmds->connect(stonith_api, "stonith-api", NULL); -+ -+ if (rc != pcmk_ok) { -+ crm_err("Failed connecting to Stonith-API for watchdog-fencing-query."); -+ } -+ } -+ -+ if (stonith_api->state != stonith_disconnected) { -+ /* caveat!!! 
-+ * this might fail when when stonithd is just updating the device-list -+ * probably something we should fix as well for other api-calls */ -+ int rc = stonith_api->cmds->list(stonith_api, st_opt_sync_call, STONITH_WATCHDOG_ID, &list, 0); -+ if ((rc != pcmk_ok) || (list == NULL)) { -+ /* due to the race described above it can happen that -+ * we drop in here - so as not to make remote nodes -+ * panic on that answer -+ */ -+ crm_warn("watchdog-fencing-query failed"); -+ } else if (list[0] == '\0') { -+ crm_warn("watchdog-fencing-query returned an empty list - any node"); -+ rv = TRUE; -+ } else { -+ GList *targets = stonith__parse_targets(list); -+ rv = pcmk__str_in_list(targets, node, pcmk__str_casei); -+ g_list_free_full(targets, free); -+ } -+ free(list); -+ if (!st) { -+ /* if we're provided the api we still might have done the -+ * connection - but let's assume the caller won't bother -+ */ -+ stonith_api->cmds->disconnect(stonith_api); -+ } -+ } -+ -+ if (!st) { -+ stonith_api_delete(stonith_api); -+ } -+ } else { -+ crm_err("Stonith-API for watchdog-fencing-query couldn't be created."); -+ } -+ crm_trace("Pacemaker assumes node %s %sto do watchdog-fencing.", -+ node, rv?"":"not "); -+ return rv; -+} -+ -+gboolean -+stonith__watchdog_fencing_enabled_for_node(const char *node) -+{ -+ return stonith__watchdog_fencing_enabled_for_node_api(NULL, node); -+} -+ - static void - log_action(stonith_action_t *action, pid_t pid) - { -diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c -index 87d050ed1..bf4bceb42 100644 ---- a/lib/lrmd/lrmd_client.c -+++ b/lib/lrmd/lrmd_client.c -@@ -34,6 +34,7 @@ - #include - - #include -+#include - - #ifdef HAVE_GNUTLS_GNUTLS_H - # undef KEYFILE -@@ -934,7 +935,10 @@ lrmd__validate_remote_settings(lrmd_t *lrmd, GHashTable *hash) - crm_xml_add(data, F_LRMD_ORIGIN, __func__); - - value = g_hash_table_lookup(hash, "stonith-watchdog-timeout"); -- crm_xml_add(data, F_LRMD_WATCHDOG, value); -+ if ((value) && -+ 
(stonith__watchdog_fencing_enabled_for_node(native->remote_nodename))) { -+ crm_xml_add(data, F_LRMD_WATCHDOG, value); -+ } - - rc = lrmd_send_command(lrmd, LRMD_OP_CHECK, data, NULL, 0, 0, - (native->type == pcmk__client_ipc)); -diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in -index 79e78ede9..f58357a77 100644 ---- a/rpm/pacemaker.spec.in -+++ b/rpm/pacemaker.spec.in -@@ -744,6 +744,7 @@ exit 0 - %doc %{_mandir}/man8/crm_attribute.* - %doc %{_mandir}/man8/crm_master.* - %doc %{_mandir}/man8/fence_legacy.* -+%doc %{_mandir}/man8/fence_watchdog.* - %doc %{_mandir}/man8/pacemakerd.* - - %doc %{_datadir}/pacemaker/alerts -@@ -796,6 +797,7 @@ exit 0 - %{_sbindir}/crm_simulate - %{_sbindir}/crm_report - %{_sbindir}/crm_ticket -+%{_sbindir}/fence_watchdog - %{_sbindir}/stonith_admin - # "dirname" is owned by -schemas, which is a prerequisite - %{_datadir}/pacemaker/report.collector -@@ -822,6 +824,7 @@ exit 0 - %exclude %{_mandir}/man8/crm_attribute.* - %exclude %{_mandir}/man8/crm_master.* - %exclude %{_mandir}/man8/fence_legacy.* -+%exclude %{_mandir}/man8/fence_watchdog.* - %exclude %{_mandir}/man8/pacemakerd.* - %exclude %{_mandir}/man8/pacemaker-remoted.* - --- -2.27.0 - - -From 53dd360f096e5f005e3221e8d44d82d3654b5172 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Wed, 4 Aug 2021 15:57:23 +0200 -Subject: [PATCH 3/3] Fix: watchdog-fencing: Silence warning without node - restriction - ---- - lib/fencing/st_client.c | 1 - - 1 file changed, 1 deletion(-) - -diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c -index 0ff98157b..14fa7b2a6 100644 ---- a/lib/fencing/st_client.c -+++ b/lib/fencing/st_client.c -@@ -223,7 +223,6 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node) - */ - crm_warn("watchdog-fencing-query failed"); - } else if (list[0] == '\0') { -- crm_warn("watchdog-fencing-query returned an empty list - any node"); - rv = TRUE; - } else { - GList *targets = stonith__parse_targets(list); --- -2.27.0 - 
diff --git a/SOURCES/016-cts.patch b/SOURCES/016-cts.patch deleted file mode 100644 index 195afc3..0000000 --- a/SOURCES/016-cts.patch +++ /dev/null @@ -1,59 +0,0 @@ -From b37391fef92548f31822f9df2a9b5fa2a61b4514 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Wed, 23 Jun 2021 15:17:54 -0500 -Subject: [PATCH] Fix: CTS: handle longer Corosync token timeouts - -Previously, startall() would call cluster_stable() immediately after detecting -the "controller successfully started" message. If the Corosync token timeout is -small enough, this will be fine. However with a token timeout of more than -about 1 second, the controllers will not have formed a membership by this -point, causing cluster_stable() to think there are multiple partitions, and -wait for a DC to be elected in each one, when really they will unite into a -single partition in a short time, and only elect a single DC. - -Now, startall() waits until seeing that each node is a cluster member before -calling cluster_stable(). ---- - cts/lab/CTS.py.in | 3 ++- - cts/lab/patterns.py | 2 ++ - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/cts/lab/CTS.py.in b/cts/lab/CTS.py.in -index abcb9d285..d9924437b 100644 ---- a/cts/lab/CTS.py.in -+++ b/cts/lab/CTS.py.in -@@ -628,9 +628,10 @@ class ClusterManager(UserDict): - watchpats = [ ] - watchpats.append(self.templates["Pat:DC_IDLE"]) - for node in nodelist: -- watchpats.append(self.templates["Pat:Local_started"] % node) - watchpats.append(self.templates["Pat:InfraUp"] % node) - watchpats.append(self.templates["Pat:PacemakerUp"] % node) -+ watchpats.append(self.templates["Pat:Local_started"] % node) -+ watchpats.append(self.templates["Pat:They_up"] % (nodelist[0], node)) - - # Start all the nodes - at about the same time... 
- watch = LogWatcher(self.Env["LogFileName"], watchpats, "fast-start", self.Env["DeadTime"]+10, hosts=self.Env["nodes"], kind=self.Env["LogWatcher"]) -diff --git a/cts/lab/patterns.py b/cts/lab/patterns.py -index e21a016ff..400fd3dc8 100644 ---- a/cts/lab/patterns.py -+++ b/cts/lab/patterns.py -@@ -61,6 +61,7 @@ class BasePatterns(object): - "Pat:We_stopped" : "%s\W.*OVERRIDE THIS PATTERN", - "Pat:They_stopped" : "%s\W.*LOST:.* %s ", - "Pat:They_dead" : "node %s.*: is dead", -+ "Pat:They_up" : "%s %s\W.*OVERRIDE THIS PATTERN", - "Pat:TransitionComplete" : "Transition status: Complete: complete", - - "Pat:Fencing_start" : r"Requesting peer fencing .* targeting %s", -@@ -130,6 +131,7 @@ class crm_corosync(BasePatterns): - "Pat:We_stopped" : "%s\W.*Unloading all Corosync service engines", - "Pat:They_stopped" : "%s\W.*pacemaker-controld.*Node %s(\[|\s).*state is now lost", - "Pat:They_dead" : "pacemaker-controld.*Node %s(\[|\s).*state is now lost", -+ "Pat:They_up" : "\W%s\W.*pacemaker-controld.*Node %s state is now member", - - "Pat:ChildExit" : r"\[[0-9]+\] exited with status [0-9]+ \(", - # "with signal 9" == pcmk_child_exit(), "$" == check_active_before_startup_processes() --- -2.27.0 - diff --git a/SOURCES/017-watchdog-fixes.patch b/SOURCES/017-watchdog-fixes.patch deleted file mode 100644 index d3df876..0000000 --- a/SOURCES/017-watchdog-fixes.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 61eb9c240004d1dbd0b5973e2fecda3686bb4c53 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 10 Aug 2021 09:06:55 +0200 -Subject: [PATCH 1/2] Build: rpm: package fence_watchdog in base-package - ---- - rpm/pacemaker.spec.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in -index f58357a77..0c569b9ca 100644 ---- a/rpm/pacemaker.spec.in -+++ b/rpm/pacemaker.spec.in -@@ -734,6 +734,7 @@ exit 0 - %{_sbindir}/crm_attribute - %{_sbindir}/crm_master - %{_sbindir}/fence_legacy -+%{_sbindir}/fence_watchdog - - %doc 
%{_mandir}/man7/pacemaker-controld.* - %doc %{_mandir}/man7/pacemaker-schedulerd.* -@@ -797,7 +798,6 @@ exit 0 - %{_sbindir}/crm_simulate - %{_sbindir}/crm_report - %{_sbindir}/crm_ticket --%{_sbindir}/fence_watchdog - %{_sbindir}/stonith_admin - # "dirname" is owned by -schemas, which is a prerequisite - %{_datadir}/pacemaker/report.collector --- -2.27.0 - - -From 88e75d5b98df197fa731e7642434951a24a67095 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Tue, 10 Aug 2021 09:10:23 +0200 -Subject: [PATCH 2/2] Fix: fence_watchdog: fix version output needed for - help2man - ---- - daemons/fenced/fence_watchdog.in | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in -index c83304f1d..700065e0e 100755 ---- a/daemons/fenced/fence_watchdog.in -+++ b/daemons/fenced/fence_watchdog.in -@@ -12,6 +12,7 @@ import sys - import atexit - import getopt - -+AGENT_VERSION = "1.0.0" - SHORT_DESC = "Dummy watchdog fence agent" - LONG_DESC = """fence_watchdog just provides - meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" --- -2.27.0 - diff --git a/SOURCES/018-controller.patch b/SOURCES/018-controller.patch deleted file mode 100644 index a2094e3..0000000 --- a/SOURCES/018-controller.patch +++ /dev/null @@ -1,122 +0,0 @@ -From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001 -From: Ken Gaillot -Date: Mon, 9 Aug 2021 14:48:57 -0500 -Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are - cleared without DC - -Previously, peer_update_callback() cleared a lost node's transient attributes -if either the local node is DC, or there is no DC. - -However, that left the possibility of the DC being lost at the same time as -another node -- the local node would still have fsa_our_dc set while processing -the leave notifications, so no node would clear the attributes for the non-DC -node. 
- -Now, the controller has its own CPG configuration change callback, which sets a -global boolean before calling the usual one, so that peer_update_callback() can -know when the DC has been lost. ---- - daemons/controld/controld_callbacks.c | 4 +- - daemons/controld/controld_corosync.c | 57 ++++++++++++++++++++++++++- - 2 files changed, 59 insertions(+), 2 deletions(-) - -diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c -index af24856ae..e564b3dcd 100644 ---- a/daemons/controld/controld_callbacks.c -+++ b/daemons/controld/controld_callbacks.c -@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node) - - #define state_text(state) ((state)? (const char *)(state) : "in unknown state") - -+bool controld_dc_left = false; -+ - void - peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) - { -@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d - cib_scope_local); - } - -- } else if (AM_I_DC || (fsa_our_dc == NULL)) { -+ } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) { - /* This only needs to be done once, so normally the DC should do - * it. However if there is no DC, every node must do it, since - * there is no other way to ensure some one node does it. -diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c -index db99630fb..c5ab6580a 100644 ---- a/daemons/controld/controld_corosync.c -+++ b/daemons/controld/controld_corosync.c -@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data) - } - } - -+extern bool controld_dc_left; -+ -+/*! 
-+ * \brief Handle a Corosync notification of a CPG configuration change -+ * -+ * \param[in] handle CPG connection -+ * \param[in] cpg_name CPG group name -+ * \param[in] member_list List of current CPG members -+ * \param[in] member_list_entries Number of entries in \p member_list -+ * \param[in] left_list List of CPG members that left -+ * \param[in] left_list_entries Number of entries in \p left_list -+ * \param[in] joined_list List of CPG members that joined -+ * \param[in] joined_list_entries Number of entries in \p joined_list -+ */ -+static void -+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, -+ const struct cpg_address *member_list, -+ size_t member_list_entries, -+ const struct cpg_address *left_list, -+ size_t left_list_entries, -+ const struct cpg_address *joined_list, -+ size_t joined_list_entries) -+{ -+ /* When nodes leave CPG, the DC clears their transient node attributes. -+ * -+ * However if there is no DC, or the DC is among the nodes that left, each -+ * remaining node needs to do the clearing, to ensure it gets done. -+ * Otherwise, the attributes would persist when the nodes rejoin, which -+ * could have serious consequences for unfencing, agents that use attributes -+ * for internal logic, etc. -+ * -+ * Here, we set a global boolean if the DC is among the nodes that left, for -+ * use by the peer callback. 
-+ */ -+ if (fsa_our_dc != NULL) { -+ crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc); -+ -+ if (peer != NULL) { -+ for (int i = 0; i < left_list_entries; ++i) { -+ if (left_list[i].nodeid == peer->id) { -+ controld_dc_left = true; -+ break; -+ } -+ } -+ } -+ } -+ -+ // Process the change normally, which will call the peer callback as needed -+ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, -+ left_list, left_list_entries, -+ joined_list, joined_list_entries); -+ -+ controld_dc_left = false; -+} -+ - extern gboolean crm_connect_corosync(crm_cluster_t * cluster); - - gboolean -@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster) - if (is_corosync_cluster()) { - crm_set_status_callback(&peer_update_callback); - cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; -- cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; -+ cluster->cpg.cpg_confchg_fn = cpg_membership_callback; - cluster->destroy = crmd_cs_destroy; - - if (crm_cluster_connect(cluster)) { --- -2.27.0 - diff --git a/SOURCES/019-crm_resource.patch b/SOURCES/019-crm_resource.patch deleted file mode 100644 index 237dde2..0000000 --- a/SOURCES/019-crm_resource.patch +++ /dev/null @@ -1,114 +0,0 @@ -From b4e426a016a4d7c9ade39e60a83644fc537bce26 Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Wed, 11 Aug 2021 12:10:32 +0200 -Subject: [PATCH 1/2] Fix: crm_resource: translate LSB rc to exit code and fix - resources_find_service_class() call - ---- - tools/crm_resource_runtime.c | 16 ++++++++++++---- - 1 file changed, 12 insertions(+), 4 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index ce037c514..e9d8aa687 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1718,10 +1718,10 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); - } else if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, - pcmk__str_casei) && 
!pcmk__str_eq( -- resources_find_service_class(rsc_name), PCMK_RESOURCE_CLASS_LSB, -+ resources_find_service_class(rsc_type), PCMK_RESOURCE_CLASS_LSB, - pcmk__str_casei)) { - out->err(out, "Sorry, the %s option doesn't support %s resources", -- rsc_action, resources_find_service_class(rsc_name)); -+ rsc_action, resources_find_service_class(rsc_type)); - crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); - } - -@@ -1798,9 +1798,17 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - if (services_action_sync(op)) { - exit_code = op->rc; - -+ /* Lookup exit code based on rc for LSB resources */ -+ if (( pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) || -+ (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei) && -+ pcmk__str_eq(resources_find_service_class(rsc_type), PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) ) && -+ pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { -+ exit_code = services_get_ocf_exitcode(action, exit_code); -+ } -+ - out->message(out, "resource-agent-action", resource_verbose, rsc_class, -- rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, op->rc, -- op->status, op->stdout_data, op->stderr_data); -+ rsc_prov, rsc_type, rsc_name, rsc_action, override_hash, -+ exit_code, op->status, op->stdout_data, op->stderr_data); - } else { - exit_code = op->rc == 0 ? 
CRM_EX_ERROR : op->rc; - } --- -2.27.0 - - -From 9a6beb74adfb4710fb3a4e588bef79a562c101f3 Mon Sep 17 00:00:00 2001 -From: Oyvind Albrigtsen -Date: Thu, 12 Aug 2021 18:54:30 +0200 -Subject: [PATCH 2/2] Refactor: crm_resource: simplify rsc_class logic by - getting actual class early if it's of class "service" - ---- - tools/crm_resource_runtime.c | 23 +++++++++-------------- - 1 file changed, 9 insertions(+), 14 deletions(-) - -diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c -index e9d8aa687..13b78b6b9 100644 ---- a/tools/crm_resource_runtime.c -+++ b/tools/crm_resource_runtime.c -@@ -1702,26 +1702,23 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - int timeout_ms, int resource_verbose, gboolean force, - int check_level) - { -+ const char *class = NULL; - const char *action = NULL; - GHashTable *params_copy = NULL; - crm_exit_t exit_code = CRM_EX_OK; - svc_action_t *op = NULL; - -- if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { -+ class = !pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei) ? 
-+ rsc_class : resources_find_service_class(rsc_type); -+ -+ if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_STONITH, pcmk__str_casei)) { - out->err(out, "Sorry, the %s option doesn't support %s resources yet", -- rsc_action, rsc_class); -+ rsc_action, class); - crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); -- } else if (pcmk__strcase_any_of(rsc_class, PCMK_RESOURCE_CLASS_SYSTEMD, -+ } else if (pcmk__strcase_any_of(class, PCMK_RESOURCE_CLASS_SYSTEMD, - PCMK_RESOURCE_CLASS_UPSTART, PCMK_RESOURCE_CLASS_NAGIOS, NULL)) { - out->err(out, "Sorry, the %s option doesn't support %s resources", -- rsc_action, rsc_class); -- crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); -- } else if (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, -- pcmk__str_casei) && !pcmk__str_eq( -- resources_find_service_class(rsc_type), PCMK_RESOURCE_CLASS_LSB, -- pcmk__str_casei)) { -- out->err(out, "Sorry, the %s option doesn't support %s resources", -- rsc_action, resources_find_service_class(rsc_type)); -+ rsc_action, class); - crm_exit(CRM_EX_UNIMPLEMENT_FEATURE); - } - -@@ -1799,9 +1796,7 @@ cli_resource_execute_from_params(pcmk__output_t *out, const char *rsc_name, - exit_code = op->rc; - - /* Lookup exit code based on rc for LSB resources */ -- if (( pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) || -- (pcmk__str_eq(rsc_class, PCMK_RESOURCE_CLASS_SERVICE, pcmk__str_casei) && -- pcmk__str_eq(resources_find_service_class(rsc_type), PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei)) ) && -+ if (pcmk__str_eq(class, PCMK_RESOURCE_CLASS_LSB, pcmk__str_casei) && - pcmk__str_eq(rsc_action, "force-check", pcmk__str_casei)) { - exit_code = services_get_ocf_exitcode(action, exit_code); - } --- -2.27.0 - diff --git a/SOURCES/020-fence_watchdog.patch b/SOURCES/020-fence_watchdog.patch deleted file mode 100644 index 76abe27..0000000 --- a/SOURCES/020-fence_watchdog.patch +++ /dev/null @@ -1,25 +0,0 @@ -From 46dd1118cae948649e000b2159e8e92623520ad9 Mon Sep 17 00:00:00 2001 -From: Klaus Wenninger -Date: Thu, 19 
Aug 2021 09:28:54 +0200 -Subject: [PATCH] Fix: fence_watchdog: fix malformed xml in metadata - ---- - daemons/fenced/fence_watchdog.in | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in -index 700065e0e..eefa7395e 100755 ---- a/daemons/fenced/fence_watchdog.in -+++ b/daemons/fenced/fence_watchdog.in -@@ -124,7 +124,7 @@ def metadata(avail_opt, options): - for option, dummy in sorted_options(avail_opt): - if "shortdesc" in ALL_OPT[option]: - print(' ') -+ '" required="' + ALL_OPT[option]["required"] + '">') - - default = "" - default_name_arg = "-" + ALL_OPT[option]["getopt"][:-1] --- -2.27.0 - diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index b493a07..d4c1b5b 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -35,11 +35,11 @@ ## Upstream pacemaker version, and its package version (specversion ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) -%global pcmkversion 2.1.0 -%global specversion 11 +%global pcmkversion 2.1.2 +%global specversion 2 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build -%global commit 7c3f660707a495a1331716ad32cd3ac9d9f8ff58 +%global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175 ## Since git v2.11, the extent of abbreviation is autoscaled by default ## (used to be constant of 7), so we need to convey it for non-tags, too. 
@@ -242,26 +242,14 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} # upstream commits -Patch1: 001-ping-agent.patch -Patch2: 002-pacemakerd-options.patch -Patch3: 003-pacemakerd-output.patch -Patch4: 004-check-level.patch -Patch5: 005-crm_resource.patch -Patch6: 006-crm_simulate.patch -Patch7: 007-unfencing-loop.patch -Patch8: 008-dynamic-list-fencing.patch -Patch9: 009-crm_resource-messages.patch -Patch10: 010-probe-pending.patch -Patch11: 011-crm_attribute-regression.patch -Patch12: 012-string-arguments.patch -Patch13: 013-leaks.patch -Patch14: 014-str-list.patch -Patch15: 015-sbd.patch -Patch16: 016-cts.patch -Patch17: 017-watchdog-fixes.patch -Patch18: 018-controller.patch -Patch19: 019-crm_resource.patch -Patch20: 020-fence_watchdog.patch +Patch1: 001-acl-group-schema.patch +Patch2: 002-fencing-reasons.patch +Patch3: 003-fencing-reasons.patch +Patch4: 004-systemd-metadata.patch +Patch5: 005-fencing-reasons.patch +Patch6: 006-stateful-metadata.patch +Patch7: 007-memory-leak.patch +Patch8: 008-fencing-history.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -313,13 +301,20 @@ BuildRequires: %{pkgname_gnutls_devel} BuildRequires: help2man BuildRequires: ncurses-devel BuildRequires: pam-devel + +# Required for "make check" +BuildRequires: libcmocka-devel + BuildRequires: pkgconfig(systemd) # RH patches are created by git, so we need git to apply them BuildRequires: git -Requires: corosync >= 2.0.0 -BuildRequires: corosync-devel >= 2.0.0 +# The RHEL 9 build root has corosync_cfg_trackstart() available, so +# Pacemaker's configure script will build support for it. Add a hard dependency +# to ensure users have compatible Corosync libraries if they upgrade Pacemaker. 
+Requires: corosync >= 3.1.1 +BuildRequires: corosync-devel >= 3.1.1 %if %{with stonithd} BuildRequires: %{pkgname_glue_libs}-devel @@ -337,7 +332,7 @@ Provides: pcmk-cluster-manager%{?_isa} = %{version}-%{release} # Bundled bits ## Pacemaker uses the crypto/md5-buffer module from gnulib %if 0%{?fedora} || 0%{?rhel} -Provides: bundled(gnulib) +Provides: bundled(gnulib) = 20200404 %endif %description @@ -361,8 +356,8 @@ Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} %if 0%{?supports_recommends} Recommends: pcmk-cluster-manager = %{version}-%{release} # For crm_report -Recommends: tar -Recommends: bzip2 +Requires: tar +Requires: bzip2 %endif Requires: perl-TimeDate Requires: %{pkgname_procps} @@ -590,10 +585,6 @@ rm -f %{buildroot}/%{_mandir}/man8/fence_legacy.* rm -f %{buildroot}/%{_sbindir}/notifyServicelogEvent rm -f %{buildroot}/%{_sbindir}/ipmiservicelogd -# Don't ship init scripts for systemd based platforms -rm -f %{buildroot}/%{_initrddir}/pacemaker -rm -f %{buildroot}/%{_initrddir}/pacemaker_remote - # Byte-compile Python sources where suitable and the distro procedures known %if %{defined py_byte_compile} %{py_byte_compile %{python_path} %{buildroot}%{_datadir}/pacemaker/tests} @@ -864,6 +855,18 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Thu Dec 16 2021 Ken Gaillot - 2.1.2-2 +- Correctly get metadata for systemd agent names that end in '@' +- Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data +- Fix regression in displayed times in crm_mon's fence history +- Resolves: rhbz2032031 +- Resolves: rhbz2032032 +- Resolves: rhbz2031765 + +* Tue Nov 30 2021 Ken Gaillot - 2.1.2-1 +- Rebase on upstream 2.1.2 +- Resolves: rhbz2011974 + * Fri Aug 20 2021 Ken Gaillot - 2.1.0-11 - Fix XML issue with fence_watchdog meta-data - Resolves: rhbz1988568