pacemaker/022-failure-messages.patch
Ken Gaillot 87bc6c8acd Backport selected patches from upstream main branch
- Detect an unresponsive subdaemon
- Handle certain probe failures as stopped instead of failed
- Update pcmk_delay_base option meta-data
- Avoid crash when using clone notifications
- Retry Corosync shutdown tracking if first attempt fails
- Improve display of failed actions
- Resolves: rhbz1707851
- Resolves: rhbz2039982
- Resolves: rhbz2032032
- Resolves: rhbz2040443
- Resolves: rhbz2042367
- Resolves: rhbz2042546
2022-01-24 10:24:48 -06:00

1339 lines
54 KiB
Diff

From 9ee3d6c9b0aba6aae022cc152a3b3472fe388fa3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 16:44:32 -0600
Subject: [PATCH 01/15] Refactor: fencer: add exit reason to fencing operation
object
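In order to pass a fencing action's exit reason with the action history,
remote_fencing_op_t needs to carry the action's full result (which includes
the exit reason). Nothing sets or reads the new member as of this commit; it
is only reset when the operation is freed.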
---
daemons/fenced/fenced_remote.c | 2 ++
daemons/fenced/pacemaker-fenced.h | 4 +++-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 6eebb7381e..0fa9706140 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -260,6 +260,8 @@ free_remote_op(gpointer data)
}
g_list_free_full(op->automatic_list, free);
g_list_free(op->duplicates);
+
+ pcmk__reset_result(&op->result);
free(op);
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 502fcc9a29..1a5c933ea7 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
*
* This source code is licensed under the GNU General Public License version 2
* or later (GPLv2+) WITHOUT ANY WARRANTY.
@@ -151,6 +151,8 @@ typedef struct remote_fencing_op_s {
/*! The point at which the remote operation completed(nsec) */
long long completed_nsec;
+ /*! The (potentially intermediate) result of the operation */
+ pcmk__action_result_t result;
} remote_fencing_op_t;
void fenced_broadcast_op_result(remote_fencing_op_t *op,
--
2.27.0
From 97a2c318866adc5ef5e426c5c3b753df1fa3ab66 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:08:42 -0600
Subject: [PATCH 02/15] Refactor: fencer: track full result in
remote_fencing_op_t
Now that remote_fencing_op_t has a place for the full result,
set it before calling finalize_op(), instead of passing a separate result
object to finalize_op().
As a bonus, this simplifies the memory management, reducing the chance of
mistakes.
---
daemons/fenced/fenced_remote.c | 161 ++++++++++++++++-----------------
1 file changed, 77 insertions(+), 84 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 0fa9706140..30edbff890 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -82,8 +82,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
static void request_peer_fencing(remote_fencing_op_t *op,
peer_device_info_t *peer,
pcmk__action_result_t *result);
-static void finalize_op(remote_fencing_op_t *op, xmlNode *data,
- pcmk__action_result_t *result, bool dup);
+static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
const peer_device_info_t *chosen_peer);
@@ -485,7 +484,9 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
other->client_name, other->originator,
pcmk_exec_status_str(result->execution_status),
other->id);
- finalize_op(other, data, result, true);
+ pcmk__set_result(&other->result, result->exit_status,
+ result->execution_status, result->exit_reason);
+ finalize_op(other, data, true);
} else {
// Possible if (for example) it timed out already
@@ -520,20 +521,20 @@ delegate_from_xml(xmlNode *xml)
*
* \param[in] op Fencer operation that completed
* \param[in] data If not NULL, XML reply of last delegated fencing operation
- * \param[in] result Full operation result
* \param[in] dup Whether this operation is a duplicate of another
* (in which case, do not broadcast the result)
+ *
+ * \note The operation result should be set before calling this function.
*/
static void
-finalize_op(remote_fencing_op_t *op, xmlNode *data,
- pcmk__action_result_t *result, bool dup)
+finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
{
int level = LOG_ERR;
const char *subt = NULL;
xmlNode *local_data = NULL;
gboolean op_merged = FALSE;
- CRM_CHECK((op != NULL) && (result != NULL), return);
+ CRM_CHECK((op != NULL), return);
if (op->notify_sent) {
// Most likely, this is a timed-out action that eventually completed
@@ -557,11 +558,11 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
local_data = data;
} else if (op->delegate == NULL) {
- switch (result->execution_status) {
+ switch (op->result.execution_status) {
case PCMK_EXEC_NO_FENCE_DEVICE:
break;
case PCMK_EXEC_INVALID:
- if (result->exit_status == CRM_EX_EXPIRED) {
+ if (op->result.exit_status == CRM_EX_EXPIRED) {
break;
}
// else fall through
@@ -581,12 +582,12 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
subt = crm_element_value(data, F_SUBTYPE);
if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
/* Defer notification until the bcast message arrives */
- fenced_broadcast_op_result(op, result, op_merged);
+ fenced_broadcast_op_result(op, &op->result, op_merged);
free_xml(local_data);
return;
}
- if (pcmk__result_ok(result) || dup
+ if (pcmk__result_ok(&op->result) || dup
|| !pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
level = LOG_NOTICE;
}
@@ -595,16 +596,17 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data,
(op->target? op->target : ""),
(op->delegate? op->delegate : "unknown node"),
op->client_name, op->originator,
- (op_merged? " (merged)" : ""), crm_exit_str(result->exit_status),
- pcmk_exec_status_str(result->execution_status),
- ((result->exit_reason == NULL)? "" : ": "),
- ((result->exit_reason == NULL)? "" : result->exit_reason),
+ (op_merged? " (merged)" : ""),
+ crm_exit_str(op->result.exit_status),
+ pcmk_exec_status_str(op->result.execution_status),
+ ((op->result.exit_reason == NULL)? "" : ": "),
+ ((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
op->id);
- handle_local_reply_and_notify(op, data, result);
+ handle_local_reply_and_notify(op, data, &op->result);
if (!dup) {
- finalize_op_duplicates(op, data, result);
+ finalize_op_duplicates(op, data, &op->result);
}
/* Free non-essential parts of the record
@@ -634,7 +636,6 @@ static gboolean
remote_op_watchdog_done(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
op->op_timer_one = 0;
@@ -642,8 +643,8 @@ remote_op_watchdog_done(gpointer userdata)
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->id);
op->state = st_done;
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
- finalize_op(op, NULL, &result, false);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ finalize_op(op, NULL, false);
return G_SOURCE_REMOVE;
}
@@ -676,8 +677,6 @@ remote_op_timeout_one(gpointer userdata)
static void
finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
{
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
op->op_timer_total = 0;
crm_debug("Action '%s' targeting %s for client %s timed out "
@@ -690,13 +689,12 @@ finalize_timed_out_op(remote_fencing_op_t *op, const char *reason)
* devices, and return success.
*/
op->state = st_done;
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
} else {
op->state = st_failed;
- pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT, reason);
}
- finalize_op(op, NULL, &result, false);
- pcmk__reset_result(&result);
+ finalize_op(op, NULL, false);
}
/*!
@@ -1094,13 +1092,9 @@ fenced_handle_manual_confirmation(pcmk__client_t *client, xmlNode *msg)
set_fencing_completed(op);
op->delegate = strdup("a human");
- {
- // For the fencer's purposes, the fencing operation is done
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
- finalize_op(op, msg, &result, false);
- }
+ // For the fencer's purposes, the fencing operation is done
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ finalize_op(op, msg, false);
/* For the requester's purposes, the operation is still pending. The
* actual result will be sent asynchronously via the operation's done_cb().
@@ -1279,16 +1273,11 @@ initiate_remote_stonith_op(pcmk__client_t *client, xmlNode *request,
switch (op->state) {
case st_failed:
// advance_topology_level() exhausted levels
- {
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
- pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
- "All topology levels failed");
- crm_warn("Could not request peer fencing (%s) targeting %s "
- CRM_XS " id=%.8s", op->action, op->target, op->id);
- finalize_op(op, NULL, &result, false);
- pcmk__reset_result(&result);
- }
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_ERROR,
+ "All topology levels failed");
+ crm_warn("Could not request peer fencing (%s) targeting %s "
+ CRM_XS " id=%.8s", op->action, op->target, op->id);
+ finalize_op(op, NULL, false);
return op;
case st_duplicate:
@@ -1613,10 +1602,6 @@ static void
advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
xmlNode *msg)
{
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
-
/* Advance to the next device at this topology level, if any */
if (op->devices) {
op->devices = op->devices->next;
@@ -1644,6 +1629,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
}
if (op->devices) {
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
+
+ pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+
/* Necessary devices remain, so execute the next one */
crm_trace("Next targeting %s on behalf of %s@%s",
op->target, op->client_name, op->originator);
@@ -1659,7 +1648,8 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
crm_trace("Marking complex fencing op targeting %s as complete",
op->target);
op->state = st_done;
- finalize_op(op, msg, &result, false);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ finalize_op(op, msg, false);
}
}
@@ -1868,7 +1858,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
}
op->state = st_failed;
- finalize_op(op, NULL, result, false);
+ pcmk__set_result(&op->result, result->exit_status,
+ result->execution_status, result->exit_reason);
+ finalize_op(op, NULL, false);
} else {
crm_info("Waiting for additional peers capable of fencing (%s) %s%s%s "
@@ -2245,31 +2237,34 @@ fenced_process_fencing_reply(xmlNode *msg)
/* Could be for an event that began before we started */
/* TODO: Record the op for later querying */
crm_info("Received peer result of unknown or expired operation %s", id);
- goto done;
+ pcmk__reset_result(&result);
+ return;
}
+ op->result = result; // The operation takes ownership of the result
+
if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
crm_err("Received outdated reply for device %s (instead of %s) to "
"fence (%s) %s. Operation already timed out at peer level.",
device, (const char *) op->devices->data, op->action, op->target);
- goto done;
+ return;
}
if (pcmk__str_eq(crm_element_value(msg, F_SUBTYPE), "broadcast", pcmk__str_casei)) {
crm_debug("Finalizing action '%s' targeting %s on behalf of %s@%s: %s%s%s%s "
CRM_XS " id=%.8s",
op->action, op->target, op->client_name, op->originator,
- pcmk_exec_status_str(result.execution_status),
- (result.exit_reason == NULL)? "" : " (",
- (result.exit_reason == NULL)? "" : result.exit_reason,
- (result.exit_reason == NULL)? "" : ")", op->id);
- if (pcmk__result_ok(&result)) {
+ pcmk_exec_status_str(op->result.execution_status),
+ (op->result.exit_reason == NULL)? "" : " (",
+ (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
+ (op->result.exit_reason == NULL)? "" : ")", op->id);
+ if (pcmk__result_ok(&op->result)) {
op->state = st_done;
} else {
op->state = st_failed;
}
- finalize_op(op, msg, &result, false);
- goto done;
+ finalize_op(op, msg, false);
+ return;
} else if (!pcmk__str_eq(op->originator, stonith_our_uname, pcmk__str_casei)) {
/* If this isn't a remote level broadcast, and we are not the
@@ -2277,7 +2272,7 @@ fenced_process_fencing_reply(xmlNode *msg)
crm_err("Received non-broadcast fencing result for operation %.8s "
"we do not own (device %s targeting %s)",
op->id, device, op->target);
- goto done;
+ return;
}
if (pcmk_is_set(op->call_options, st_opt_topology)) {
@@ -2286,58 +2281,58 @@ fenced_process_fencing_reply(xmlNode *msg)
crm_notice("Action '%s' targeting %s using %s on behalf of %s@%s: %s%s%s%s",
op->action, op->target, device, op->client_name,
op->originator,
- pcmk_exec_status_str(result.execution_status),
- (result.exit_reason == NULL)? "" : " (",
- (result.exit_reason == NULL)? "" : result.exit_reason,
- (result.exit_reason == NULL)? "" : ")");
+ pcmk_exec_status_str(op->result.execution_status),
+ (op->result.exit_reason == NULL)? "" : " (",
+ (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
+ (op->result.exit_reason == NULL)? "" : ")");
/* We own the op, and it is complete. broadcast the result to all nodes
* and notify our local clients. */
if (op->state == st_done) {
- finalize_op(op, msg, &result, false);
- goto done;
+ finalize_op(op, msg, false);
+ return;
}
- if ((op->phase == 2) && !pcmk__result_ok(&result)) {
+ if ((op->phase == 2) && !pcmk__result_ok(&op->result)) {
/* A remapped "on" failed, but the node was already turned off
* successfully, so ignore the error and continue.
*/
crm_warn("Ignoring %s 'on' failure (%s%s%s) targeting %s "
"after successful 'off'",
- device, pcmk_exec_status_str(result.execution_status),
- (result.exit_reason == NULL)? "" : ": ",
- (result.exit_reason == NULL)? "" : result.exit_reason,
+ device, pcmk_exec_status_str(op->result.execution_status),
+ (op->result.exit_reason == NULL)? "" : ": ",
+ (op->result.exit_reason == NULL)? "" : op->result.exit_reason,
op->target);
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
}
- if (pcmk__result_ok(&result)) {
+ if (pcmk__result_ok(&op->result)) {
/* An operation completed successfully. Try another device if
* necessary, otherwise mark the operation as done. */
advance_topology_device_in_level(op, device, msg);
- goto done;
+ return;
} else {
/* This device failed, time to try another topology level. If no other
* levels are available, mark this operation as failed and report results. */
if (advance_topology_level(op, false) != pcmk_rc_ok) {
op->state = st_failed;
- finalize_op(op, msg, &result, false);
- goto done;
+ finalize_op(op, msg, false);
+ return;
}
}
- } else if (pcmk__result_ok(&result) && (op->devices == NULL)) {
+ } else if (pcmk__result_ok(&op->result) && (op->devices == NULL)) {
crm_trace("All done for %s", op->target);
op->state = st_done;
- finalize_op(op, msg, &result, false);
- goto done;
+ finalize_op(op, msg, false);
+ return;
- } else if ((result.execution_status == PCMK_EXEC_TIMEOUT)
+ } else if ((op->result.execution_status == PCMK_EXEC_TIMEOUT)
&& (op->devices == NULL)) {
/* If the operation timed out don't bother retrying other peers. */
op->state = st_failed;
- finalize_op(op, msg, &result, false);
- goto done;
+ finalize_op(op, msg, false);
+ return;
} else {
/* fall-through and attempt other fencing action using another peer */
@@ -2346,10 +2341,8 @@ fenced_process_fencing_reply(xmlNode *msg)
/* Retry on failure */
crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
op->target, op->originator, op->client_name,
- pcmk_exec_status_str(result.execution_status));
- request_peer_fencing(op, NULL, &result);
-done:
- pcmk__reset_result(&result);
+ pcmk_exec_status_str(op->result.execution_status));
+ request_peer_fencing(op, NULL, &op->result);
}
gboolean
--
2.27.0
From c59d062154f7c9e15e90929a20ea244d7efd7247 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:11:12 -0600
Subject: [PATCH 03/15] Refactor: fencer: drop redundant argument from
finalize_op_duplicates()
... now that the result is in the op
---
daemons/fenced/fenced_remote.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 30edbff890..8b496e1042 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -468,11 +468,9 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
*
* \param[in] op Fencer operation that completed
* \param[in] data Top-level XML to add notification to
- * \param[in] result Full operation result
*/
static void
-finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
- pcmk__action_result_t *result)
+finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data)
{
for (GList *iter = op->duplicates; iter != NULL; iter = iter->next) {
remote_fencing_op_t *other = iter->data;
@@ -482,10 +480,11 @@ finalize_op_duplicates(remote_fencing_op_t *op, xmlNode *data,
crm_debug("Performing duplicate notification for %s@%s: %s "
CRM_XS " id=%.8s",
other->client_name, other->originator,
- pcmk_exec_status_str(result->execution_status),
+ pcmk_exec_status_str(op->result.execution_status),
other->id);
- pcmk__set_result(&other->result, result->exit_status,
- result->execution_status, result->exit_reason);
+ pcmk__set_result(&other->result, op->result.exit_status,
+ op->result.execution_status,
+ op->result.exit_reason);
finalize_op(other, data, true);
} else {
@@ -606,7 +605,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
handle_local_reply_and_notify(op, data, &op->result);
if (!dup) {
- finalize_op_duplicates(op, data, &op->result);
+ finalize_op_duplicates(op, data);
}
/* Free non-essential parts of the record
--
2.27.0
From 6c49675855323a52a534afa112a0861ba2e3b1ad Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:15:17 -0600
Subject: [PATCH 04/15] Refactor: fencer: drop redundant argument from
fenced_broadcast_op_result()
... now that the op includes the result
---
daemons/fenced/fenced_history.c | 9 +++------
daemons/fenced/fenced_remote.c | 8 +++-----
daemons/fenced/pacemaker-fenced.h | 3 +--
3 files changed, 7 insertions(+), 13 deletions(-)
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 0157deadb3..5cacf36ca8 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2021 the Pacemaker project contributors
+ * Copyright 2009-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -359,8 +359,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
}
if (remote_history) {
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
init_stonith_remote_op_hash_table(&stonith_remote_op_list);
updated |= g_hash_table_size(remote_history);
@@ -378,10 +376,10 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
/* CRM_EX_EXPIRED + PCMK_EXEC_INVALID prevents finalize_op()
* from setting a delegate
*/
- pcmk__set_result(&result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
+ pcmk__set_result(&op->result, CRM_EX_EXPIRED, PCMK_EXEC_INVALID,
"Initiated by earlier fencer "
"process and presumed failed");
- fenced_broadcast_op_result(op, &result, false);
+ fenced_broadcast_op_result(op, false);
}
g_hash_table_iter_steal(&iter);
@@ -396,7 +394,6 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
*/
}
- pcmk__reset_result(&result);
g_hash_table_destroy(remote_history); /* remove what is left */
}
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 8b496e1042..fb5a5e980e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -390,16 +390,14 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
* \brief Broadcast a fence result notification to all CPG peers
*
* \param[in] op Fencer operation that completed
- * \param[in] result Full operation result
* \param[in] op_merged Whether this operation is a duplicate of another
*/
void
-fenced_broadcast_op_result(remote_fencing_op_t *op,
- pcmk__action_result_t *result, bool op_merged)
+fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
{
static int count = 0;
xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
- xmlNode *notify_data = fencing_result2xml(op, result);
+ xmlNode *notify_data = fencing_result2xml(op, &op->result);
count++;
crm_trace("Broadcasting result to peers");
@@ -581,7 +579,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
subt = crm_element_value(data, F_SUBTYPE);
if (!dup && !pcmk__str_eq(subt, "broadcast", pcmk__str_casei)) {
/* Defer notification until the bcast message arrives */
- fenced_broadcast_op_result(op, &op->result, op_merged);
+ fenced_broadcast_op_result(op, op_merged);
free_xml(local_data);
return;
}
diff --git a/daemons/fenced/pacemaker-fenced.h b/daemons/fenced/pacemaker-fenced.h
index 1a5c933ea7..6213407da3 100644
--- a/daemons/fenced/pacemaker-fenced.h
+++ b/daemons/fenced/pacemaker-fenced.h
@@ -155,8 +155,7 @@ typedef struct remote_fencing_op_s {
pcmk__action_result_t result;
} remote_fencing_op_t;
-void fenced_broadcast_op_result(remote_fencing_op_t *op,
- pcmk__action_result_t *result, bool op_merged);
+void fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged);
// Fencer-specific client flags
enum st_client_flags {
--
2.27.0
From 73994fc740b8833457b130368db479502d49f285 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:17:33 -0600
Subject: [PATCH 05/15] Refactor: fencer: drop redundant argument from
handle_local_reply_and_notify()
... now that the op includes the result
---
daemons/fenced/fenced_remote.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index fb5a5e980e..2621cb2f19 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -424,11 +424,9 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
*
* \param[in] op Fencer operation that completed
* \param[in] data Top-level XML to add notification to
- * \param[in] result Full operation result
*/
static void
-handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
- pcmk__action_result_t *result)
+handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
{
xmlNode *notify_data = NULL;
xmlNode *reply = NULL;
@@ -443,15 +441,15 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data,
crm_xml_add(data, F_STONITH_TARGET, op->target);
crm_xml_add(data, F_STONITH_OPERATION, op->action);
- reply = fenced_construct_reply(op->request, data, result);
+ reply = fenced_construct_reply(op->request, data, &op->result);
crm_xml_add(reply, F_STONITH_DELEGATE, op->delegate);
/* Send fencing OP reply to local client that initiated fencing */
do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
/* bcast to all local clients that the fencing operation happend */
- notify_data = fencing_result2xml(op, result);
- fenced_send_notification(T_STONITH_NOTIFY_FENCE, result, notify_data);
+ notify_data = fencing_result2xml(op, &op->result);
+ fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
free_xml(notify_data);
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
@@ -600,7 +598,7 @@ finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup)
((op->result.exit_reason == NULL)? "" : op->result.exit_reason),
op->id);
- handle_local_reply_and_notify(op, data, &op->result);
+ handle_local_reply_and_notify(op, data);
if (!dup) {
finalize_op_duplicates(op, data);
--
2.27.0
From 194056d18d3b550d3a53b94d558ceed03b5e5442 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:18:27 -0600
Subject: [PATCH 06/15] Refactor: fencer: drop redundant argument from
fencing_result2xml()
... now that the op includes the result
---
daemons/fenced/fenced_remote.c | 9 ++++-----
1 file changed, 4 insertions(+), 5 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 2621cb2f19..8d4f53eef6 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -362,13 +362,12 @@ undo_op_remap(remote_fencing_op_t *op)
* \brief Create notification data XML for a fencing operation result
*
* \param[in] op Fencer operation that completed
- * \param[in] result Full operation result
*
* \return Newly created XML to add as notification data
* \note The caller is responsible for freeing the result.
*/
static xmlNode *
-fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
+fencing_result2xml(remote_fencing_op_t *op)
{
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
@@ -381,7 +380,7 @@ fencing_result2xml(remote_fencing_op_t *op, pcmk__action_result_t *result)
crm_xml_add(notify_data, F_STONITH_CLIENTID, op->client_id);
crm_xml_add(notify_data, F_STONITH_CLIENTNAME, op->client_name);
- stonith__xe_set_result(notify_data, result);
+ stonith__xe_set_result(notify_data, &op->result);
return notify_data;
}
@@ -397,7 +396,7 @@ fenced_broadcast_op_result(remote_fencing_op_t *op, bool op_merged)
{
static int count = 0;
xmlNode *bcast = create_xml_node(NULL, T_STONITH_REPLY);
- xmlNode *notify_data = fencing_result2xml(op, &op->result);
+ xmlNode *notify_data = fencing_result2xml(op);
count++;
crm_trace("Broadcasting result to peers");
@@ -448,7 +447,7 @@ handle_local_reply_and_notify(remote_fencing_op_t *op, xmlNode *data)
do_local_reply(reply, op->client_id, op->call_options & st_opt_sync_call, FALSE);
/* bcast to all local clients that the fencing operation happend */
- notify_data = fencing_result2xml(op, &op->result);
+ notify_data = fencing_result2xml(op);
fenced_send_notification(T_STONITH_NOTIFY_FENCE, &op->result, notify_data);
free_xml(notify_data);
fenced_send_notification(T_STONITH_NOTIFY_HISTORY, NULL, NULL);
--
2.27.0
From c5d38cb201a1219ca95127cba9c3a778e31966a2 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:35:43 -0600
Subject: [PATCH 07/15] Refactor: fencer: drop redundant argument from
request_peer_fencing()
... now that the op includes the result
---
daemons/fenced/fenced_remote.c | 66 +++++++++++++---------------------
1 file changed, 25 insertions(+), 41 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 8d4f53eef6..7fb7695fba 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -80,8 +80,7 @@ extern xmlNode *stonith_create_op(int call_id, const char *token, const char *op
int call_options);
static void request_peer_fencing(remote_fencing_op_t *op,
- peer_device_info_t *peer,
- pcmk__action_result_t *result);
+ peer_device_info_t *peer);
static void finalize_op(remote_fencing_op_t *op, xmlNode *data, bool dup);
static void report_timeout_period(remote_fencing_op_t * op, int op_timeout);
static int get_op_total_timeout(const remote_fencing_op_t *op,
@@ -646,18 +645,16 @@ static gboolean
remote_op_timeout_one(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
op->op_timer_one = 0;
crm_notice("Peer's '%s' action targeting %s for client %s timed out " CRM_XS
" id=%.8s", op->action, op->target, op->client_name, op->id);
- pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
+ pcmk__set_result(&op->result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
"Peer did not return fence result within timeout");
-
// Try another device, if appropriate
- request_peer_fencing(op, NULL, &result);
+ request_peer_fencing(op, NULL);
return FALSE;
}
@@ -730,13 +727,10 @@ remote_op_query_timeout(gpointer data)
crm_debug("Operation %.8s targeting %s already in progress",
op->id, op->target);
} else if (op->query_results) {
- // Result won't be used in this case, but we need to pass something
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
// Query succeeded, so attempt the actual fencing
crm_debug("Query %.8s targeting %s complete (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
- request_peer_fencing(op, NULL, &result);
+ request_peer_fencing(op, NULL);
} else {
crm_debug("Query %.8s targeting %s timed out (state=%s)",
op->id, op->target, stonith_op_state_str(op->state));
@@ -1622,11 +1616,10 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
op_phase_on(op);
}
- if (op->devices) {
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
-
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ // This function is only called if the previous device succeeded
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ if (op->devices) {
/* Necessary devices remain, so execute the next one */
crm_trace("Next targeting %s on behalf of %s@%s",
op->target, op->client_name, op->originator);
@@ -1636,13 +1629,12 @@ advance_topology_device_in_level(remote_fencing_op_t *op, const char *device,
op->delay = 0;
}
- request_peer_fencing(op, NULL, &result);
+ request_peer_fencing(op, NULL);
} else {
/* We're done with all devices and phases, so finalize operation */
crm_trace("Marking complex fencing op targeting %s as complete",
op->target);
op->state = st_done;
- pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
finalize_op(op, msg, false);
}
}
@@ -1673,13 +1665,9 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
* \param[in] op Fencing operation to be executed
* \param[in] peer If NULL or topology is in use, choose best peer to execute
* the fencing, otherwise use this peer
- * \param[in] result Full result of previous failed attempt, if any (used as
- * final result only if a previous attempt failed, topology
- * is not in use, and no devices remain to be attempted)
*/
static void
-request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
- pcmk__action_result_t *result)
+request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
{
const char *device = NULL;
int timeout;
@@ -1822,27 +1810,26 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
}
}
- // This is the only case in which result will be used
- CRM_CHECK(result != NULL, return);
-
if (op->state == st_query) {
crm_info("No peers (out of %d) have devices capable of fencing "
"(%s) %s for client %s " CRM_XS " state=%s",
op->replies, op->action, op->target, op->client_name,
stonith_op_state_str(op->state));
- pcmk__reset_result(result);
- pcmk__set_result(result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
- NULL);
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
+ PCMK_EXEC_NO_FENCE_DEVICE, NULL);
} else {
if (pcmk_is_set(op->call_options, st_opt_topology)) {
- pcmk__reset_result(result);
- pcmk__set_result(result, CRM_EX_ERROR,
+ pcmk__reset_result(&op->result);
+ pcmk__set_result(&op->result, CRM_EX_ERROR,
PCMK_EXEC_NO_FENCE_DEVICE, NULL);
}
- /* ... else use result provided by caller -- overwriting it with
- PCMK_EXEC_NO_FENCE_DEVICE would prevent finalize_op() from
- setting the correct delegate if needed.
+ /* ... else use existing result from previous failed attempt
+ * (topology is not in use, and no devices remain to be attempted).
+ * Overwriting the result with PCMK_EXEC_NO_FENCE_DEVICE would
+ * prevent finalize_op() from setting the correct delegate if
+ * needed.
*/
crm_info("No peers (out of %d) are capable of fencing (%s) %s "
@@ -1852,8 +1839,6 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer,
}
op->state = st_failed;
- pcmk__set_result(&op->result, result->exit_status,
- result->execution_status, result->exit_reason);
finalize_op(op, NULL, false);
} else {
@@ -2104,7 +2089,6 @@ process_remote_stonith_query(xmlNode * msg)
peer_device_info_t *peer = NULL;
uint32_t replies_expected;
xmlNode *dev = get_xpath_object("//@" F_STONITH_REMOTE_OP_ID, msg, LOG_ERR);
- pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
CRM_CHECK(dev != NULL, return -EPROTO);
@@ -2139,7 +2123,7 @@ process_remote_stonith_query(xmlNode * msg)
peer = add_result(op, host, ndevices, dev);
}
- pcmk__set_result(&result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
+ pcmk__set_result(&op->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
if (pcmk_is_set(op->call_options, st_opt_topology)) {
/* If we start the fencing before all the topology results are in,
@@ -2148,12 +2132,12 @@ process_remote_stonith_query(xmlNode * msg)
if (op->state == st_query && all_topology_devices_found(op)) {
/* All the query results are in for the topology, start the fencing ops. */
crm_trace("All topology devices found");
- request_peer_fencing(op, peer, &result);
+ request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All topology query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
- request_peer_fencing(op, NULL, &result);
+ request_peer_fencing(op, NULL);
}
} else if (op->state == st_query) {
@@ -2165,12 +2149,12 @@ process_remote_stonith_query(xmlNode * msg)
/* we have a verified device living on a peer that is not the target */
crm_trace("Found %d verified device%s",
nverified, pcmk__plural_s(nverified));
- request_peer_fencing(op, peer, &result);
+ request_peer_fencing(op, peer);
} else if (have_all_replies) {
crm_info("All query replies have arrived, continuing (%d expected/%d received) ",
replies_expected, op->replies);
- request_peer_fencing(op, NULL, &result);
+ request_peer_fencing(op, NULL);
} else {
crm_trace("Waiting for more peer results before launching fencing operation");
@@ -2336,7 +2320,7 @@ fenced_process_fencing_reply(xmlNode *msg)
crm_trace("Next for %s on behalf of %s@%s (result was: %s)",
op->target, op->originator, op->client_name,
pcmk_exec_status_str(op->result.execution_status));
- request_peer_fencing(op, NULL, &op->result);
+ request_peer_fencing(op, NULL);
}
gboolean
--
2.27.0
From be0a0b652c13161a82b05d3104449b7bfc06e8ac Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 17:56:24 -0600
Subject: [PATCH 08/15] Feature: fencer: track full result in fencing history
Add fencing operation results when creating XML in
stonith_local_history_diff_and_merge(), and parse the results from the received
XML in stonith_xml_history_to_list().
With this, the fencer now always has full results in its op list, and returns
them in the reply for STONITH_OP_FENCE_HISTORY requests (though nothing uses
that as of this commit).
---
daemons/fenced/fenced_history.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/daemons/fenced/fenced_history.c b/daemons/fenced/fenced_history.c
index 5cacf36ca8..3ebf016e67 100644
--- a/daemons/fenced/fenced_history.c
+++ b/daemons/fenced/fenced_history.c
@@ -257,6 +257,7 @@ stonith_xml_history_to_list(xmlNode *history)
op->completed_nsec = completed_nsec;
crm_element_value_int(xml_op, F_STONITH_STATE, &state);
op->state = (enum op_state) state;
+ stonith__xe_get_result(xml_op, &op->result);
g_hash_table_replace(rv, id, op);
CRM_LOG_ASSERT(g_hash_table_lookup(rv, id) != NULL);
@@ -355,6 +356,7 @@ stonith_local_history_diff_and_merge(GHashTable *remote_history,
crm_xml_add_ll(entry, F_STONITH_DATE, op->completed);
crm_xml_add_ll(entry, F_STONITH_DATE_NSEC, op->completed_nsec);
crm_xml_add_int(entry, F_STONITH_STATE, op->state);
+ stonith__xe_set_result(entry, &op->result);
}
}
--
2.27.0
From afc5292036e212bcfc7475893e0b326b2a69ac58 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 22 Dec 2021 17:17:21 -0600
Subject: [PATCH 09/15] API: libstonithd: add exit_reason member to
stonith_history_t
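The new member is not yet used; later commits in this series populate it
from the fencer's history reply and display it with failed fencing events.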
---
include/crm/stonith-ng.h | 3 ++-
lib/fencing/st_client.c | 3 ++-
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/include/crm/stonith-ng.h b/include/crm/stonith-ng.h
index 3fe9cf54f8..2c79bfa579 100644
--- a/include/crm/stonith-ng.h
+++ b/include/crm/stonith-ng.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -111,6 +111,7 @@ typedef struct stonith_history_s {
time_t completed;
struct stonith_history_s *next;
long completed_nsec;
+ char *exit_reason;
} stonith_history_t;
typedef struct stonith_s stonith_t;
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 718739b321..57a2e03361 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2021 the Pacemaker project contributors
+ * Copyright 2004-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -735,6 +735,7 @@ void stonith_history_free(stonith_history_t *history)
free(hp->origin);
free(hp->delegate);
free(hp->client);
+ free(hp->exit_reason);
}
}
--
2.27.0
From 1b9e2896322849002a5c0a3a34c9375ea32571d6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 6 Jan 2022 18:04:15 -0600
Subject: [PATCH 10/15] Feature: fencing: return exit reason with fencing
history
libstonithd's stonith_t:cmds->history() method now parses exit reasons from the
fencer reply, and returns them in the stonith_history_t results.
---
lib/fencing/st_client.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 57a2e03361..d229b34805 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -698,6 +698,7 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
stonith_history_t *kvp;
long long completed;
long long completed_nsec = 0L;
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
kvp = calloc(1, sizeof(stonith_history_t));
kvp->target = crm_element_value_copy(op, F_STONITH_TARGET);
@@ -711,6 +712,11 @@ stonith_api_history(stonith_t * stonith, int call_options, const char *node,
kvp->completed_nsec = completed_nsec;
crm_element_value_int(op, F_STONITH_STATE, &kvp->state);
+ stonith__xe_get_result(op, &result);
+ kvp->exit_reason = result.exit_reason;
+ result.exit_reason = NULL;
+ pcmk__reset_result(&result);
+
if (last) {
last->next = kvp;
} else {
--
2.27.0
From ba4e77242e9be4ebeb2843b444ee4afad43c29f3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:44:39 -0600
Subject: [PATCH 11/15] Feature: fencing: display exit reasons with failed
fencing events
Show the exit reason, when one is available, in the HTML, text, XML, and
curses output for failed fencing events.
---
lib/fencing/st_output.c | 20 ++++++++++++++++----
tools/crm_mon_curses.c | 9 +++++++--
2 files changed, 23 insertions(+), 6 deletions(-)
diff --git a/lib/fencing/st_output.c b/lib/fencing/st_output.c
index e484278867..18924d795d 100644
--- a/lib/fencing/st_output.c
+++ b/lib/fencing/st_output.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019-2021 the Pacemaker project contributors
+ * Copyright 2019-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -11,6 +11,7 @@
#include <stdarg.h>
#include <crm/stonith-ng.h>
+#include <crm/msg_xml.h>
#include <crm/common/iso8601.h>
#include <crm/common/util.h>
#include <crm/common/xml.h>
@@ -263,8 +264,12 @@ stonith_event_html(pcmk__output_t *out, va_list args) {
char *failed_s = time_t_string(event->completed);
out->list_item(out, "failed-stonith-event",
- "%s of %s failed : delegate=%s, client=%s, origin=%s, %s='%s' %s",
+ "%s of %s failed%s%s%s: "
+ "delegate=%s, client=%s, origin=%s, %s='%s' %s",
stonith_action_str(event->action), event->target,
+ (event->exit_reason == NULL)? "" : " (",
+ (event->exit_reason == NULL)? "" : event->exit_reason,
+ (event->exit_reason == NULL)? "" : ")",
event->delegate ? event->delegate : "",
event->client, event->origin,
full_history ? "completed" : "last-failed",
@@ -296,8 +301,13 @@ stonith_event_text(pcmk__output_t *out, va_list args) {
switch (event->state) {
case st_failed:
- pcmk__indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
+ pcmk__indented_printf(out,
+ "%s of %s failed%s%s%s: "
+ "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
stonith_action_str(event->action), event->target,
+ (event->exit_reason == NULL)? "" : " (",
+ (event->exit_reason == NULL)? "" : event->exit_reason,
+ (event->exit_reason == NULL)? "" : ")",
event->delegate ? event->delegate : "",
event->client, event->origin,
full_history ? "completed" : "last-failed", buf,
@@ -341,7 +351,9 @@ stonith_event_xml(pcmk__output_t *out, va_list args) {
switch (event->state) {
case st_failed:
- crm_xml_add(node, "status", "failed");
+ pcmk__xe_set_props(node, "status", "failed",
+ XML_LRM_ATTR_EXIT_REASON, event->exit_reason,
+ NULL);
break;
case st_done:
diff --git a/tools/crm_mon_curses.c b/tools/crm_mon_curses.c
index bae3710c44..73c8516a8c 100644
--- a/tools/crm_mon_curses.c
+++ b/tools/crm_mon_curses.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2019-2021 the Pacemaker project contributors
+ * Copyright 2019-2022 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -463,8 +463,13 @@ stonith_event_console(pcmk__output_t *out, va_list args) {
switch (event->state) {
case st_failed:
- curses_indented_printf(out, "%s of %s failed: delegate=%s, client=%s, origin=%s, %s='%s'%s\n",
+ curses_indented_printf(out,
+ "%s of %s failed%s%s%s: "
+ "delegate=%s, client=%s, origin=%s, %s='%s' %s\n",
stonith_action_str(event->action), event->target,
+ (event->exit_reason == NULL)? "" : " (",
+ (event->exit_reason == NULL)? "" : event->exit_reason,
+ (event->exit_reason == NULL)? "" : ")",
event->delegate ? event->delegate : "",
event->client, event->origin,
full_history ? "completed" : "last-failed", buf,
--
2.27.0
From 8105fb4a3a786780fdf85b3d0308eaf6df1ea434 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:45:22 -0600
Subject: [PATCH 12/15] Low: schemas: copy fence-event API schema in
preparation for changes
---
include/crm/common/output_internal.h | 2 +-
xml/api/fence-event-2.15.rng | 33 ++++++++++++++++++++++++++++
2 files changed, 34 insertions(+), 1 deletion(-)
create mode 100644 xml/api/fence-event-2.15.rng
diff --git a/include/crm/common/output_internal.h b/include/crm/common/output_internal.h
index 479f0e4b43..8c5dcee17c 100644
--- a/include/crm/common/output_internal.h
+++ b/include/crm/common/output_internal.h
@@ -27,7 +27,7 @@ extern "C" {
# include <glib.h>
# include <crm/common/results.h>
-# define PCMK__API_VERSION "2.14"
+# define PCMK__API_VERSION "2.15"
#if defined(PCMK__WITH_ATTRIBUTE_OUTPUT_ARGS)
# define PCMK__OUTPUT_ARGS(ARGS...) __attribute__((output_args(ARGS)))
diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
new file mode 100644
index 0000000000..e54687cd25
--- /dev/null
+++ b/xml/api/fence-event-2.15.rng
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<grammar xmlns="http://relaxng.org/ns/structure/1.0"
+ datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
+
+ <start>
+ <ref name="fencing-history-event"/>
+ </start>
+
+ <define name="fencing-history-event">
+ <element name="fence_event">
+ <attribute name="status">
+ <choice>
+ <value>failed</value>
+ <value>success</value>
+ <value>pending</value>
+ </choice>
+ </attribute>
+ <optional>
+ <attribute name="extended-status"> <text /> </attribute>
+ </optional>
+ <optional>
+ <attribute name="delegate"> <text /> </attribute>
+ </optional>
+ <attribute name="action"> <text /> </attribute>
+ <attribute name="target"> <text /> </attribute>
+ <attribute name="client"> <text /> </attribute>
+ <attribute name="origin"> <text /> </attribute>
+ <optional>
+ <attribute name="completed"> <text /> </attribute>
+ </optional>
+ </element>
+ </define>
+</grammar>
--
2.27.0
From 46dd9b74d2ee8f7ab70a0c7fe3a998954d4029e8 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 09:47:16 -0600
Subject: [PATCH 13/15] Low: schemas: update fence-event API schema for recent
change
---
xml/api/fence-event-2.15.rng | 3 +++
1 file changed, 3 insertions(+)
diff --git a/xml/api/fence-event-2.15.rng b/xml/api/fence-event-2.15.rng
index e54687cd25..8e000cafa5 100644
--- a/xml/api/fence-event-2.15.rng
+++ b/xml/api/fence-event-2.15.rng
@@ -18,6 +18,9 @@
<optional>
<attribute name="extended-status"> <text /> </attribute>
</optional>
+ <optional>
+ <attribute name="exit-reason"> <text /> </attribute>
+ </optional>
<optional>
<attribute name="delegate"> <text /> </attribute>
</optional>
--
2.27.0
From 350e71772f67f28af6b67f864cbabc481730035c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Fri, 7 Jan 2022 11:32:09 -0600
Subject: [PATCH 14/15] Build: libstonithd: bump shared library version
... for stonith_history_t change since 2.1.2.
The struct should only ever be returned by the library as a pointer, so the
changes can be considered backward-compatible. Normally we wouldn't bump shared
library versions mid-cycle, but this will simplify expected backports of this
change.
---
lib/fencing/Makefile.am | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/fencing/Makefile.am b/lib/fencing/Makefile.am
index 1ffa3e051b..a10ddb88ec 100644
--- a/lib/fencing/Makefile.am
+++ b/lib/fencing/Makefile.am
@@ -2,7 +2,7 @@
# Original Author: Sun Jiang Dong <sunjd@cn.ibm.com>
# Copyright 2004 International Business Machines
#
-# with later changes copyright 2004-2021 the Pacemaker project contributors.
+# with later changes copyright 2004-2022 the Pacemaker project contributors.
# The version control history for this file may have further details.
#
# This source code is licensed under the GNU General Public License version 2
@@ -14,7 +14,7 @@ noinst_HEADERS = fencing_private.h
lib_LTLIBRARIES = libstonithd.la
-libstonithd_la_LDFLAGS = -version-info 33:0:7
+libstonithd_la_LDFLAGS = -version-info 34:0:8
libstonithd_la_CFLAGS = $(CFLAGS_HARDENED_LIB)
libstonithd_la_LDFLAGS += $(LDFLAGS_HARDENED_LIB)
--
2.27.0
From 63ea88620a62ff0759560a02bb5e284ebdd03eb6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 19 Jan 2022 16:53:45 -0600
Subject: [PATCH 15/15] Low: fencer: reset op result before grabbing new one
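This is a precaution, in case the operation still holds a result from an
earlier attempt when the new result is assigned.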
---
daemons/fenced/fenced_remote.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 7fb7695fba..dc4649e0fc 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -2219,6 +2219,7 @@ fenced_process_fencing_reply(xmlNode *msg)
return;
}
+ pcmk__reset_result(&op->result);
op->result = result; // The operation takes ownership of the result
if (op->devices && device && !pcmk__str_eq(op->devices->data, device, pcmk__str_casei)) {
--
2.27.0