1094 lines
40 KiB
Diff
1094 lines
40 KiB
Diff
|
From 87365f49b1bee0baa536783865fbd835a9cacc97 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Thu, 2 Dec 2021 16:12:24 -0600
|
||
|
Subject: [PATCH 01/11] Refactor: libstonithd: functionize getting notification
|
||
|
data XML
|
||
|
|
||
|
Also, only get the data when needed.
|
||
|
---
|
||
|
lib/fencing/st_client.c | 32 +++++++++++++++++++++++---------
|
||
|
1 file changed, 23 insertions(+), 9 deletions(-)
|
||
|
|
||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||
|
index 4823751267..72a0a49408 100644
|
||
|
--- a/lib/fencing/st_client.c
|
||
|
+++ b/lib/fencing/st_client.c
|
||
|
@@ -1312,6 +1312,23 @@ stonith_dump_pending_callbacks(stonith_t * stonith)
|
||
|
return g_hash_table_foreach(private->stonith_op_callback_table, stonith_dump_pending_op, NULL);
|
||
|
}
|
||
|
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Get the data section of a fencer notification
|
||
|
+ *
|
||
|
+ * \param[in] msg Notification XML
|
||
|
+ * \param[in] ntype Notification type
|
||
|
+ */
|
||
|
+static xmlNode *
|
||
|
+get_event_data_xml(xmlNode *msg, const char *ntype)
|
||
|
+{
|
||
|
+ char *data_addr = crm_strdup_printf("//%s", ntype);
|
||
|
+ xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
|
||
|
+
|
||
|
+ free(data_addr);
|
||
|
+ return data;
|
||
|
+}
|
||
|
+
|
||
|
/*
|
||
|
<notify t="st_notify" subt="st_device_register" st_op="st_device_register" st_rc="0" >
|
||
|
<st_calldata >
|
||
|
@@ -1336,17 +1353,18 @@ xml_to_event(xmlNode * msg)
|
||
|
{
|
||
|
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
|
||
|
const char *ntype = crm_element_value(msg, F_SUBTYPE);
|
||
|
- char *data_addr = crm_strdup_printf("//%s", ntype);
|
||
|
- xmlNode *data = get_xpath_object(data_addr, msg, LOG_DEBUG);
|
||
|
|
||
|
crm_log_xml_trace(msg, "stonith_notify");
|
||
|
|
||
|
crm_element_value_int(msg, F_STONITH_RC, &(event->result));
|
||
|
|
||
|
if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
|
||
|
- event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
|
||
|
+ xmlNode *data = get_event_data_xml(msg, ntype);
|
||
|
|
||
|
- if (data) {
|
||
|
+ if (data == NULL) {
|
||
|
+ crm_err("No data for %s event", ntype);
|
||
|
+ crm_log_xml_notice(msg, "BadEvent");
|
||
|
+ } else {
|
||
|
event->origin = crm_element_value_copy(data, F_STONITH_ORIGIN);
|
||
|
event->action = crm_element_value_copy(data, F_STONITH_ACTION);
|
||
|
event->target = crm_element_value_copy(data, F_STONITH_TARGET);
|
||
|
@@ -1354,14 +1372,10 @@ xml_to_event(xmlNode * msg)
|
||
|
event->id = crm_element_value_copy(data, F_STONITH_REMOTE_OP_ID);
|
||
|
event->client_origin = crm_element_value_copy(data, F_STONITH_CLIENTNAME);
|
||
|
event->device = crm_element_value_copy(data, F_STONITH_DEVICE);
|
||
|
-
|
||
|
- } else {
|
||
|
- crm_err("No data for %s event", ntype);
|
||
|
- crm_log_xml_notice(msg, "BadEvent");
|
||
|
}
|
||
|
+ event->operation = crm_element_value_copy(msg, F_STONITH_OPERATION);
|
||
|
}
|
||
|
|
||
|
- free(data_addr);
|
||
|
return event;
|
||
|
}
|
||
|
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 448f86a029d5d7e3c255d813929003a8cc2cffba Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Fri, 19 Nov 2021 17:01:23 -0600
|
||
|
Subject: [PATCH 02/11] Refactor: fencing: parse full result from fencer
|
||
|
notifications
|
||
|
|
||
|
stonith_event_t previously contained only the legacy return code for the
|
||
|
notification event. Use its new opaque member to store the full result, along
|
||
|
with accessors (available only internally for now). Nothing uses them yet.
|
||
|
---
|
||
|
include/crm/fencing/internal.h | 5 +++
|
||
|
lib/fencing/st_client.c | 68 ++++++++++++++++++++++++++++++++--
|
||
|
2 files changed, 70 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
||
|
index eff689e59b..acc16d05e9 100644
|
||
|
--- a/include/crm/fencing/internal.h
|
||
|
+++ b/include/crm/fencing/internal.h
|
||
|
@@ -187,10 +187,15 @@ bool stonith__event_state_eq(stonith_history_t *history, void *user_data);
|
||
|
bool stonith__event_state_neq(stonith_history_t *history, void *user_data);
|
||
|
|
||
|
int stonith__legacy2status(int rc);
|
||
|
+
|
||
|
int stonith__exit_status(stonith_callback_data_t *data);
|
||
|
int stonith__execution_status(stonith_callback_data_t *data);
|
||
|
const char *stonith__exit_reason(stonith_callback_data_t *data);
|
||
|
|
||
|
+int stonith__event_exit_status(stonith_event_t *event);
|
||
|
+int stonith__event_execution_status(stonith_event_t *event);
|
||
|
+const char *stonith__event_exit_reason(stonith_event_t *event);
|
||
|
+
|
||
|
/*!
|
||
|
* \internal
|
||
|
* \brief Is a fencing operation in pending state?
|
||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||
|
index 72a0a49408..f58b3a6745 100644
|
||
|
--- a/lib/fencing/st_client.c
|
||
|
+++ b/lib/fencing/st_client.c
|
||
|
@@ -1349,15 +1349,23 @@ get_event_data_xml(xmlNode *msg, const char *ntype)
|
||
|
</notify>
|
||
|
*/
|
||
|
static stonith_event_t *
|
||
|
-xml_to_event(xmlNode * msg)
|
||
|
+xml_to_event(xmlNode *msg, pcmk__action_result_t *result)
|
||
|
{
|
||
|
stonith_event_t *event = calloc(1, sizeof(stonith_event_t));
|
||
|
const char *ntype = crm_element_value(msg, F_SUBTYPE);
|
||
|
|
||
|
+ CRM_ASSERT((event != NULL) && (result != NULL));
|
||
|
+
|
||
|
crm_log_xml_trace(msg, "stonith_notify");
|
||
|
|
||
|
- crm_element_value_int(msg, F_STONITH_RC, &(event->result));
|
||
|
+ // All notification types have the operation result
|
||
|
+ event->opaque = result;
|
||
|
+ stonith__xe_get_result(msg, result);
|
||
|
+
|
||
|
+ // @COMPAT The API originally provided the result as a legacy return code
|
||
|
+ event->result = pcmk_rc2legacy(stonith__result2rc(result));
|
||
|
|
||
|
+ // Fence notifications have additional information
|
||
|
if (pcmk__str_eq(ntype, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
|
||
|
xmlNode *data = get_event_data_xml(msg, ntype);
|
||
|
|
||
|
@@ -1392,6 +1400,7 @@ event_free(stonith_event_t * event)
|
||
|
free(event->executioner);
|
||
|
free(event->device);
|
||
|
free(event->client_origin);
|
||
|
+ pcmk__reset_result((pcmk__action_result_t *) (event->opaque));
|
||
|
free(event);
|
||
|
}
|
||
|
|
||
|
@@ -1402,6 +1411,7 @@ stonith_send_notification(gpointer data, gpointer user_data)
|
||
|
stonith_notify_client_t *entry = data;
|
||
|
stonith_event_t *st_event = NULL;
|
||
|
const char *event = NULL;
|
||
|
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
|
||
|
|
||
|
if (blob->xml == NULL) {
|
||
|
crm_warn("Skipping callback - NULL message");
|
||
|
@@ -1427,7 +1437,7 @@ stonith_send_notification(gpointer data, gpointer user_data)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
- st_event = xml_to_event(blob->xml);
|
||
|
+ st_event = xml_to_event(blob->xml, &result);
|
||
|
|
||
|
crm_trace("Invoking callback for %p/%s event...", entry, event);
|
||
|
entry->notify(blob->stonith, st_event);
|
||
|
@@ -2366,6 +2376,58 @@ stonith__exit_reason(stonith_callback_data_t *data)
|
||
|
return ((pcmk__action_result_t *) data->opaque)->exit_reason;
|
||
|
}
|
||
|
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Return the exit status from an event notification
|
||
|
+ *
|
||
|
+ * \param[in] event Event
|
||
|
+ *
|
||
|
+ * \return Exit status from event
|
||
|
+ */
|
||
|
+int
|
||
|
+stonith__event_exit_status(stonith_event_t *event)
|
||
|
+{
|
||
|
+ if ((event == NULL) || (event->opaque == NULL)) {
|
||
|
+ return CRM_EX_ERROR;
|
||
|
+ }
|
||
|
+ return ((pcmk__action_result_t *) event->opaque)->exit_status;
|
||
|
+}
|
||
|
+
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Return the execution status from an event notification
|
||
|
+ *
|
||
|
+ * \param[in] event Event
|
||
|
+ *
|
||
|
+ * \return Execution status from event
|
||
|
+ */
|
||
|
+int
|
||
|
+stonith__event_execution_status(stonith_event_t *event)
|
||
|
+{
|
||
|
+ if ((event == NULL) || (event->opaque == NULL)) {
|
||
|
+ return PCMK_EXEC_UNKNOWN;
|
||
|
+ }
|
||
|
+ return ((pcmk__action_result_t *) event->opaque)->execution_status;
|
||
|
+}
|
||
|
+
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Return the exit reason from an event notification
|
||
|
+ *
|
||
|
+ * \param[in] event Event
|
||
|
+ *
|
||
|
+ * \return Exit reason from event
|
||
|
+ */
|
||
|
+const char *
|
||
|
+stonith__event_exit_reason(stonith_event_t *event)
|
||
|
+{
|
||
|
+ if ((event == NULL) || (event->opaque == NULL)) {
|
||
|
+ return NULL;
|
||
|
+ }
|
||
|
+ return ((pcmk__action_result_t *) event->opaque)->exit_reason;
|
||
|
+}
|
||
|
+
|
||
|
+
|
||
|
// Deprecated functions kept only for backward API compatibility
|
||
|
// LCOV_EXCL_START
|
||
|
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 8dab65e65fe760052d1151749a7bfb2203445813 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Fri, 19 Nov 2021 17:02:28 -0600
|
||
|
Subject: [PATCH 03/11] Refactor: fencing: parse full result from synchronous
|
||
|
fencer replies
|
||
|
|
||
|
stonith_send_command() now parses the full result from synchronous fencer
|
||
|
replies, and maps that to a legacy return code, rather than parse the legacy
|
||
|
return code directly.
|
||
|
|
||
|
The full result is not used yet, and won't be until we can break backward API
|
||
|
compatibility, since the API functions that call stonith_send_command()
|
||
|
currently return a legacy code.
|
||
|
---
|
||
|
lib/fencing/st_client.c | 8 +++++---
|
||
|
1 file changed, 5 insertions(+), 3 deletions(-)
|
||
|
|
||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||
|
index f58b3a6745..5fec7529e3 100644
|
||
|
--- a/lib/fencing/st_client.c
|
||
|
+++ b/lib/fencing/st_client.c
|
||
|
@@ -1537,11 +1537,13 @@ stonith_send_command(stonith_t * stonith, const char *op, xmlNode * data, xmlNod
|
||
|
crm_element_value_int(op_reply, F_STONITH_CALLID, &reply_id);
|
||
|
|
||
|
if (reply_id == stonith->call_id) {
|
||
|
+ pcmk__action_result_t result = PCMK__UNKNOWN_RESULT;
|
||
|
+
|
||
|
crm_trace("Synchronous reply %d received", reply_id);
|
||
|
|
||
|
- if (crm_element_value_int(op_reply, F_STONITH_RC, &rc) != 0) {
|
||
|
- rc = -ENOMSG;
|
||
|
- }
|
||
|
+ stonith__xe_get_result(op_reply, &result);
|
||
|
+ rc = pcmk_rc2legacy(stonith__result2rc(&result));
|
||
|
+ pcmk__reset_result(&result);
|
||
|
|
||
|
if ((call_options & st_opt_discard_reply) || output_data == NULL) {
|
||
|
crm_trace("Discarding reply");
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 1beb319d8c62ab93b4c08b26a4e03151906c6189 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Mon, 6 Dec 2021 17:13:44 -0600
|
||
|
Subject: [PATCH 04/11] Log: fencing: improve cts-fence-helper result logs
|
||
|
|
||
|
Use the full result from the fencing event
|
||
|
---
|
||
|
daemons/fenced/cts-fence-helper.c | 12 ++++++++----
|
||
|
1 file changed, 8 insertions(+), 4 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
|
||
|
index e222a59f9f..858cddc9de 100644
|
||
|
--- a/daemons/fenced/cts-fence-helper.c
|
||
|
+++ b/daemons/fenced/cts-fence-helper.c
|
||
|
@@ -125,10 +125,14 @@ st_callback(stonith_t * st, stonith_event_t * e)
|
||
|
crm_exit(CRM_EX_DISCONNECT);
|
||
|
}
|
||
|
|
||
|
- crm_notice("Operation %s requested by %s %s for peer %s. %s reported: %s (ref=%s)",
|
||
|
- e->operation, e->origin, e->result == pcmk_ok ? "completed" : "failed",
|
||
|
- e->target, e->executioner ? e->executioner : "<none>",
|
||
|
- pcmk_strerror(e->result), e->id);
|
||
|
+ crm_notice("Operation '%s' targeting %s by %s for %s: %s (exit=%d, ref=%s)",
|
||
|
+ ((e->operation == NULL)? "unknown" : e->operation),
|
||
|
+ ((e->target == NULL)? "no node" : e->target),
|
||
|
+ ((e->executioner == NULL)? "any node" : e->executioner),
|
||
|
+ ((e->origin == NULL)? "unknown client" : e->origin),
|
||
|
+ pcmk_exec_status_str(stonith__event_execution_status(e)),
|
||
|
+ stonith__event_exit_status(e),
|
||
|
+ ((e->id == NULL)? "none" : e->id));
|
||
|
|
||
|
if (expected_notifications) {
|
||
|
expected_notifications--;
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From b26f701833ade5d7441fba317832d6e827bd16d0 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 14 Dec 2021 16:52:09 -0600
|
||
|
Subject: [PATCH 05/11] Test: cts-fence-helper: update expected return code
|
||
|
|
||
|
Before recent changes, libstonithd obtained the fence API's legacy result code
|
||
|
directly from the fencer's XML reply, meaning that the legacy code was the
|
||
|
result of the fencer's mapping of the full result (including the action stderr).
|
||
|
|
||
|
After those changes, libstonithd now ignores the legacy code in the fencer's
|
||
|
reply, and instead maps the legacy code itself from the full result in the
|
||
|
fencer's reply.
|
||
|
|
||
|
However, the fencer's reply does not have the action stderr, so failures that
|
||
|
mapped to -pcmk_err_generic on the server side now map to -ENODATA on the
|
||
|
client side. Update cts-fence-helper's expected return code to match (neither
|
||
|
code is particularly useful, so there wouldn't be much benefit from having the
|
||
|
fencer pass the action stderr with replies, which would be considerable
|
||
|
additional work).
|
||
|
---
|
||
|
daemons/fenced/cts-fence-helper.c | 4 ++--
|
||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/fenced/cts-fence-helper.c b/daemons/fenced/cts-fence-helper.c
|
||
|
index 858cddc9de..e3113452ef 100644
|
||
|
--- a/daemons/fenced/cts-fence-helper.c
|
||
|
+++ b/daemons/fenced/cts-fence-helper.c
|
||
|
@@ -207,10 +207,10 @@ run_fence_failure_test(void)
|
||
|
"Register device1 for failure test", 1, 0);
|
||
|
|
||
|
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "off", 3, 0),
|
||
|
- "Fence failure results off", 1, -pcmk_err_generic);
|
||
|
+ "Fence failure results off", 1, -ENODATA);
|
||
|
|
||
|
single_test(st->cmds->fence(st, st_opts, "false_1_node2", "reboot", 3, 0),
|
||
|
- "Fence failure results reboot", 1, -pcmk_err_generic);
|
||
|
+ "Fence failure results reboot", 1, -ENODATA);
|
||
|
|
||
|
single_test(st->cmds->remove_device(st, st_opts, "test-id1"),
|
||
|
"Remove device1 for failure test", 1, 0);
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 123429de229c2148e320c76530b95e6ba458b9f6 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 7 Dec 2021 10:28:48 -0600
|
||
|
Subject: [PATCH 06/11] Low: controller: compare fencing targets
|
||
|
case-insensitively
|
||
|
|
||
|
... since they are node names
|
||
|
---
|
||
|
daemons/controld/controld_fencing.c | 2 +-
|
||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||
|
|
||
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||
|
index f8d2fc13f4..70e141dc28 100644
|
||
|
--- a/daemons/controld/controld_fencing.c
|
||
|
+++ b/daemons/controld/controld_fencing.c
|
||
|
@@ -466,7 +466,7 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
return;
|
||
|
|
||
|
} else if ((st_event->result == pcmk_ok)
|
||
|
- && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_none)) {
|
||
|
+ && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_casei)) {
|
||
|
|
||
|
/* We were notified of our own fencing. Most likely, either fencing was
|
||
|
* misconfigured, or fabric fencing that doesn't cut cluster
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 3a067b8e58b3aefb49b2af1c35d0ad28b2de8784 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 7 Dec 2021 10:37:56 -0600
|
||
|
Subject: [PATCH 07/11] Refactor: controller: best practices for handling
|
||
|
fencing notifications
|
||
|
|
||
|
Rename tengine_stonith_notify() to handle_fence_notification(), rename its
|
||
|
st_event argument to event, add a doxygen block, and use some new variables and
|
||
|
reformatting to make it easier to follow (and change later).
|
||
|
---
|
||
|
daemons/controld/controld_fencing.c | 131 ++++++++++++++++------------
|
||
|
1 file changed, 75 insertions(+), 56 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||
|
index 70e141dc28..00626444da 100644
|
||
|
--- a/daemons/controld/controld_fencing.c
|
||
|
+++ b/daemons/controld/controld_fencing.c
|
||
|
@@ -435,39 +435,59 @@ tengine_stonith_connection_destroy(stonith_t *st, stonith_event_t *e)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Handle an event notification from the fencing API
|
||
|
+ *
|
||
|
+ * \param[in] st Fencing API connection
|
||
|
+ * \param[in] event Fencing API event notification
|
||
|
+ */
|
||
|
static void
|
||
|
-tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
+handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
{
|
||
|
+ bool succeeded = true;
|
||
|
+ const char *executioner = "the cluster";
|
||
|
+ const char *client = "a client";
|
||
|
+
|
||
|
if (te_client_id == NULL) {
|
||
|
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
|
||
|
(unsigned long) getpid());
|
||
|
}
|
||
|
|
||
|
- if (st_event == NULL) {
|
||
|
+ if (event == NULL) {
|
||
|
crm_err("Notify data not found");
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
- crmd_alert_fencing_op(st_event);
|
||
|
+ if (event->executioner != NULL) {
|
||
|
+ executioner = event->executioner;
|
||
|
+ }
|
||
|
+ if (event->client_origin != NULL) {
|
||
|
+ client = event->client_origin;
|
||
|
+ }
|
||
|
|
||
|
- if ((st_event->result == pcmk_ok) && pcmk__str_eq("on", st_event->action, pcmk__str_casei)) {
|
||
|
- crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
||
|
- st_event->target,
|
||
|
- st_event->executioner? st_event->executioner : "<anyone>",
|
||
|
- st_event->origin);
|
||
|
- /* TODO: Hook up st_event->device */
|
||
|
- return;
|
||
|
+ if (event->result != pcmk_ok) {
|
||
|
+ succeeded = false;
|
||
|
+ }
|
||
|
|
||
|
- } else if (pcmk__str_eq("on", st_event->action, pcmk__str_casei)) {
|
||
|
- crm_err("Unfencing of %s by %s failed: %s (%d)",
|
||
|
- st_event->target,
|
||
|
- st_event->executioner? st_event->executioner : "<anyone>",
|
||
|
- pcmk_strerror(st_event->result), st_event->result);
|
||
|
- return;
|
||
|
+ crmd_alert_fencing_op(event);
|
||
|
|
||
|
- } else if ((st_event->result == pcmk_ok)
|
||
|
- && pcmk__str_eq(st_event->target, fsa_our_uname, pcmk__str_casei)) {
|
||
|
+ if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
|
||
|
+ // Unfencing doesn't need special handling, just a log message
|
||
|
+ if (succeeded) {
|
||
|
+ crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
||
|
+ event->target, executioner, event->origin);
|
||
|
+ /* TODO: Hook up event->device */
|
||
|
+ } else {
|
||
|
+ crm_err("Unfencing of %s by %s failed: %s (%d)",
|
||
|
+ event->target, executioner,
|
||
|
+ pcmk_strerror(st_event->result), st_event->result);
|
||
|
+ }
|
||
|
+ return;
|
||
|
+ }
|
||
|
|
||
|
+ if (succeeded
|
||
|
+ && pcmk__str_eq(event->target, fsa_our_uname, pcmk__str_casei)) {
|
||
|
/* We were notified of our own fencing. Most likely, either fencing was
|
||
|
* misconfigured, or fabric fencing that doesn't cut cluster
|
||
|
* communication is in use.
|
||
|
@@ -478,44 +498,41 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
* our subsequent election votes as "not part of our cluster".
|
||
|
*/
|
||
|
crm_crit("We were allegedly just fenced by %s for %s!",
|
||
|
- st_event->executioner? st_event->executioner : "the cluster",
|
||
|
- st_event->origin); /* Dumps blackbox if enabled */
|
||
|
+ executioner, event->origin); // Dumps blackbox if enabled
|
||
|
if (fence_reaction_panic) {
|
||
|
pcmk__panic(__func__);
|
||
|
} else {
|
||
|
crm_exit(CRM_EX_FATAL);
|
||
|
}
|
||
|
- return;
|
||
|
+ return; // Should never get here
|
||
|
}
|
||
|
|
||
|
- /* Update the count of stonith failures for this target, in case we become
|
||
|
+ /* Update the count of fencing failures for this target, in case we become
|
||
|
* DC later. The current DC has already updated its fail count in
|
||
|
* tengine_stonith_callback().
|
||
|
*/
|
||
|
- if (!AM_I_DC && pcmk__str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE, pcmk__str_casei)) {
|
||
|
- if (st_event->result == pcmk_ok) {
|
||
|
- st_fail_count_reset(st_event->target);
|
||
|
+ if (!AM_I_DC
|
||
|
+ && pcmk__str_eq(event->operation, T_STONITH_NOTIFY_FENCE,
|
||
|
+ pcmk__str_casei)) {
|
||
|
+
|
||
|
+ if (succeeded) {
|
||
|
+ st_fail_count_reset(event->target);
|
||
|
} else {
|
||
|
- st_fail_count_increment(st_event->target);
|
||
|
+ st_fail_count_increment(event->target);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
|
||
|
CRM_XS " initiator=%s ref=%s",
|
||
|
- st_event->target, st_event->result == pcmk_ok ? "" : " not",
|
||
|
- st_event->action,
|
||
|
- st_event->executioner ? st_event->executioner : "<anyone>",
|
||
|
- (st_event->client_origin? st_event->client_origin : "<unknown>"),
|
||
|
- pcmk_strerror(st_event->result),
|
||
|
- st_event->origin, st_event->id);
|
||
|
-
|
||
|
- if (st_event->result == pcmk_ok) {
|
||
|
- crm_node_t *peer = pcmk__search_known_node_cache(0, st_event->target,
|
||
|
+ event->target, (succeeded? "" : " not"),
|
||
|
+ event->action, executioner, client,
|
||
|
+ pcmk_strerror(event->result),
|
||
|
+ event->origin, event->id);
|
||
|
+
|
||
|
+ if (succeeded) {
|
||
|
+ crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
|
||
|
CRM_GET_PEER_ANY);
|
||
|
const char *uuid = NULL;
|
||
|
- gboolean we_are_executioner = pcmk__str_eq(st_event->executioner,
|
||
|
- fsa_our_uname,
|
||
|
- pcmk__str_casei);
|
||
|
|
||
|
if (peer == NULL) {
|
||
|
return;
|
||
|
@@ -523,10 +540,9 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
|
||
|
uuid = crm_peer_uuid(peer);
|
||
|
|
||
|
- crm_trace("target=%s dc=%s", st_event->target, fsa_our_dc);
|
||
|
- if(AM_I_DC) {
|
||
|
+ if (AM_I_DC) {
|
||
|
/* The DC always sends updates */
|
||
|
- send_stonith_update(NULL, st_event->target, uuid);
|
||
|
+ send_stonith_update(NULL, event->target, uuid);
|
||
|
|
||
|
/* @TODO Ideally, at this point, we'd check whether the fenced node
|
||
|
* hosted any guest nodes, and call remote_node_down() for them.
|
||
|
@@ -536,31 +552,33 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
* on the scheduler creating fence pseudo-events for the guests.
|
||
|
*/
|
||
|
|
||
|
- if (st_event->client_origin
|
||
|
- && !pcmk__str_eq(st_event->client_origin, te_client_id, pcmk__str_casei)) {
|
||
|
-
|
||
|
- /* Abort the current transition graph if it wasn't us
|
||
|
- * that invoked stonith to fence someone
|
||
|
+ if (!pcmk__str_eq(client, te_client_id, pcmk__str_casei)) {
|
||
|
+ /* Abort the current transition if it wasn't the cluster that
|
||
|
+ * initiated fencing.
|
||
|
*/
|
||
|
- crm_info("External fencing operation from %s fenced %s", st_event->client_origin, st_event->target);
|
||
|
- abort_transition(INFINITY, tg_restart, "External Fencing Operation", NULL);
|
||
|
+ crm_info("External fencing operation from %s fenced %s",
|
||
|
+ client, event->target);
|
||
|
+ abort_transition(INFINITY, tg_restart,
|
||
|
+ "External Fencing Operation", NULL);
|
||
|
}
|
||
|
|
||
|
/* Assume it was our leader if we don't currently have one */
|
||
|
- } else if (pcmk__str_eq(fsa_our_dc, st_event->target, pcmk__str_null_matches | pcmk__str_casei)
|
||
|
+ } else if (pcmk__str_eq(fsa_our_dc, event->target,
|
||
|
+ pcmk__str_null_matches|pcmk__str_casei)
|
||
|
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
|
||
|
|
||
|
crm_notice("Fencing target %s %s our leader",
|
||
|
- st_event->target, (fsa_our_dc? "was" : "may have been"));
|
||
|
+ event->target, (fsa_our_dc? "was" : "may have been"));
|
||
|
|
||
|
/* Given the CIB resyncing that occurs around elections,
|
||
|
* have one node update the CIB now and, if the new DC is different,
|
||
|
* have them do so too after the election
|
||
|
*/
|
||
|
- if (we_are_executioner) {
|
||
|
- send_stonith_update(NULL, st_event->target, uuid);
|
||
|
+ if (pcmk__str_eq(event->executioner, fsa_our_uname,
|
||
|
+ pcmk__str_casei)) {
|
||
|
+ send_stonith_update(NULL, event->target, uuid);
|
||
|
}
|
||
|
- add_stonith_cleanup(st_event->target);
|
||
|
+ add_stonith_cleanup(event->target);
|
||
|
}
|
||
|
|
||
|
/* If the target is a remote node, and we host its connection,
|
||
|
@@ -569,7 +587,7 @@ tengine_stonith_notify(stonith_t *st, stonith_event_t *st_event)
|
||
|
* so the failure might not otherwise be detected until the next poke.
|
||
|
*/
|
||
|
if (pcmk_is_set(peer->flags, crm_remote_node)) {
|
||
|
- remote_ra_fail(st_event->target);
|
||
|
+ remote_ra_fail(event->target);
|
||
|
}
|
||
|
|
||
|
crmd_peer_down(peer, TRUE);
|
||
|
@@ -632,7 +650,7 @@ te_connect_stonith(gpointer user_data)
|
||
|
tengine_stonith_connection_destroy);
|
||
|
stonith_api->cmds->register_notification(stonith_api,
|
||
|
T_STONITH_NOTIFY_FENCE,
|
||
|
- tengine_stonith_notify);
|
||
|
+ handle_fence_notification);
|
||
|
stonith_api->cmds->register_notification(stonith_api,
|
||
|
T_STONITH_NOTIFY_HISTORY_SYNCED,
|
||
|
tengine_stonith_history_synced);
|
||
|
@@ -837,7 +855,8 @@ tengine_stonith_callback(stonith_t *stonith, stonith_callback_data_t *data)
|
||
|
}
|
||
|
|
||
|
/* Increment the fail count now, so abort_for_stonith_failure() can
|
||
|
- * check it. Non-DC nodes will increment it in tengine_stonith_notify().
|
||
|
+ * check it. Non-DC nodes will increment it in
|
||
|
+ * handle_fence_notification().
|
||
|
*/
|
||
|
st_fail_count_increment(target);
|
||
|
abort_for_stonith_failure(abort_action, target, NULL);
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 5ec9dcbbe1ee7f6252968f87d7df5a5ea17244fb Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 7 Dec 2021 10:40:21 -0600
|
||
|
Subject: [PATCH 08/11] Log: controller: improve messages when handling fencing
|
||
|
notifications
|
||
|
|
||
|
Now that the fencing API provides a full result including exit reasons with
|
||
|
fencing event notifications, make the controller logs more useful and
|
||
|
consistent.
|
||
|
---
|
||
|
daemons/controld/controld_fencing.c | 34 ++++++++++++++++++++---------
|
||
|
1 file changed, 24 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||
|
index 00626444da..0aa9ef083c 100644
|
||
|
--- a/daemons/controld/controld_fencing.c
|
||
|
+++ b/daemons/controld/controld_fencing.c
|
||
|
@@ -448,6 +448,8 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
bool succeeded = true;
|
||
|
const char *executioner = "the cluster";
|
||
|
const char *client = "a client";
|
||
|
+ const char *reason = NULL;
|
||
|
+ int exec_status;
|
||
|
|
||
|
if (te_client_id == NULL) {
|
||
|
te_client_id = crm_strdup_printf("%s.%lu", crm_system_name,
|
||
|
@@ -466,22 +468,31 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
client = event->client_origin;
|
||
|
}
|
||
|
|
||
|
- if (event->result != pcmk_ok) {
|
||
|
+ exec_status = stonith__event_execution_status(event);
|
||
|
+ if ((stonith__event_exit_status(event) != CRM_EX_OK)
|
||
|
+ || (exec_status != PCMK_EXEC_DONE)) {
|
||
|
succeeded = false;
|
||
|
+ if (exec_status == PCMK_EXEC_DONE) {
|
||
|
+ exec_status = PCMK_EXEC_ERROR;
|
||
|
+ }
|
||
|
}
|
||
|
+ reason = stonith__event_exit_reason(event);
|
||
|
|
||
|
crmd_alert_fencing_op(event);
|
||
|
|
||
|
if (pcmk__str_eq("on", event->action, pcmk__str_none)) {
|
||
|
// Unfencing doesn't need special handling, just a log message
|
||
|
if (succeeded) {
|
||
|
- crm_notice("%s was successfully unfenced by %s (at the request of %s)",
|
||
|
- event->target, executioner, event->origin);
|
||
|
+ crm_notice("%s was unfenced by %s at the request of %s@%s",
|
||
|
+ event->target, executioner, client, event->origin);
|
||
|
/* TODO: Hook up event->device */
|
||
|
} else {
|
||
|
- crm_err("Unfencing of %s by %s failed: %s (%d)",
|
||
|
+ crm_err("Unfencing of %s by %s failed (%s%s%s) with exit status %d",
|
||
|
event->target, executioner,
|
||
|
- pcmk_strerror(st_event->result), st_event->result);
|
||
|
+ pcmk_exec_status_str(exec_status),
|
||
|
+ ((reason == NULL)? "" : ": "),
|
||
|
+ ((reason == NULL)? "" : reason),
|
||
|
+ stonith__event_exit_status(event));
|
||
|
}
|
||
|
return;
|
||
|
}
|
||
|
@@ -522,12 +533,15 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
- crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s: %s "
|
||
|
- CRM_XS " initiator=%s ref=%s",
|
||
|
+ crm_notice("Peer %s was%s terminated (%s) by %s on behalf of %s@%s: "
|
||
|
+ "%s%s%s%s " CRM_XS " event=%s",
|
||
|
event->target, (succeeded? "" : " not"),
|
||
|
- event->action, executioner, client,
|
||
|
- pcmk_strerror(event->result),
|
||
|
- event->origin, event->id);
|
||
|
+ event->action, executioner, client, event->origin,
|
||
|
+ (succeeded? "OK" : pcmk_exec_status_str(exec_status)),
|
||
|
+ ((reason == NULL)? "" : " ("),
|
||
|
+ ((reason == NULL)? "" : reason),
|
||
|
+ ((reason == NULL)? "" : ")"),
|
||
|
+ event->id);
|
||
|
|
||
|
if (succeeded) {
|
||
|
crm_node_t *peer = pcmk__search_known_node_cache(0, event->target,
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From fb484933ce7c8f3325300a9e01a114db1bbb5b70 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 7 Dec 2021 11:33:15 -0600
|
||
|
Subject: [PATCH 09/11] Refactor: controller: move alert functions into own
|
||
|
source file
|
||
|
|
||
|
---
|
||
|
daemons/controld/Makefile.am | 1 +
|
||
|
daemons/controld/controld_alerts.c | 92 +++++++++++++++++++++++++
|
||
|
daemons/controld/controld_execd_state.c | 75 --------------------
|
||
|
3 files changed, 93 insertions(+), 75 deletions(-)
|
||
|
create mode 100644 daemons/controld/controld_alerts.c
|
||
|
|
||
|
diff --git a/daemons/controld/Makefile.am b/daemons/controld/Makefile.am
|
||
|
index db45bcba4a..0a29925c0b 100644
|
||
|
--- a/daemons/controld/Makefile.am
|
||
|
+++ b/daemons/controld/Makefile.am
|
||
|
@@ -43,6 +43,7 @@ pacemaker_controld_LDADD = $(top_builddir)/lib/fencing/libstonithd.la \
|
||
|
$(CLUSTERLIBS)
|
||
|
|
||
|
pacemaker_controld_SOURCES = pacemaker-controld.c \
|
||
|
+ controld_alerts.c \
|
||
|
controld_attrd.c \
|
||
|
controld_callbacks.c \
|
||
|
controld_based.c \
|
||
|
diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
|
||
|
new file mode 100644
|
||
|
index 0000000000..bd92795cf0
|
||
|
--- /dev/null
|
||
|
+++ b/daemons/controld/controld_alerts.c
|
||
|
@@ -0,0 +1,92 @@
|
||
|
+/*
|
||
|
+ * Copyright 2012-2021 the Pacemaker project contributors
|
||
|
+ *
|
||
|
+ * The version control history for this file may have further details.
|
||
|
+ *
|
||
|
+ * This source code is licensed under the GNU General Public License version 2
|
||
|
+ * or later (GPLv2+) WITHOUT ANY WARRANTY.
|
||
|
+ */
|
||
|
+
|
||
|
+#include <crm_internal.h>
|
||
|
+
|
||
|
+#include <glib.h>
|
||
|
+#include <libxml/tree.h>
|
||
|
+
|
||
|
+#include <crm/lrmd.h>
|
||
|
+#include <crm/lrmd_internal.h>
|
||
|
+#include <crm/pengine/rules_internal.h>
|
||
|
+#include <crm/pengine/status.h>
|
||
|
+#include <crm/stonith-ng.h>
|
||
|
+
|
||
|
+#include <pacemaker-controld.h>
|
||
|
+
|
||
|
+static GList *crmd_alert_list = NULL;
|
||
|
+
|
||
|
+void
|
||
|
+crmd_unpack_alerts(xmlNode *alerts)
|
||
|
+{
|
||
|
+ pe_free_alert_list(crmd_alert_list);
|
||
|
+ crmd_alert_list = pe_unpack_alerts(alerts);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+crmd_alert_node_event(crm_node_t *node)
|
||
|
+{
|
||
|
+ lrm_state_t *lrm_state;
|
||
|
+
|
||
|
+ if (crmd_alert_list == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
+ if (lrm_state == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
|
||
|
+ node->uname, node->id, node->state);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+crmd_alert_fencing_op(stonith_event_t * e)
|
||
|
+{
|
||
|
+ char *desc;
|
||
|
+ lrm_state_t *lrm_state;
|
||
|
+
|
||
|
+ if (crmd_alert_list == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
+ if (lrm_state == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
|
||
|
+ e->action, e->target,
|
||
|
+ (e->executioner? e->executioner : "<no-one>"),
|
||
|
+ e->client_origin, e->origin,
|
||
|
+ pcmk_strerror(e->result), e->id);
|
||
|
+
|
||
|
+ lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
|
||
|
+ e->target, e->operation, desc, e->result);
|
||
|
+ free(desc);
|
||
|
+}
|
||
|
+
|
||
|
+void
|
||
|
+crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
|
||
|
+{
|
||
|
+ lrm_state_t *lrm_state;
|
||
|
+
|
||
|
+ if (crmd_alert_list == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
+ if (lrm_state == NULL) {
|
||
|
+ return;
|
||
|
+ }
|
||
|
+
|
||
|
+ lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
|
||
|
+ op);
|
||
|
+}
|
||
|
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
|
||
|
index 67c376a426..5dce6c6d59 100644
|
||
|
--- a/daemons/controld/controld_execd_state.c
|
||
|
+++ b/daemons/controld/controld_execd_state.c
|
||
|
@@ -777,78 +777,3 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state,
|
||
|
*/
|
||
|
return ((lrmd_t *) lrm_state->conn)->cmds->unregister_rsc(lrm_state->conn, rsc_id, options);
|
||
|
}
|
||
|
-
|
||
|
-/*
|
||
|
- * Functions for sending alerts via local executor connection
|
||
|
- */
|
||
|
-
|
||
|
-static GList *crmd_alert_list = NULL;
|
||
|
-
|
||
|
-void
|
||
|
-crmd_unpack_alerts(xmlNode *alerts)
|
||
|
-{
|
||
|
- pe_free_alert_list(crmd_alert_list);
|
||
|
- crmd_alert_list = pe_unpack_alerts(alerts);
|
||
|
-}
|
||
|
-
|
||
|
-void
|
||
|
-crmd_alert_node_event(crm_node_t *node)
|
||
|
-{
|
||
|
- lrm_state_t *lrm_state;
|
||
|
-
|
||
|
- if (crmd_alert_list == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
- if (lrm_state == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- lrmd_send_node_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
|
||
|
- node->uname, node->id, node->state);
|
||
|
-}
|
||
|
-
|
||
|
-void
|
||
|
-crmd_alert_fencing_op(stonith_event_t * e)
|
||
|
-{
|
||
|
- char *desc;
|
||
|
- lrm_state_t *lrm_state;
|
||
|
-
|
||
|
- if (crmd_alert_list == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
- if (lrm_state == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
|
||
|
- e->action, e->target,
|
||
|
- (e->executioner? e->executioner : "<no-one>"),
|
||
|
- e->client_origin, e->origin,
|
||
|
- pcmk_strerror(e->result), e->id);
|
||
|
-
|
||
|
- lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
|
||
|
- e->target, e->operation, desc, e->result);
|
||
|
- free(desc);
|
||
|
-}
|
||
|
-
|
||
|
-void
|
||
|
-crmd_alert_resource_op(const char *node, lrmd_event_data_t * op)
|
||
|
-{
|
||
|
- lrm_state_t *lrm_state;
|
||
|
-
|
||
|
- if (crmd_alert_list == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- lrm_state = lrm_state_find(fsa_our_uname);
|
||
|
- if (lrm_state == NULL) {
|
||
|
- return;
|
||
|
- }
|
||
|
-
|
||
|
- lrmd_send_resource_alert((lrmd_t *) lrm_state->conn, crmd_alert_list, node,
|
||
|
- op);
|
||
|
-}
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 3d0b57406bcde6682623e9d62c8ee95878345eb1 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Tue, 7 Dec 2021 11:25:41 -0600
|
||
|
Subject: [PATCH 10/11] Feature: controller,tools: improve description for
|
||
|
fencing alerts/traps
|
||
|
|
||
|
This functionizes creating a description for fencing events, so it can be used
|
||
|
by both the controller for alerts and crm_mon for traps, for consistency.
|
||
|
|
||
|
Now that we have the full result including exit reason, we can improve the
|
||
|
description, but the format is kept similar to before to minimize the change.
|
||
|
|
||
|
The alert/trap also includes the legacy return code for the event, but we can't
|
||
|
change that now because lrmd_send_fencing_alert() and the alert/trap
|
||
|
environment variables are public API.
|
||
|
---
|
||
|
daemons/controld/controld_alerts.c | 8 ++-----
|
||
|
include/crm/fencing/internal.h | 1 +
|
||
|
lib/fencing/st_client.c | 38 ++++++++++++++++++++++++++++++
|
||
|
tools/crm_mon.c | 5 ++--
|
||
|
4 files changed, 43 insertions(+), 9 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/controld/controld_alerts.c b/daemons/controld/controld_alerts.c
|
||
|
index bd92795cf0..2e0a67dba2 100644
|
||
|
--- a/daemons/controld/controld_alerts.c
|
||
|
+++ b/daemons/controld/controld_alerts.c
|
||
|
@@ -12,6 +12,7 @@
|
||
|
#include <glib.h>
|
||
|
#include <libxml/tree.h>
|
||
|
|
||
|
+#include <crm/fencing/internal.h>
|
||
|
#include <crm/lrmd.h>
|
||
|
#include <crm/lrmd_internal.h>
|
||
|
#include <crm/pengine/rules_internal.h>
|
||
|
@@ -62,12 +63,7 @@ crmd_alert_fencing_op(stonith_event_t * e)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
- desc = crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s (ref=%s)",
|
||
|
- e->action, e->target,
|
||
|
- (e->executioner? e->executioner : "<no-one>"),
|
||
|
- e->client_origin, e->origin,
|
||
|
- pcmk_strerror(e->result), e->id);
|
||
|
-
|
||
|
+ desc = stonith__event_description(e);
|
||
|
lrmd_send_fencing_alert((lrmd_t *) lrm_state->conn, crmd_alert_list,
|
||
|
e->target, e->operation, desc, e->result);
|
||
|
free(desc);
|
||
|
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
|
||
|
index acc16d05e9..d2b49f831a 100644
|
||
|
--- a/include/crm/fencing/internal.h
|
||
|
+++ b/include/crm/fencing/internal.h
|
||
|
@@ -195,6 +195,7 @@ const char *stonith__exit_reason(stonith_callback_data_t *data);
|
||
|
int stonith__event_exit_status(stonith_event_t *event);
|
||
|
int stonith__event_execution_status(stonith_event_t *event);
|
||
|
const char *stonith__event_exit_reason(stonith_event_t *event);
|
||
|
+char *stonith__event_description(stonith_event_t *event);
|
||
|
|
||
|
/*!
|
||
|
* \internal
|
||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||
|
index 5fec7529e3..b1de912b2a 100644
|
||
|
--- a/lib/fencing/st_client.c
|
||
|
+++ b/lib/fencing/st_client.c
|
||
|
@@ -2429,6 +2429,44 @@ stonith__event_exit_reason(stonith_event_t *event)
|
||
|
return ((pcmk__action_result_t *) event->opaque)->exit_reason;
|
||
|
}
|
||
|
|
||
|
+/*!
|
||
|
+ * \internal
|
||
|
+ * \brief Return a human-friendly description of a fencing event
|
||
|
+ *
|
||
|
+ * \param[in] event Event to describe
|
||
|
+ *
|
||
|
+ * \return Newly allocated string with description of \p event
|
||
|
+ * \note The caller is responsible for freeing the return value.
|
||
|
+ * This function asserts on memory errors and never returns NULL.
|
||
|
+ * \note This currently is useful only for events of type
|
||
|
+ * T_STONITH_NOTIFY_FENCE.
|
||
|
+ */
|
||
|
+char *
|
||
|
+stonith__event_description(stonith_event_t *event)
|
||
|
+{
|
||
|
+ const char *reason;
|
||
|
+ const char *status;
|
||
|
+
|
||
|
+ if (stonith__event_execution_status(event) != PCMK_EXEC_DONE) {
|
||
|
+ status = pcmk_exec_status_str(stonith__event_execution_status(event));
|
||
|
+ } else if (stonith__event_exit_status(event) != CRM_EX_OK) {
|
||
|
+ status = pcmk_exec_status_str(PCMK_EXEC_ERROR);
|
||
|
+ } else {
|
||
|
+ status = crm_exit_str(CRM_EX_OK);
|
||
|
+ }
|
||
|
+ reason = stonith__event_exit_reason(event);
|
||
|
+
|
||
|
+ return crm_strdup_printf("Operation %s of %s by %s for %s@%s: %s%s%s%s (ref=%s)",
|
||
|
+ event->action, event->target,
|
||
|
+ (event->executioner? event->executioner : "the cluster"),
|
||
|
+ (event->client_origin? event->client_origin : "a client"),
|
||
|
+ event->origin, status,
|
||
|
+ ((reason == NULL)? "" : " ("),
|
||
|
+ ((reason == NULL)? "" : reason),
|
||
|
+ ((reason == NULL)? "" : ")"),
|
||
|
+ event->id);
|
||
|
+}
|
||
|
+
|
||
|
|
||
|
// Deprecated functions kept only for backward API compatibility
|
||
|
// LCOV_EXCL_START
|
||
|
diff --git a/tools/crm_mon.c b/tools/crm_mon.c
|
||
|
index a6c459aaf7..e7b4fe2847 100644
|
||
|
--- a/tools/crm_mon.c
|
||
|
+++ b/tools/crm_mon.c
|
||
|
@@ -2237,9 +2237,8 @@ mon_st_callback_event(stonith_t * st, stonith_event_t * e)
|
||
|
/* disconnect cib as well and have everything reconnect */
|
||
|
mon_cib_connection_destroy(NULL);
|
||
|
} else if (options.external_agent) {
|
||
|
- char *desc = crm_strdup_printf("Operation %s requested by %s for peer %s: %s (ref=%s)",
|
||
|
- e->operation, e->origin, e->target, pcmk_strerror(e->result),
|
||
|
- e->id);
|
||
|
+ char *desc = stonith__event_description(e);
|
||
|
+
|
||
|
send_custom_trap(e->target, NULL, e->operation, pcmk_ok, e->result, 0, desc);
|
||
|
free(desc);
|
||
|
}
|
||
|
--
|
||
|
2.27.0
|
||
|
|
||
|
|
||
|
From 2fe03c2165680c717a1f6106c5150be7d117f1a5 Mon Sep 17 00:00:00 2001
|
||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||
|
Date: Fri, 14 Jan 2022 10:45:03 -0600
|
||
|
Subject: [PATCH 11/11] Low: controller: compare case-sensitively where
|
||
|
appropriate
|
||
|
|
||
|
---
|
||
|
daemons/controld/controld_fencing.c | 4 ++--
|
||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||
|
|
||
|
diff --git a/daemons/controld/controld_fencing.c b/daemons/controld/controld_fencing.c
|
||
|
index 0aa9ef083c..15954b2358 100644
|
||
|
--- a/daemons/controld/controld_fencing.c
|
||
|
+++ b/daemons/controld/controld_fencing.c
|
||
|
@@ -1,5 +1,5 @@
|
||
|
/*
|
||
|
- * Copyright 2004-2021 the Pacemaker project contributors
|
||
|
+ * Copyright 2004-2022 the Pacemaker project contributors
|
||
|
*
|
||
|
* The version control history for this file may have further details.
|
||
|
*
|
||
|
@@ -524,7 +524,7 @@ handle_fence_notification(stonith_t *st, stonith_event_t *event)
|
||
|
*/
|
||
|
if (!AM_I_DC
|
||
|
&& pcmk__str_eq(event->operation, T_STONITH_NOTIFY_FENCE,
|
||
|
- pcmk__str_casei)) {
|
||
|
+ pcmk__str_none)) {
|
||
|
|
||
|
if (succeeded) {
|
||
|
st_fail_count_reset(event->target);
|
||
|
--
|
||
|
2.27.0
|
||
|
|