pacemaker/SOURCES/002-fencing-reasons.patch

2101 lines
86 KiB
Diff
Raw Normal View History

2022-05-10 07:06:13 +00:00
From 95b4f87aae5fb2cf771cf9a8f8e5420b65fb213f Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Sep 2021 10:47:51 -0500
Subject: [PATCH 01/12] Refactor: fencing: use pcmk__action_result_t in
stonith_action_t
stonith_action_t previously had an rc member for a legacy return code, along
with output and error members for action stdout/stderr. When setting rc based
on the svc_action_t result, it used a mapping function svc_action_to_errno().
This replaces those with a pcmk__action_result_t member, which means we now
track the exit status and execution status as originally set by libcrmservice,
rather than the mapped rc. The library now calls the mapping function, now
returning standard codes and called result2rc(), when calling the client
callback.
The exit_reason member is unused as of this commit.
The behavior should be identical, with the small exception of
services_action_async() failure leaving the exit status as set by the services
library, which means callers will get the result2rc() mapping of the actual
result instead of the former -ECONNABORTED.
---
lib/fencing/st_client.c | 118 +++++++++++++++++++++++-----------------
1 file changed, 68 insertions(+), 50 deletions(-)
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 08adb51c6..6c607b010 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -29,6 +29,7 @@
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
#include <crm/common/xml_internal.h>
+#include <crm/services_internal.h>
#include <crm/common/mainloop.h>
@@ -57,9 +58,7 @@ struct stonith_action_s {
int max_retries;
int pid;
- int rc;
- char *output;
- char *error;
+ pcmk__action_result_t result;
};
typedef struct stonith_private_s {
@@ -120,6 +119,7 @@ static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void log_action(stonith_action_t *action, pid_t pid);
+static int result2rc(const pcmk__action_result_t *result);
/*!
* \brief Get agent namespace by name
@@ -196,6 +196,23 @@ stonith_get_namespace(const char *agent, const char *namespace_s)
return st_namespace_invalid;
}
+/*!
+ * \internal
+ * \brief Set an action's result based on services library result
+ *
+ * \param[in] action Fence action to set result for
+ * \param[in] svc_action Service action to get result from
+ */
+static void
+set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
+{
+ pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
+ NULL);
+ pcmk__set_result_output(&(action->result),
+ services__grab_stdout(svc_action),
+ services__grab_stderr(svc_action));
+}
+
gboolean
stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
{
@@ -259,19 +276,19 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
static void
log_action(stonith_action_t *action, pid_t pid)
{
- if (action->output) {
+ if (action->result.action_stdout != NULL) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
- crm_log_output(LOG_TRACE, prefix, action->output);
+ crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
free(prefix);
}
- if (action->error) {
+ if (action->result.action_stderr != NULL) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
- crm_log_output(LOG_WARNING, prefix, action->error);
+ crm_log_output(LOG_WARNING, prefix, action->result.action_stderr);
free(prefix);
}
}
@@ -645,8 +662,7 @@ stonith__destroy_action(stonith_action_t *action)
if (action->svc_action) {
services_action_free(action->svc_action);
}
- free(action->output);
- free(action->error);
+ pcmk__reset_result(&(action->result));
free(action);
}
}
@@ -678,15 +694,15 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
}
if (action != NULL) {
if (rc) {
- *rc = action->rc;
+ *rc = pcmk_rc2legacy(result2rc(&(action->result)));
}
- if (output && action->output) {
- *output = action->output;
- action->output = NULL; // hand off memory management to caller
+ if ((output != NULL) && (action->result.action_stdout != NULL)) {
+ *output = action->result.action_stdout;
+ action->result.action_stdout = NULL; // hand off ownership to caller
}
- if (error_output && action->error) {
- *error_output = action->error;
- action->error = NULL; // hand off memory management to caller
+ if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
+ *error_output = action->result.action_stderr;
+ action->result.action_stderr = NULL; // hand off ownership to caller
}
}
}
@@ -715,6 +731,9 @@ stonith_action_create(const char *agent,
action->timeout = action->remaining_timeout = timeout;
action->max_retries = FAILURE_MAX_RETRIES;
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
+ NULL);
+
if (device_args) {
char buffer[512];
const char *value = NULL;
@@ -739,7 +758,8 @@ update_remaining_timeout(stonith_action_t * action)
crm_info("Attempted to execute agent %s (%s) the maximum number of times (%d) allowed",
action->agent, action->action, action->max_retries);
action->remaining_timeout = 0;
- } else if ((action->rc != -ETIME) && diff < (action->timeout * 0.7)) {
+ } else if ((action->result.execution_status != PCMK_EXEC_TIMEOUT)
+ && (diff < (action->timeout * 0.7))) {
/* only set remaining timeout period if there is 30%
* or greater of the original timeout period left */
action->remaining_timeout = action->timeout - diff;
@@ -750,31 +770,31 @@ update_remaining_timeout(stonith_action_t * action)
}
static int
-svc_action_to_errno(svc_action_t *svc_action) {
- int rv = pcmk_ok;
+result2rc(const pcmk__action_result_t *result) {
+ int rc = pcmk_rc_ok;
- if (svc_action->status == PCMK_EXEC_TIMEOUT) {
- rv = -ETIME;
+ if (result->execution_status == PCMK_EXEC_TIMEOUT) {
+ rc = ETIME;
- } else if (svc_action->rc != PCMK_OCF_OK) {
+ } else if (result->exit_status != CRM_EX_OK) {
/* Try to provide a useful error code based on the fence agent's
* error output.
*/
- if (svc_action->stderr_data == NULL) {
- rv = -ENODATA;
+ if (result->action_stderr == NULL) {
+ rc = ENODATA;
- } else if (strstr(svc_action->stderr_data, "imed out")) {
+ } else if (strstr(result->action_stderr, "imed out")) {
/* Some agents have their own internal timeouts */
- rv = -ETIME;
+ rc = ETIME;
- } else if (strstr(svc_action->stderr_data, "Unrecognised action")) {
- rv = -EOPNOTSUPP;
+ } else if (strstr(result->action_stderr, "Unrecognised action")) {
+ rc = EOPNOTSUPP;
} else {
- rv = -pcmk_err_generic;
+ rc = pcmk_rc_error;
}
}
- return rv;
+ return rc;
}
static void
@@ -782,11 +802,7 @@ stonith_action_async_done(svc_action_t *svc_action)
{
stonith_action_t *action = (stonith_action_t *) svc_action->cb_data;
- action->rc = svc_action_to_errno(svc_action);
- action->output = svc_action->stdout_data;
- svc_action->stdout_data = NULL;
- action->error = svc_action->stderr_data;
- svc_action->stderr_data = NULL;
+ set_result_from_svc_action(action, svc_action);
svc_action->params = NULL;
@@ -795,7 +811,9 @@ stonith_action_async_done(svc_action_t *svc_action)
log_action(action, action->pid);
- if (action->rc != pcmk_ok && update_remaining_timeout(action)) {
+ if ((action->result.exit_status != CRM_EX_OK)
+ && update_remaining_timeout(action)) {
+
int rc = internal_stonith_action_execute(action);
if (rc == pcmk_ok) {
return;
@@ -803,7 +821,8 @@ stonith_action_async_done(svc_action_t *svc_action)
}
if (action->done_cb) {
- action->done_cb(action->pid, action->rc, action->output, action->userdata);
+ action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
+ action->result.action_stdout, action->userdata);
}
action->svc_action = NULL; // don't remove our caller
@@ -835,9 +854,13 @@ internal_stonith_action_execute(stonith_action_t * action)
static int stonith_sequence = 0;
char *buffer = NULL;
- if ((action == NULL) || (action->action == NULL) || (action->args == NULL)
+ CRM_CHECK(action != NULL, return -EINVAL);
+
+ if ((action->action == NULL) || (action->args == NULL)
|| (action->agent == NULL)) {
- return -EPROTO;
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
+ PCMK_EXEC_ERROR_FATAL, NULL);
+ return -EINVAL;
}
if (!action->tries) {
@@ -857,6 +880,7 @@ internal_stonith_action_execute(stonith_action_t * action)
free(buffer);
if (svc_action->rc != PCMK_OCF_UNKNOWN) {
+ set_result_from_svc_action(action, svc_action);
services_action_free(svc_action);
return -E2BIG;
}
@@ -877,10 +901,7 @@ internal_stonith_action_execute(stonith_action_t * action)
/* keep retries from executing out of control and free previous results */
if (is_retry) {
- free(action->output);
- action->output = NULL;
- free(action->error);
- action->error = NULL;
+ pcmk__reset_result(&(action->result));
sleep(1);
}
@@ -889,22 +910,19 @@ internal_stonith_action_execute(stonith_action_t * action)
if (services_action_async_fork_notify(svc_action,
&stonith_action_async_done,
&stonith_action_async_forked)) {
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
+ PCMK_EXEC_PENDING, NULL);
return pcmk_ok;
}
} else if (services_action_sync(svc_action)) { // sync success
rc = pcmk_ok;
- action->rc = svc_action_to_errno(svc_action);
- action->output = svc_action->stdout_data;
- svc_action->stdout_data = NULL;
- action->error = svc_action->stderr_data;
- svc_action->stderr_data = NULL;
} else { // sync failure
- action->rc = -ECONNABORTED;
- rc = action->rc;
+ rc = -ECONNABORTED;
}
+ set_result_from_svc_action(action, svc_action);
svc_action->params = NULL;
services_action_free(svc_action);
return rc;
--
2.27.0
From 4c8e0b0ecc53cb3883f0da0eede20b900fff48d1 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Sep 2021 11:14:31 -0500
Subject: [PATCH 02/12] Low: fencing: improve return code given back to library
callers
Expose result2rc() internally for future reuse, and expand it to handle more
cases. In theory, this can give us better log messages and status output for
failures.
---
include/crm/fencing/internal.h | 1 +
lib/fencing/st_client.c | 63 +++++++++++++++++++++-------------
2 files changed, 41 insertions(+), 23 deletions(-)
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index fa9059e6f..0d23967bb 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -60,6 +60,7 @@ stonith_action_t *stonith_action_create(const char *agent,
void stonith__destroy_action(stonith_action_t *action);
void stonith__action_result(stonith_action_t *action, int *rc, char **output,
char **error_output);
+int stonith__result2rc(const pcmk__action_result_t *result);
int
stonith_action_execute_async(stonith_action_t * action,
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 6c607b010..809be1640 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -119,7 +119,6 @@ static void stonith_connection_destroy(gpointer user_data);
static void stonith_send_notification(gpointer data, gpointer user_data);
static int internal_stonith_action_execute(stonith_action_t * action);
static void log_action(stonith_action_t *action, pid_t pid);
-static int result2rc(const pcmk__action_result_t *result);
/*!
* \brief Get agent namespace by name
@@ -694,7 +693,7 @@ stonith__action_result(stonith_action_t *action, int *rc, char **output,
}
if (action != NULL) {
if (rc) {
- *rc = pcmk_rc2legacy(result2rc(&(action->result)));
+ *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
}
if ((output != NULL) && (action->result.action_stdout != NULL)) {
*output = action->result.action_stdout;
@@ -769,32 +768,49 @@ update_remaining_timeout(stonith_action_t * action)
return action->remaining_timeout ? TRUE : FALSE;
}
-static int
-result2rc(const pcmk__action_result_t *result) {
- int rc = pcmk_rc_ok;
+/*!
+ * \internal
+ * \brief Map a fencing action result to a standard return code
+ *
+ * \param[in] result Fencing action result to map
+ *
+ * \return Standard Pacemaker return code that best corresponds to \p result
+ */
+int
+stonith__result2rc(const pcmk__action_result_t *result)
+{
+ switch (result->execution_status) {
+ case PCMK_EXEC_CANCELLED: return ECANCELED;
+ case PCMK_EXEC_TIMEOUT: return ETIME;
+ case PCMK_EXEC_NOT_INSTALLED: return ENOENT;
+ case PCMK_EXEC_NOT_SUPPORTED: return EOPNOTSUPP;
+ case PCMK_EXEC_NOT_CONNECTED: return ENOTCONN;
+ case PCMK_EXEC_NO_FENCE_DEVICE: return ENODEV;
+ case PCMK_EXEC_NO_SECRETS: return EACCES;
+ default: break;
+ }
- if (result->execution_status == PCMK_EXEC_TIMEOUT) {
- rc = ETIME;
+ if (result->exit_status == CRM_EX_OK) {
+ return pcmk_rc_ok;
+ }
- } else if (result->exit_status != CRM_EX_OK) {
- /* Try to provide a useful error code based on the fence agent's
- * error output.
- */
- if (result->action_stderr == NULL) {
- rc = ENODATA;
+ // Try to provide useful error code based on result's error output
- } else if (strstr(result->action_stderr, "imed out")) {
- /* Some agents have their own internal timeouts */
- rc = ETIME;
+ if (result->action_stderr == NULL) {
+ return ENODATA;
- } else if (strstr(result->action_stderr, "Unrecognised action")) {
- rc = EOPNOTSUPP;
+ } else if (strcasestr(result->action_stderr, "timed out")
+ || strcasestr(result->action_stderr, "timeout")) {
+ return ETIME;
- } else {
- rc = pcmk_rc_error;
- }
+ } else if (strcasestr(result->action_stderr, "unrecognised action")
+ || strcasestr(result->action_stderr, "unrecognized action")
+ || strcasestr(result->action_stderr, "unsupported action")) {
+ return EOPNOTSUPP;
}
- return rc;
+
+ // Oh well, we tried
+ return pcmk_rc_error;
}
static void
@@ -821,7 +837,8 @@ stonith_action_async_done(svc_action_t *svc_action)
}
if (action->done_cb) {
- action->done_cb(action->pid, pcmk_rc2legacy(result2rc(&(action->result))),
+ action->done_cb(action->pid,
+ pcmk_rc2legacy(stonith__result2rc(&(action->result))),
action->result.action_stdout, action->userdata);
}
--
2.27.0
From 153c9b552a5bad9dd36e8635fa478ed9cad1f240 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 7 Oct 2021 11:35:44 -0500
Subject: [PATCH 03/12] Refactor: fencing: return full result from
stonith__action_result()
Previously, stonith__action_result() grabbed an action's legacy rc, stdout, and
stderr separately. Now, directly return a pointer to the action's result
object, and map that to a legacy rc in the callers when needed.
---
include/crm/fencing/internal.h | 3 +--
lib/fencing/st_client.c | 36 ++++---------------------
lib/fencing/st_rhcs.c | 48 ++++++++++++++++++++++++----------
3 files changed, 40 insertions(+), 47 deletions(-)
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index 0d23967bb..4e9f50fe8 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -58,8 +58,7 @@ stonith_action_t *stonith_action_create(const char *agent,
GHashTable * port_map,
const char * host_arg);
void stonith__destroy_action(stonith_action_t *action);
-void stonith__action_result(stonith_action_t *action, int *rc, char **output,
- char **error_output);
+pcmk__action_result_t *stonith__action_result(stonith_action_t *action);
int stonith__result2rc(const pcmk__action_result_t *result);
int
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 809be1640..b9df18465 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -670,40 +670,14 @@ stonith__destroy_action(stonith_action_t *action)
* \internal
* \brief Get the result of an executed stonith action
*
- * \param[in,out] action Executed action
- * \param[out] rc Where to store result code (or NULL)
- * \param[out] output Where to store standard output (or NULL)
- * \param[out] error_output Where to store standard error output (or NULL)
+ * \param[in] action Executed action
*
- * \note If output or error_output is not NULL, the caller is responsible for
- * freeing the memory.
+ * \return Pointer to action's result (or NULL if \p action is NULL)
*/
-void
-stonith__action_result(stonith_action_t *action, int *rc, char **output,
- char **error_output)
+pcmk__action_result_t *
+stonith__action_result(stonith_action_t *action)
{
- if (rc) {
- *rc = pcmk_ok;
- }
- if (output) {
- *output = NULL;
- }
- if (error_output) {
- *error_output = NULL;
- }
- if (action != NULL) {
- if (rc) {
- *rc = pcmk_rc2legacy(stonith__result2rc(&(action->result)));
- }
- if ((output != NULL) && (action->result.action_stdout != NULL)) {
- *output = action->result.action_stdout;
- action->result.action_stdout = NULL; // hand off ownership to caller
- }
- if ((error_output != NULL) && (action->result.action_stderr != NULL)) {
- *error_output = action->result.action_stderr;
- action->result.action_stderr = NULL; // hand off ownership to caller
- }
- }
+ return (action == NULL)? NULL : &(action->result);
}
#define FAILURE_MAX_RETRIES 2
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
index 89a2625bd..23e694975 100644
--- a/lib/fencing/st_rhcs.c
+++ b/lib/fencing/st_rhcs.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2020 the Pacemaker project contributors
+ * Copyright 2004-2021 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -123,10 +123,10 @@ stonith_rhcs_parameter_not_required(xmlNode *metadata, const char *parameter)
static int
stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
{
- char *buffer = NULL;
xmlNode *xml = NULL;
xmlNode *actions = NULL;
xmlXPathObject *xpathObj = NULL;
+ pcmk__action_result_t *result = NULL;
stonith_action_t *action = stonith_action_create(agent, "metadata", NULL, 0,
5, NULL, NULL, NULL);
int rc = stonith__execute(action);
@@ -138,23 +138,31 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
return rc;
}
- stonith__action_result(action, &rc, &buffer, NULL);
- stonith__destroy_action(action);
- if (rc < 0) {
- crm_warn("Metadata action for %s failed: %s " CRM_XS "rc=%d",
- agent, pcmk_strerror(rc), rc);
- free(buffer);
- return rc;
+ result = stonith__action_result(action);
+
+ if (result->execution_status != PCMK_EXEC_DONE) {
+ crm_warn("Could not execute metadata action for %s: %s",
+ agent, pcmk_exec_status_str(result->execution_status));
+ stonith__destroy_action(action);
+ return pcmk_rc2legacy(stonith__result2rc(result));
}
- if (buffer == NULL) {
+ if (result->exit_status != CRM_EX_OK) {
+ crm_warn("Metadata action for %s returned error code %d",
+ agent, result->exit_status);
+ stonith__destroy_action(action);
+ return pcmk_rc2legacy(stonith__result2rc(result));
+ }
+
+ if (result->action_stdout == NULL) {
crm_warn("Metadata action for %s returned no data", agent);
+ stonith__destroy_action(action);
return -ENODATA;
}
- xml = string2xml(buffer);
- free(buffer);
- buffer = NULL;
+ xml = string2xml(result->action_stdout);
+ stonith__destroy_action(action);
+
if (xml == NULL) {
crm_warn("Metadata for %s is invalid", agent);
return -pcmk_err_schema_validation;
@@ -289,7 +297,19 @@ stonith__rhcs_validate(stonith_t *st, int call_options, const char *target,
rc = stonith__execute(action);
if (rc == pcmk_ok) {
- stonith__action_result(action, &rc, output, error_output);
+ pcmk__action_result_t *result = stonith__action_result(action);
+
+ rc = pcmk_rc2legacy(stonith__result2rc(result));
+
+ // Take ownership of output so stonith__destroy_action() doesn't free it
+ if (output != NULL) {
+ *output = result->action_stdout;
+ result->action_stdout = NULL;
+ }
+ if (error_output != NULL) {
+ *error_output = result->action_stderr;
+ result->action_stderr = NULL;
+ }
}
stonith__destroy_action(action);
return rc;
--
2.27.0
From 7f7067014357cccb229a0bef091e234eb3765f7a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Sep 2021 13:05:54 -0500
Subject: [PATCH 04/12] Refactor: fencing: pass full result to async action
callback
When executing an asynchronous fence agent command, the fencing library gets
the full result (exit status, execution status, and exit reason) from the
services library, then maps that to a legacy return code.
Now, pass the full result object to the fencing async callback, rather than
separate arguments for legacy code and stdout. The mapping to a legacy code now
happens in the fencer rather than the fencing library.
The goal of this and following commits is to push the full result object
further down the code path, so that ultimately the full result is always
available internally, and the legacy code mapping is only done for backward
compatibility when sending the result back to a client.
This commit focuses on the async callback (done_cb() in both the fencer's
async_command_t and the fencing library's stonith_action_t). Later commits will
follow the chain:
st_child_done() and stonith_fence_get_devices_cb()
-> stonith_send_async_reply()
-> stonith_construct_async_reply() and log_async_result()
---
daemons/fenced/fenced_commands.c | 78 +++++++++++++++++++++-----------
include/crm/fencing/internal.h | 3 +-
lib/fencing/st_client.c | 10 ++--
3 files changed, 58 insertions(+), 33 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index b5ae28d90..d5d04ae69 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -62,7 +62,8 @@ struct device_search_s {
};
static gboolean stonith_device_dispatch(gpointer user_data);
-static void st_child_done(int pid, int rc, const char *output, void *user_data);
+static void st_child_done(int pid, const pcmk__action_result_t *result,
+ void *user_data);
static void stonith_send_reply(xmlNode * reply, int call_options, const char *remote_peer,
const char *client_id);
@@ -99,7 +100,8 @@ typedef struct async_command_s {
GList *device_next;
void *internal_user_data;
- void (*done_cb) (int pid, int rc, const char *output, void *user_data);
+ void (*done_cb) (int pid, const pcmk__action_result_t *result,
+ void *user_data);
guint timer_sigterm;
guint timer_sigkill;
/*! If the operation timed out, this is the last signal
@@ -377,13 +379,25 @@ get_agent_metadata_cb(gpointer data) {
* \internal
* \brief Call a command's action callback for an internal (not library) result
*
- * \param[in] cmd Command to report result for
- * \param[in] rc Legacy return code to pass to callback
+ * \param[in] cmd Command to report result for
+ * \param[in] execution_status Execution status to use for result
+ * \param[in] exit_status Exit status to use for result
+ * \param[in] exit_reason Exit reason to use for result
*/
static void
-report_internal_result(async_command_t *cmd, int rc)
+report_internal_result(async_command_t *cmd, int exit_status,
+ int execution_status, const char *exit_reason)
{
- cmd->done_cb(0, rc, NULL, cmd);
+ pcmk__action_result_t result = {
+ // Ensure we don't pass garbage to free()
+ .exit_reason = NULL,
+ .action_stdout = NULL,
+ .action_stderr = NULL
+ };
+
+ pcmk__set_result(&result, exit_status, execution_status, exit_reason);
+ cmd->done_cb(0, &result, cmd);
+ pcmk__reset_result(&result);
}
static gboolean
@@ -446,7 +460,7 @@ stonith_device_execute(stonith_device_t * device)
}
} else {
crm_info("Faking success for %s watchdog operation", cmd->action);
- report_internal_result(cmd, pcmk_ok);
+ report_internal_result(cmd, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
goto done;
}
}
@@ -462,7 +476,8 @@ stonith_device_execute(stonith_device_t * device)
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
- report_internal_result(cmd, -EACCES);
+ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
+ NULL);
goto done;
}
}
@@ -501,7 +516,7 @@ stonith_device_execute(stonith_device_t * device)
cmd->done_cb, fork_cb);
if (exec_rc < 0) {
cmd->activating_on = NULL;
- report_internal_result(cmd, exec_rc);
+ cmd->done_cb(0, stonith__action_result(action), cmd);
stonith__destroy_action(action);
}
@@ -625,7 +640,8 @@ free_device(gpointer data)
async_command_t *cmd = gIter->data;
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
- report_internal_result(cmd, -ENODEV);
+ report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ NULL);
}
g_list_free(device->pending_ops);
@@ -1079,7 +1095,8 @@ schedule_internal_command(const char *origin,
const char *victim,
int timeout,
void *internal_user_data,
- void (*done_cb) (int pid, int rc, const char *output,
+ void (*done_cb) (int pid,
+ const pcmk__action_result_t *result,
void *user_data))
{
async_command_t *cmd = NULL;
@@ -1111,7 +1128,7 @@ enum fence_status_code {
};
static void
-status_search_cb(int pid, int rc, const char *output, void *user_data)
+status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
@@ -1127,7 +1144,7 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
mainloop_set_trigger(dev->work);
- switch (rc) {
+ switch (result->exit_status) {
case fence_status_unknown:
crm_trace("%s reported it cannot fence %s", dev->id, search->host);
break;
@@ -1141,14 +1158,15 @@ status_search_cb(int pid, int rc, const char *output, void *user_data)
default:
crm_warn("Assuming %s cannot fence %s "
"(status returned unknown code %d)",
- dev->id, search->host, rc);
+ dev->id, search->host, result->exit_status);
break;
}
search_devices_record_result(search, dev->id, can);
}
static void
-dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
+dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
+ void *user_data)
{
async_command_t *cmd = user_data;
struct device_search_s *search = cmd->internal_user_data;
@@ -1169,21 +1187,21 @@ dynamic_list_search_cb(int pid, int rc, const char *output, void *user_data)
mainloop_set_trigger(dev->work);
- if (rc == CRM_EX_OK) {
+ if (result->exit_status == CRM_EX_OK) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
- dev->targets = stonith__parse_targets(output);
+ dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
crm_info("Reusing most recent target list for %s "
"because list returned error code %d",
- dev->id, rc);
+ dev->id, result->exit_status);
} else { // We have never successfully executed list
crm_warn("Assuming %s cannot fence %s "
"because list returned error code %d",
- dev->id, search->host, rc);
+ dev->id, search->host, result->exit_status);
/* Fall back to pcmk_host_check="status" if the user didn't explicitly
* specify "dynamic-list".
@@ -2407,7 +2425,7 @@ cancel_stonith_command(async_command_t * cmd)
}
static void
-st_child_done(int pid, int rc, const char *output, void *user_data)
+st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
{
stonith_device_t *device = NULL;
stonith_device_t *next_device = NULL;
@@ -2423,7 +2441,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
/* The device is ready to do something else now */
device = g_hash_table_lookup(device_list, cmd->device);
if (device) {
- if (!device->verified && (rc == pcmk_ok) &&
+ if (!device->verified && (result->exit_status == CRM_EX_OK) &&
(pcmk__strcase_any_of(cmd->action, "list", "monitor", "status", NULL))) {
device->verified = TRUE;
@@ -2432,7 +2450,7 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
mainloop_set_trigger(device->work);
}
- if (rc == 0) {
+ if (result->exit_status == CRM_EX_OK) {
GList *iter;
/* see if there are any required devices left to execute for this op */
for (iter = cmd->device_next; iter != NULL; iter = iter->next) {
@@ -2445,7 +2463,8 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
next_device = NULL;
}
- } else if (rc != 0 && cmd->device_next && (is_action_required(cmd->action, device) == FALSE)) {
+ } else if ((cmd->device_next != NULL)
+ && !is_action_required(cmd->action, device)) {
/* if this device didn't work out, see if there are any others we can try.
* if the failed device was 'required', we can't pick another device. */
next_device = g_hash_table_lookup(device_list, cmd->device_next->data);
@@ -2454,16 +2473,19 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
/* this operation requires more fencing, hooray! */
if (next_device) {
- log_async_result(cmd, rc, pid, next_device->id, output, FALSE);
+ log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
+ next_device->id, result->action_stdout, FALSE);
schedule_stonith_command(cmd, next_device);
/* Prevent cmd from being freed */
cmd = NULL;
goto done;
}
- stonith_send_async_reply(cmd, output, rc, pid, false);
+ stonith_send_async_reply(cmd, result->action_stdout,
+ pcmk_rc2legacy(stonith__result2rc(result)), pid,
+ false);
- if (rc != 0) {
+ if (result->exit_status != CRM_EX_OK) {
goto done;
}
@@ -2509,7 +2531,9 @@ st_child_done(int pid, int rc, const char *output, void *user_data)
cmd_list = g_list_remove_link(cmd_list, gIter);
- stonith_send_async_reply(cmd_other, output, rc, pid, true);
+ stonith_send_async_reply(cmd_other, result->action_stdout,
+ pcmk_rc2legacy(stonith__result2rc(result)),
+ pid, true);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
diff --git a/include/crm/fencing/internal.h b/include/crm/fencing/internal.h
index 4e9f50fe8..6a7e4232c 100644
--- a/include/crm/fencing/internal.h
+++ b/include/crm/fencing/internal.h
@@ -64,7 +64,8 @@ int stonith__result2rc(const pcmk__action_result_t *result);
int
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
- void (*done) (int pid, int rc, const char *output,
+ void (*done) (int pid,
+ const pcmk__action_result_t *result,
void *user_data),
void (*fork_cb) (int pid, void *user_data));
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index b9df18465..59dcab9a3 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -46,7 +46,8 @@ struct stonith_action_s {
int timeout;
int async;
void *userdata;
- void (*done_cb) (int pid, int status, const char *output, void *user_data);
+ void (*done_cb) (int pid, const pcmk__action_result_t *result,
+ void *user_data);
void (*fork_cb) (int pid, void *user_data);
svc_action_t *svc_action;
@@ -811,9 +812,7 @@ stonith_action_async_done(svc_action_t *svc_action)
}
if (action->done_cb) {
- action->done_cb(action->pid,
- pcmk_rc2legacy(stonith__result2rc(&(action->result))),
- action->result.action_stdout, action->userdata);
+ action->done_cb(action->pid, &(action->result), action->userdata);
}
action->svc_action = NULL; // don't remove our caller
@@ -933,7 +932,8 @@ internal_stonith_action_execute(stonith_action_t * action)
int
stonith_action_execute_async(stonith_action_t * action,
void *userdata,
- void (*done) (int pid, int rc, const char *output,
+ void (*done) (int pid,
+ const pcmk__action_result_t *result,
void *user_data),
void (*fork_cb) (int pid, void *user_data))
{
--
2.27.0
From bbd022306df7a873c0ecb2be2d33c56fbf327b8c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Sep 2021 11:51:28 -0500
Subject: [PATCH 05/12] Feature: fencing: set exit reason for internal
execution errors
... most importantly, copying any exit reason set by the services library.
This ensures that the stonith_action_t exit reason is set when appropriate.
However, nothing uses it as of this commit.
---
daemons/fenced/fenced_commands.c | 4 ++--
lib/fencing/st_client.c | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index d5d04ae69..f55a32649 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -477,7 +477,7 @@ stonith_device_execute(stonith_device_t * device)
"because unable to load CIB secrets: %s",
device->id, pcmk_rc_str(exec_rc));
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_SECRETS,
- NULL);
+ "Failed to get CIB secrets");
goto done;
}
}
@@ -641,7 +641,7 @@ free_device(gpointer data)
crm_warn("Removal of device '%s' purged operation '%s'", device->id, cmd->action);
report_internal_result(cmd, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
- NULL);
+ "Device was removed before action could be executed");
}
g_list_free(device->pending_ops);
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 59dcab9a3..3d4127eff 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -207,7 +207,7 @@ static void
set_result_from_svc_action(stonith_action_t *action, svc_action_t *svc_action)
{
pcmk__set_result(&(action->result), svc_action->rc, svc_action->status,
- NULL);
+ services__exit_reason(svc_action));
pcmk__set_result_output(&(action->result),
services__grab_stdout(svc_action),
services__grab_stderr(svc_action));
@@ -706,7 +706,7 @@ stonith_action_create(const char *agent,
action->max_retries = FAILURE_MAX_RETRIES;
pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_UNKNOWN,
- NULL);
+ "Initialization bug in fencing library");
if (device_args) {
char buffer[512];
@@ -849,7 +849,7 @@ internal_stonith_action_execute(stonith_action_t * action)
if ((action->action == NULL) || (action->args == NULL)
|| (action->agent == NULL)) {
pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN_ERROR,
- PCMK_EXEC_ERROR_FATAL, NULL);
+ PCMK_EXEC_ERROR_FATAL, "Bug in fencing library");
return -EINVAL;
}
--
2.27.0
From ed08f600688af1d25412d2427502ba5d4a55c0d6 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 7 Oct 2021 12:06:10 -0500
Subject: [PATCH 06/12] Fix: fencer: handle dynamic target query failures
better
Previously, the callbacks for list and status queries checked only the result's
exit status. However, the services library will use PCMK_OCF_UNKNOWN_ERROR (1)
as the exit status for internal failures, and that value signifies a recognized
node (not an error) for fence list actions.
Now, the callbacks check the execution status as well.
---
daemons/fenced/fenced_commands.c | 46 +++++++++++++++++++++++++++-----
1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index f55a32649..7b3fb25a1 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -1144,6 +1144,18 @@ status_search_cb(int pid, const pcmk__action_result_t *result, void *user_data)
mainloop_set_trigger(dev->work);
+ if (result->execution_status != PCMK_EXEC_DONE) {
+ crm_warn("Assuming %s cannot fence %s "
+ "because status could not be executed: %s%s%s%s",
+ dev->id, search->host,
+ pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ search_devices_record_result(search, dev->id, FALSE);
+ return;
+ }
+
switch (result->exit_status) {
case fence_status_unknown:
crm_trace("%s reported it cannot fence %s", dev->id, search->host);
@@ -1187,21 +1199,41 @@ dynamic_list_search_cb(int pid, const pcmk__action_result_t *result,
mainloop_set_trigger(dev->work);
- if (result->exit_status == CRM_EX_OK) {
+ if ((result->execution_status == PCMK_EXEC_DONE)
+ && (result->exit_status == CRM_EX_OK)) {
crm_info("Refreshing target list for %s", dev->id);
g_list_free_full(dev->targets, free);
dev->targets = stonith__parse_targets(result->action_stdout);
dev->targets_age = time(NULL);
} else if (dev->targets != NULL) {
- crm_info("Reusing most recent target list for %s "
- "because list returned error code %d",
- dev->id, result->exit_status);
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ crm_info("Reusing most recent target list for %s "
+ "because list returned error code %d",
+ dev->id, result->exit_status);
+ } else {
+ crm_info("Reusing most recent target list for %s "
+ "because list could not be executed: %s%s%s%s",
+ dev->id, pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ }
} else { // We have never successfully executed list
- crm_warn("Assuming %s cannot fence %s "
- "because list returned error code %d",
- dev->id, search->host, result->exit_status);
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ crm_warn("Assuming %s cannot fence %s "
+ "because list returned error code %d",
+ dev->id, search->host, result->exit_status);
+ } else {
+ crm_warn("Assuming %s cannot fence %s "
+ "because list could not be executed: %s%s%s%s",
+ dev->id, search->host,
+ pcmk_exec_status_str(result->execution_status),
+ ((result->exit_reason == NULL)? "" : " ("),
+ ((result->exit_reason == NULL)? "" : result->exit_reason),
+ ((result->exit_reason == NULL)? "" : ")"));
+ }
/* Fall back to pcmk_host_check="status" if the user didn't explicitly
* specify "dynamic-list".
--
2.27.0
From 5a30238a3b8691a5fc20f53906c0efcc50193306 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 21 Sep 2021 15:57:50 -0500
Subject: [PATCH 07/12] Refactor: fencer: pass result object when sending an
async reply
... via stonith_send_async_reply(), instead of sending the mapped legacy code
and action stdout separately. Also, drop the "stonith_" prefix since the
function is static.
This moves the mapping from the stonith_send_async_reply() callers to the
function itself, so we use the result object and standard codes as long as
possible, and map to a legacy code only where needed.
---
daemons/fenced/fenced_commands.c | 62 +++++++++++++++++++-------------
daemons/fenced/fenced_remote.c | 2 +-
2 files changed, 39 insertions(+), 25 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 7b3fb25a1..e5f8162ce 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2376,12 +2376,28 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
}
}
+/*!
+ * \internal
+ * \brief Reply to requester after asynchronous command completion
+ *
+ * \param[in] cmd Command that completed
+ * \param[in] result Result of command
+ * \param[in] pid Process ID of command, if available
+ * \param[in] merged If true, command was merged with another, not executed
+ */
static void
-stonith_send_async_reply(async_command_t *cmd, const char *output, int rc,
- int pid, bool merged)
+send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
+ int pid, bool merged)
{
xmlNode *reply = NULL;
gboolean bcast = FALSE;
+ const char *output = NULL;
+ int rc = pcmk_ok;
+
+ CRM_CHECK((cmd != NULL) && (result != NULL), return);
+
+ output = result->action_stdout;
+ rc = pcmk_rc2legacy(stonith__result2rc(result));
reply = stonith_construct_async_reply(cmd, output, NULL, rc);
@@ -2513,9 +2529,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
goto done;
}
- stonith_send_async_reply(cmd, result->action_stdout,
- pcmk_rc2legacy(stonith__result2rc(result)), pid,
- false);
+ send_async_reply(cmd, result, pid, false);
if (result->exit_status != CRM_EX_OK) {
goto done;
@@ -2563,9 +2577,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
cmd_list = g_list_remove_link(cmd_list, gIter);
- stonith_send_async_reply(cmd_other, result->action_stdout,
- pcmk_rc2legacy(stonith__result2rc(result)),
- pid, true);
+ send_async_reply(cmd_other, result, pid, true);
cancel_stonith_command(cmd_other);
free_async_command(cmd_other);
@@ -2604,26 +2616,28 @@ stonith_fence_get_devices_cb(GList * devices, void *user_data)
/* Order based on priority */
devices = g_list_sort(devices, sort_device_priority);
device = g_hash_table_lookup(device_list, devices->data);
-
- if (device) {
- cmd->device_list = devices;
- cmd->device_next = devices->next;
- devices = NULL; /* list owned by cmd now */
- }
}
- /* we have a device, schedule it for fencing. */
- if (device) {
- schedule_stonith_command(cmd, device);
- /* in progress */
- return;
- }
+ if (device == NULL) { // No device found
+ pcmk__action_result_t result = {
+ // Ensure we don't pass garbage to free()
+ .exit_reason = NULL,
+ .action_stdout = NULL,
+ .action_stderr = NULL
+ };
- /* no device found! */
- stonith_send_async_reply(cmd, NULL, -ENODEV, 0, false);
+ pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_NO_FENCE_DEVICE,
+ "No fence device configured for target");
+ send_async_reply(cmd, &result, 0, false);
+ pcmk__reset_result(&result);
+ free_async_command(cmd);
+ g_list_free_full(devices, free);
- free_async_command(cmd);
- g_list_free_full(devices, free);
+ } else { // Device found, schedule it for fencing
+ cmd->device_list = devices;
+ cmd->device_next = devices->next;
+ schedule_stonith_command(cmd, device);
+ }
}
static int
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index ffaf60018..b09d2865e 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -996,7 +996,7 @@ stonith_manual_ack(xmlNode * msg, remote_fencing_op_t * op)
remote_op_done(op, msg, pcmk_ok, FALSE);
- /* Replies are sent via done_cb->stonith_send_async_reply()->do_local_reply() */
+ // Replies are sent via done_cb -> send_async_reply() -> do_local_reply()
return -EINPROGRESS;
}
--
2.27.0
From c67b6bfbe0baa1253058417ddfb9bc4cf0844e27 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 7 Oct 2021 17:25:38 -0500
Subject: [PATCH 08/12] Refactor: fencer: pass result object when building
async reply
... via stonith_construct_async_reply(), instead of passing a mapped legacy rc
and action output separately, which will be helpful when we add the exit reason
to the reply. Also, drop the "stonith_" prefix since the function is static, and
drop an unused argument.
---
daemons/fenced/fenced_commands.c | 33 +++++++++++++++-----------------
1 file changed, 15 insertions(+), 18 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index e5f8162ce..6bc12e6c4 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -112,8 +112,8 @@ typedef struct async_command_s {
stonith_device_t *activating_on;
} async_command_t;
-static xmlNode *stonith_construct_async_reply(async_command_t * cmd, const char *output,
- xmlNode * data, int rc);
+static xmlNode *construct_async_reply(async_command_t *cmd,
+ const pcmk__action_result_t *result);
static gboolean
is_action_required(const char *action, stonith_device_t *device)
@@ -2399,7 +2399,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
output = result->action_stdout;
rc = pcmk_rc2legacy(stonith__result2rc(result));
- reply = stonith_construct_async_reply(cmd, output, NULL, rc);
+ reply = construct_async_reply(cmd, result);
// Only replies for certain actions are broadcast
if (pcmk__str_any_of(cmd->action, "metadata", "monitor", "list", "status",
@@ -2732,17 +2732,20 @@ stonith_construct_reply(xmlNode * request, const char *output, xmlNode * data, i
return reply;
}
+/*!
+ * \internal
+ * \brief Build an XML reply to an asynchronous fencing command
+ *
+ * \param[in] cmd Fencing command that reply is for
+ * \param[in] result Command result
+ */
static xmlNode *
-stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode * data, int rc)
+construct_async_reply(async_command_t *cmd, const pcmk__action_result_t *result)
{
- xmlNode *reply = NULL;
-
- crm_trace("Creating a basic reply");
- reply = create_xml_node(NULL, T_STONITH_REPLY);
+ xmlNode *reply = create_xml_node(NULL, T_STONITH_REPLY);
crm_xml_add(reply, "st_origin", __func__);
crm_xml_add(reply, F_TYPE, T_STONITH_NG);
-
crm_xml_add(reply, F_STONITH_OPERATION, cmd->op);
crm_xml_add(reply, F_STONITH_DEVICE, cmd->device);
crm_xml_add(reply, F_STONITH_REMOTE_OP_ID, cmd->remote_op_id);
@@ -2753,15 +2756,9 @@ stonith_construct_async_reply(async_command_t * cmd, const char *output, xmlNode
crm_xml_add(reply, F_STONITH_ORIGIN, cmd->origin);
crm_xml_add_int(reply, F_STONITH_CALLID, cmd->id);
crm_xml_add_int(reply, F_STONITH_CALLOPTS, cmd->options);
-
- crm_xml_add_int(reply, F_STONITH_RC, rc);
-
- crm_xml_add(reply, "st_output", output);
-
- if (data != NULL) {
- crm_info("Attaching reply output");
- add_message_xml(reply, F_STONITH_CALLDATA, data);
- }
+ crm_xml_add_int(reply, F_STONITH_RC,
+ pcmk_rc2legacy(stonith__result2rc(result)));
+ crm_xml_add(reply, "st_output", result->action_stdout);
return reply;
}
--
2.27.0
From 2686caeb3b74f687ddd86a4e483250ca8096ba7c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 19 Oct 2021 18:27:31 -0500
Subject: [PATCH 09/12] Log: fencer: improve messages for asynchronous results
Now that we have the full result object, pass it to log_async_result().
Instead of logging a mapped legacy rc, log the execution status or exit status
as appropriate, along with the exit reason.
---
daemons/fenced/fenced_commands.c | 43 +++++++++++++++++---------------
1 file changed, 23 insertions(+), 20 deletions(-)
diff --git a/daemons/fenced/fenced_commands.c b/daemons/fenced/fenced_commands.c
index 6bc12e6c4..9d06c68dc 100644
--- a/daemons/fenced/fenced_commands.c
+++ b/daemons/fenced/fenced_commands.c
@@ -2305,15 +2305,14 @@ stonith_query(xmlNode * msg, const char *remote_peer, const char *client_id, int
* \brief Log the result of an asynchronous command
*
* \param[in] cmd Command the result is for
- * \param[in] rc Legacy return code corresponding to result
+ * \param[in] result Result of command
* \param[in] pid Process ID of command, if available
* \param[in] next Alternate device that will be tried if command failed
- * \param[in] output Command output, if any
* \param[in] op_merged Whether this command was merged with an earlier one
*/
static void
-log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
- const char *output, gboolean op_merged)
+log_async_result(async_command_t *cmd, const pcmk__action_result_t *result,
+ int pid, const char *next, bool op_merged)
{
int log_level = LOG_ERR;
int output_log_level = LOG_NEVER;
@@ -2321,17 +2320,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
GString *msg = g_string_sized_new(80); // Reasonable starting size
- // Choose log levels appropriately
- if (rc == 0) { // Success
+ // Choose log levels appropriately if we have a result
+ if ((result->execution_status == PCMK_EXEC_DONE)
+ && (result->exit_status == CRM_EX_OK)) { // Success
log_level = (cmd->victim == NULL)? LOG_DEBUG : LOG_NOTICE;
- if ((output != NULL)
+ if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
output_log_level = LOG_DEBUG;
}
next = NULL;
} else { // Failure
log_level = (cmd->victim == NULL)? LOG_NOTICE : LOG_ERR;
- if ((output != NULL)
+ if ((result->action_stdout != NULL)
&& !pcmk__str_eq(cmd->action, "metadata", pcmk__str_casei)) {
output_log_level = LOG_WARNING;
}
@@ -2347,10 +2347,18 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
}
g_string_append_printf(msg, "using %s ", cmd->device);
- // Add result
- g_string_append_printf(msg, "returned %d (%s)", rc, pcmk_strerror(rc));
+ // Add exit status or execution status as appropriate
+ if (result->execution_status == PCMK_EXEC_DONE) {
+ g_string_append_printf(msg, "returned %d", result->exit_status);
+ } else {
+ g_string_append_printf(msg, "could not be executed: %s",
+ pcmk_exec_status_str(result->execution_status));
+ }
- // Add next device if appropriate
+ // Add exit reason and next device if appropriate
+ if (result->exit_reason != NULL) {
+ g_string_append_printf(msg, " (%s)", result->exit_reason);
+ }
if (next != NULL) {
g_string_append_printf(msg, ", retrying with %s", next);
}
@@ -2371,7 +2379,7 @@ log_async_result(async_command_t *cmd, int rc, int pid, const char *next,
if (output_log_level != LOG_NEVER) {
char *prefix = crm_strdup_printf("%s[%d]", cmd->device, pid);
- crm_log_output(output_log_level, prefix, output);
+ crm_log_output(output_log_level, prefix, result->action_stdout);
free(prefix);
}
}
@@ -2391,14 +2399,9 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
{
xmlNode *reply = NULL;
gboolean bcast = FALSE;
- const char *output = NULL;
- int rc = pcmk_ok;
CRM_CHECK((cmd != NULL) && (result != NULL), return);
- output = result->action_stdout;
- rc = pcmk_rc2legacy(stonith__result2rc(result));
-
reply = construct_async_reply(cmd, result);
// Only replies for certain actions are broadcast
@@ -2412,7 +2415,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
bcast = TRUE;
}
- log_async_result(cmd, rc, pid, NULL, output, merged);
+ log_async_result(cmd, result, pid, NULL, merged);
crm_log_xml_trace(reply, "Reply");
if (merged) {
@@ -2436,6 +2439,7 @@ send_async_reply(async_command_t *cmd, const pcmk__action_result_t *result,
if (stand_alone) {
/* Do notification with a clean data object */
xmlNode *notify_data = create_xml_node(NULL, T_STONITH_NOTIFY_FENCE);
+ int rc = pcmk_rc2legacy(stonith__result2rc(result));
crm_xml_add_int(notify_data, F_STONITH_RC, rc);
crm_xml_add(notify_data, F_STONITH_TARGET, cmd->victim);
@@ -2521,8 +2525,7 @@ st_child_done(int pid, const pcmk__action_result_t *result, void *user_data)
/* this operation requires more fencing, hooray! */
if (next_device) {
- log_async_result(cmd, pcmk_rc2legacy(stonith__result2rc(result)), pid,
- next_device->id, result->action_stdout, FALSE);
+ log_async_result(cmd, result, pid, next_device->id, false);
schedule_stonith_command(cmd, next_device);
/* Prevent cmd from being freed */
cmd = NULL;
--
2.27.0
From 9f9dea518da50f629589d505ea0f330a47111d76 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 28 Oct 2021 13:29:31 -0500
Subject: [PATCH 10/12] Test: cts-fencing: update expected log messages
... which now log the original exit status rather than a mapped legacy rc
---
cts/cts-fencing.in | 28 ++++++++++++++--------------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/cts/cts-fencing.in b/cts/cts-fencing.in
index babfb6351..5cd9f7b8f 100644
--- a/cts/cts-fencing.in
+++ b/cts/cts-fencing.in
@@ -886,7 +886,7 @@ class Tests(object):
test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
test.add_stonith_log_pattern("Total timeout set to 40")
- test.add_stonith_log_pattern("targeting node3 using false returned -201")
+ test.add_stonith_log_pattern("targeting node3 using false returned 1")
test.add_stonith_log_pattern("targeting node3 using true returned 0")
# test what happens when the first fencing level fails.
@@ -920,8 +920,8 @@ class Tests(object):
test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 3")
test.add_stonith_log_pattern("Total timeout set to 18")
- test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
- test.add_stonith_log_pattern("targeting node3 using false2 returned -201")
+ test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
+ test.add_stonith_log_pattern("targeting node3 using false2 returned 1")
test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
@@ -987,7 +987,7 @@ class Tests(object):
test.add_cmd("stonith_admin", "--output-as=xml -F node3 -t 20")
test.add_stonith_log_pattern("Total timeout set to 8")
- test.add_stonith_log_pattern("targeting node3 using false1 returned -201")
+ test.add_stonith_log_pattern("targeting node3 using false1 returned 1")
test.add_stonith_neg_log_pattern("targeting node3 using false2 returned ")
test.add_stonith_log_pattern("targeting node3 using true3 returned 0")
test.add_stonith_log_pattern("targeting node3 using true4 returned 0")
@@ -1147,7 +1147,7 @@ class Tests(object):
"--output-as=xml -R true1 -a fence_dummy_no_reboot -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
test.add_stonith_log_pattern("does not support reboot")
- test.add_stonith_log_pattern("using true1 returned 0 (OK)")
+ test.add_stonith_log_pattern("using true1 returned 0")
# make sure reboot is used when reboot action is advertised
for test_type in test_types:
@@ -1158,7 +1158,7 @@ class Tests(object):
"--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
test.add_cmd("stonith_admin", "--output-as=xml -B node1 -t 5 -V")
test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
- test.add_stonith_log_pattern("using true1 returned 0 (OK)")
+ test.add_stonith_log_pattern("using true1 returned 0")
# make sure requested fencing delay is applied only for the first device in the first level
# make sure static delay from pcmk_delay_base is added
@@ -1240,8 +1240,8 @@ class Tests(object):
'--output-as=xml -R true2 -a fence_dummy_auto_unfence -o "mode=pass" -o "pcmk_host_list=%s"' % (our_uname))
test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
# both devices should be executed
- test.add_stonith_log_pattern("using true1 returned 0 (OK)")
- test.add_stonith_log_pattern("using true2 returned 0 (OK)")
+ test.add_stonith_log_pattern("using true1 returned 0")
+ test.add_stonith_log_pattern("using true2 returned 0")
### verify unfencing using automatic unfencing fails if any of the required agents fail
test = self.new_test("cpg_unfence_required_2",
@@ -1264,8 +1264,8 @@ class Tests(object):
test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 1 -v true1" % (our_uname))
test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 2 -v true2" % (our_uname))
test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
- test.add_stonith_log_pattern("using true1 returned 0 (OK)")
- test.add_stonith_log_pattern("using true2 returned 0 (OK)")
+ test.add_stonith_log_pattern("using true1 returned 0")
+ test.add_stonith_log_pattern("using true2 returned 0")
### verify unfencing using automatic devices with topology
test = self.new_test("cpg_unfence_required_4",
@@ -1296,10 +1296,10 @@ class Tests(object):
test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 3 -v false4" % (our_uname))
test.add_cmd("stonith_admin", "--output-as=xml -r %s -i 4 -v true4" % (our_uname))
test.add_cmd("stonith_admin", "--output-as=xml -U %s -t 3" % (our_uname))
- test.add_stonith_log_pattern("using true1 returned 0 (OK)")
- test.add_stonith_log_pattern("using true2 returned 0 (OK)")
- test.add_stonith_log_pattern("using true3 returned 0 (OK)")
- test.add_stonith_log_pattern("using true4 returned 0 (OK)")
+ test.add_stonith_log_pattern("using true1 returned 0")
+ test.add_stonith_log_pattern("using true2 returned 0")
+ test.add_stonith_log_pattern("using true3 returned 0")
+ test.add_stonith_log_pattern("using true4 returned 0")
def build_unfence_on_target_tests(self):
""" Register tests that verify unfencing that runs on the target """
--
2.27.0
From be72166ed9ccb53c218529783660503df95da719 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 16 Sep 2021 16:50:23 -0500
Subject: [PATCH 11/12] Log: libcrmservice: downgrade failed action messages
Previously, we would often get duplicate log messages for failed actions,
from the service library and again from its callers.
Now that the service library tracks and provides exit reasons, callers can log
sufficient detail with better context, so downgrade the library's messages to
info level or lower. Similarly, avoid duplicate logs of process output.
Certain messages (such as out-of-memory) remain at higher severity.
---
daemons/controld/controld_execd.c | 15 +++---
lib/fencing/st_client.c | 11 ++---
lib/services/services.c | 14 +++---
lib/services/services_linux.c | 80 ++++++++++++++++---------------
lib/services/systemd.c | 20 ++++----
lib/services/upstart.c | 19 ++++----
6 files changed, 80 insertions(+), 79 deletions(-)
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index bded6e6b6..3ddff6e13 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -2684,16 +2684,15 @@ log_executor_event(lrmd_event_data_t *op, const char *op_key,
do_crm_log(log_level, "%s", str->str);
g_string_free(str, TRUE);
- if (op->output != NULL) {
- char *prefix = crm_strdup_printf("%s-" PCMK__OP_FMT ":%d", node_name,
+ /* The services library has already logged the output at info or debug
+ * level, so just raise to notice if it looks like a failure.
+ */
+ if ((op->output != NULL) && (op->rc != PCMK_OCF_OK)) {
+ char *prefix = crm_strdup_printf(PCMK__OP_FMT "@%s output",
op->rsc_id, op->op_type,
- op->interval_ms, op->call_id);
+ op->interval_ms, node_name);
- if (op->rc) {
- crm_log_output(LOG_NOTICE, prefix, op->output);
- } else {
- crm_log_output(LOG_DEBUG, prefix, op->output);
- }
+ crm_log_output(LOG_NOTICE, prefix, op->output);
free(prefix);
}
}
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
index 3d4127eff..2fbff7f24 100644
--- a/lib/fencing/st_client.c
+++ b/lib/fencing/st_client.c
@@ -276,14 +276,9 @@ stonith__watchdog_fencing_enabled_for_node(const char *node)
static void
log_action(stonith_action_t *action, pid_t pid)
{
- if (action->result.action_stdout != NULL) {
- /* Logging the whole string confuses syslog when the string is xml */
- char *prefix = crm_strdup_printf("%s[%d] stdout:", action->agent, pid);
-
- crm_log_output(LOG_TRACE, prefix, action->result.action_stdout);
- free(prefix);
- }
-
+ /* The services library has already logged the output at info or debug
+ * level, so just raise to warning for stderr.
+ */
if (action->result.action_stderr != NULL) {
/* Logging the whole string confuses syslog when the string is xml */
char *prefix = crm_strdup_printf("%s[%d] stderr:", action->agent, pid);
diff --git a/lib/services/services.c b/lib/services/services.c
index 86a0a213c..cf8bbc70e 100644
--- a/lib/services/services.c
+++ b/lib/services/services.c
@@ -319,13 +319,13 @@ services__create_resource_action(const char *name, const char *standard,
rc = services__nagios_prepare(op);
#endif
} else {
- crm_err("Unknown resource standard: %s", op->standard);
+ crm_info("Unknown resource standard: %s", op->standard);
rc = ENOENT;
}
if (rc != pcmk_rc_ok) {
- crm_err("Cannot prepare %s operation for %s: %s",
- action, name, strerror(rc));
+ crm_info("Cannot prepare %s operation for %s: %s",
+ action, name, strerror(rc));
services__handle_exec_error(op, rc);
}
return op;
@@ -967,14 +967,14 @@ execute_metadata_action(svc_action_t *op)
const char *class = op->standard;
if (op->agent == NULL) {
- crm_err("meta-data requested without specifying agent");
+ crm_info("Meta-data requested without specifying agent");
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_FATAL, "Agent not specified");
return EINVAL;
}
if (class == NULL) {
- crm_err("meta-data requested for agent %s without specifying class",
+ crm_info("Meta-data requested for agent %s without specifying class",
op->agent);
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_FATAL,
@@ -986,8 +986,8 @@ execute_metadata_action(svc_action_t *op)
class = resources_find_service_class(op->agent);
}
if (class == NULL) {
- crm_err("meta-data requested for %s, but could not determine class",
- op->agent);
+ crm_info("Meta-data requested for %s, but could not determine class",
+ op->agent);
services__set_result(op, services__generic_error(op),
PCMK_EXEC_ERROR_HARD,
"Agent standard could not be determined");
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
index b2ff27a0d..9a4c6cf80 100644
--- a/lib/services/services_linux.c
+++ b/lib/services/services_linux.c
@@ -64,8 +64,8 @@ sigchld_setup(struct sigchld_data_s *data)
// Block SIGCHLD (saving previous set of blocked signals to restore later)
if (sigprocmask(SIG_BLOCK, &(data->mask), &(data->old_mask)) < 0) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=sigprocmask", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=sigprocmask", pcmk_strerror(errno));
return false;
}
return true;
@@ -81,8 +81,8 @@ sigchld_open(struct sigchld_data_s *data)
fd = signalfd(-1, &(data->mask), SFD_NONBLOCK);
if (fd < 0) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=signalfd", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=signalfd", pcmk_strerror(errno));
}
return fd;
}
@@ -108,8 +108,8 @@ sigchld_received(int fd)
}
s = read(fd, &fdsi, sizeof(struct signalfd_siginfo));
if (s != sizeof(struct signalfd_siginfo)) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=read", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=read", pcmk_strerror(errno));
} else if (fdsi.ssi_signo == SIGCHLD) {
return true;
@@ -149,8 +149,8 @@ sigchld_handler()
if ((last_sigchld_data != NULL)
&& (last_sigchld_data->pipe_fd[1] >= 0)
&& (write(last_sigchld_data->pipe_fd[1], "", 1) == -1)) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=write", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=write", pcmk_strerror(errno));
}
}
@@ -162,19 +162,19 @@ sigchld_setup(struct sigchld_data_s *data)
data->pipe_fd[0] = data->pipe_fd[1] = -1;
if (pipe(data->pipe_fd) == -1) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=pipe", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=pipe", pcmk_strerror(errno));
return false;
}
rc = pcmk__set_nonblocking(data->pipe_fd[0]);
if (rc != pcmk_rc_ok) {
- crm_warn("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
+ crm_info("Could not set pipe input non-blocking: %s " CRM_XS " rc=%d",
pcmk_rc_str(rc), rc);
}
rc = pcmk__set_nonblocking(data->pipe_fd[1]);
if (rc != pcmk_rc_ok) {
- crm_warn("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
+ crm_info("Could not set pipe output non-blocking: %s " CRM_XS " rc=%d",
pcmk_rc_str(rc), rc);
}
@@ -183,8 +183,8 @@ sigchld_setup(struct sigchld_data_s *data)
data->sa.sa_flags = 0;
sigemptyset(&(data->sa.sa_mask));
if (sigaction(SIGCHLD, &(data->sa), &(data->old_sa)) < 0) {
- crm_err("Wait for child process completion failed: %s "
- CRM_XS " source=sigaction", pcmk_strerror(errno));
+ crm_info("Wait for child process completion failed: %s "
+ CRM_XS " source=sigaction", pcmk_strerror(errno));
}
// Remember data for use in signal handler
@@ -585,7 +585,11 @@ log_op_output(svc_action_t *op)
{
char *prefix = crm_strdup_printf("%s[%d] error output", op->id, op->pid);
- crm_log_output(LOG_NOTICE, prefix, op->stderr_data);
+ /* The library caller has better context to know how important the output
+ * is, so log it at info and debug severity here. They can log it again at
+ * higher severity if appropriate.
+ */
+ crm_log_output(LOG_INFO, prefix, op->stderr_data);
strcpy(prefix + strlen(prefix) - strlen("error output"), "output");
crm_log_output(LOG_DEBUG, prefix, op->stdout_data);
free(prefix);
@@ -673,7 +677,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
parse_exit_reason_from_stderr(op);
} else if (mainloop_child_timeout(p)) {
- crm_warn("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
+ crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
"Process did not exit within specified timeout");
@@ -686,7 +690,7 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_CANCELLED, NULL);
} else {
- crm_warn("%s[%d] terminated with signal %d (%s)",
+ crm_info("%s[%d] terminated with signal %d (%s)",
op->id, op->pid, signo, strsignal(signo));
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"Process interrupted by signal");
@@ -908,12 +912,12 @@ action_launch_child(svc_action_t *op)
sp.sched_priority = 0;
if (sched_setscheduler(0, SCHED_OTHER, &sp) == -1) {
- crm_warn("Could not reset scheduling policy for %s", op->id);
+ crm_info("Could not reset scheduling policy for %s", op->id);
}
}
#endif
if (setpriority(PRIO_PROCESS, 0, 0) == -1) {
- crm_warn("Could not reset process priority for %s", op->id);
+ crm_info("Could not reset process priority for %s", op->id);
}
/* Man: The call setpgrp() is equivalent to setpgid(0,0)
@@ -941,7 +945,7 @@ action_launch_child(svc_action_t *op)
} else {
crm_err("Considering %s unconfigured "
"because unable to load CIB secrets: %s",
- op->rsc, pcmk_rc_str(rc));
+ op->rsc, pcmk_rc_str(rc));
exit_child(op, services__configuration_error(op, false),
"Unable to load CIB secrets");
}
@@ -1043,7 +1047,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
} else if (wait_rc < 0) {
wait_reason = pcmk_rc_str(errno);
- crm_warn("Wait for completion of %s[%d] failed: %s "
+ crm_info("Wait for completion of %s[%d] failed: %s "
CRM_XS " source=waitpid",
op->id, op->pid, wait_reason);
wait_rc = 0; // Act as if process is still running
@@ -1057,8 +1061,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
} else if ((poll_rc < 0) && (errno != EINTR)) {
wait_reason = pcmk_rc_str(errno);
- crm_err("Wait for completion of %s[%d] failed: %s "
- CRM_XS " source=poll", op->id, op->pid, wait_reason);
+ crm_info("Wait for completion of %s[%d] failed: %s "
+ CRM_XS " source=poll", op->id, op->pid, wait_reason);
break;
}
@@ -1078,7 +1082,7 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
services__set_result(op, services__generic_error(op),
PCMK_EXEC_TIMEOUT,
"Process did not exit within specified timeout");
- crm_warn("%s[%d] timed out after %dms",
+ crm_info("%s[%d] timed out after %dms",
op->id, op->pid, op->timeout);
} else {
@@ -1110,8 +1114,8 @@ wait_for_sync_result(svc_action_t *op, struct sigchld_data_s *data)
services__set_result(op, services__generic_error(op), PCMK_EXEC_ERROR,
"Process interrupted by signal");
- crm_err("%s[%d] terminated with signal %d (%s)",
- op->id, op->pid, signo, strsignal(signo));
+ crm_info("%s[%d] terminated with signal %d (%s)",
+ op->id, op->pid, signo, strsignal(signo));
#ifdef WCOREDUMP
if (WCOREDUMP(status)) {
@@ -1155,7 +1159,7 @@ services__execute_file(svc_action_t *op)
// Catch common failure conditions early
if (stat(op->opaque->exec, &st) != 0) {
rc = errno;
- crm_warn("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
+ crm_info("Cannot execute '%s': %s " CRM_XS " stat rc=%d",
op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
@@ -1163,8 +1167,8 @@ services__execute_file(svc_action_t *op)
if (pipe(stdout_fd) < 0) {
rc = errno;
- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
- op->opaque->exec, pcmk_strerror(rc), rc);
+ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdout) rc=%d",
+ op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
@@ -1174,8 +1178,8 @@ services__execute_file(svc_action_t *op)
close_pipe(stdout_fd);
- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
- op->opaque->exec, pcmk_strerror(rc), rc);
+ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stderr) rc=%d",
+ op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
@@ -1187,8 +1191,8 @@ services__execute_file(svc_action_t *op)
close_pipe(stdout_fd);
close_pipe(stderr_fd);
- crm_err("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
- op->opaque->exec, pcmk_strerror(rc), rc);
+ crm_info("Cannot execute '%s': %s " CRM_XS " pipe(stdin) rc=%d",
+ op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
goto done;
}
@@ -1212,8 +1216,8 @@ services__execute_file(svc_action_t *op)
close_pipe(stdout_fd);
close_pipe(stderr_fd);
- crm_err("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
- op->opaque->exec, pcmk_strerror(rc), rc);
+ crm_info("Cannot execute '%s': %s " CRM_XS " fork rc=%d",
+ op->opaque->exec, pcmk_strerror(rc), rc);
services__handle_exec_error(op, rc);
if (op->synchronous) {
sigchld_cleanup(&data);
@@ -1271,7 +1275,7 @@ services__execute_file(svc_action_t *op)
op->opaque->stdout_fd = stdout_fd[0];
rc = pcmk__set_nonblocking(op->opaque->stdout_fd);
if (rc != pcmk_rc_ok) {
- crm_warn("Could not set '%s' output non-blocking: %s "
+ crm_info("Could not set '%s' output non-blocking: %s "
CRM_XS " rc=%d",
op->opaque->exec, pcmk_rc_str(rc), rc);
}
@@ -1279,7 +1283,7 @@ services__execute_file(svc_action_t *op)
op->opaque->stderr_fd = stderr_fd[0];
rc = pcmk__set_nonblocking(op->opaque->stderr_fd);
if (rc != pcmk_rc_ok) {
- crm_warn("Could not set '%s' error output non-blocking: %s "
+ crm_info("Could not set '%s' error output non-blocking: %s "
CRM_XS " rc=%d",
op->opaque->exec, pcmk_rc_str(rc), rc);
}
@@ -1290,7 +1294,7 @@ services__execute_file(svc_action_t *op)
// as long as no other standard uses stdin_fd assume stonith
rc = pcmk__set_nonblocking(op->opaque->stdin_fd);
if (rc != pcmk_rc_ok) {
- crm_warn("Could not set '%s' input non-blocking: %s "
+ crm_info("Could not set '%s' input non-blocking: %s "
CRM_XS " fd=%d,rc=%d", op->opaque->exec,
pcmk_rc_str(rc), op->opaque->stdin_fd, rc);
}
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
index 6f5bef960..8e9fff484 100644
--- a/lib/services/systemd.c
+++ b/lib/services/systemd.c
@@ -232,7 +232,8 @@ systemd_daemon_reload_complete(DBusPendingCall *pending, void *user_data)
}
if (pcmk_dbus_find_error(pending, reply, &error)) {
- crm_err("Could not issue systemd reload %d: %s", reload_count, error.message);
+ crm_warn("Could not issue systemd reload %d: %s",
+ reload_count, error.message);
dbus_error_free(&error);
} else {
@@ -291,8 +292,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
PCMK_EXEC_NOT_INSTALLED, "systemd unit not found");
}
- crm_err("DBus request for %s of systemd unit %s for resource %s failed: %s",
- op->action, op->agent, crm_str(op->rsc), error->message);
+ crm_info("DBus request for %s of systemd unit %s for resource %s failed: %s",
+ op->action, op->agent, crm_str(op->rsc), error->message);
}
/*!
@@ -325,11 +326,11 @@ execute_after_loadunit(DBusMessage *reply, svc_action_t *op)
if (op != NULL) {
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_ERROR,
"systemd DBus method had unexpected reply");
- crm_err("Could not load systemd unit %s for %s: "
- "DBus reply has unexpected type", op->agent, op->id);
+ crm_info("Could not load systemd unit %s for %s: "
+ "DBus reply has unexpected type", op->agent, op->id);
} else {
- crm_err("Could not load systemd unit: "
- "DBus reply has unexpected type");
+ crm_info("Could not load systemd unit: "
+ "DBus reply has unexpected type");
}
} else {
@@ -688,7 +689,7 @@ process_unit_method_reply(DBusMessage *reply, svc_action_t *op)
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
- crm_warn("DBus request for %s of %s succeeded but "
+ crm_info("DBus request for %s of %s succeeded but "
"return type was unexpected", op->action, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE,
"systemd DBus method had unexpected reply");
@@ -981,7 +982,8 @@ systemd_timeout_callback(gpointer p)
svc_action_t * op = p;
op->opaque->timerid = 0;
- crm_warn("%s operation on systemd unit %s named '%s' timed out", op->action, op->agent, op->rsc);
+ crm_info("%s action for systemd unit %s named '%s' timed out",
+ op->action, op->agent, op->rsc);
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
"Systemd action did not complete within specified timeout");
services__finalize_async_op(op);
diff --git a/lib/services/upstart.c b/lib/services/upstart.c
index 2fdc229ad..2ece803e1 100644
--- a/lib/services/upstart.c
+++ b/lib/services/upstart.c
@@ -308,21 +308,21 @@ get_first_instance(const gchar * job, int timeout)
dbus_message_unref(msg);
if (dbus_error_is_set(&error)) {
- crm_err("Call to %s failed: %s", method, error.message);
+ crm_info("Call to %s failed: %s", method, error.message);
dbus_error_free(&error);
goto done;
} else if(reply == NULL) {
- crm_err("Call to %s failed: no reply", method);
+ crm_info("Call to %s failed: no reply", method);
goto done;
} else if (!dbus_message_iter_init(reply, &args)) {
- crm_err("Call to %s failed: Message has no arguments", method);
+ crm_info("Call to %s failed: Message has no arguments", method);
goto done;
}
if(!pcmk_dbus_type_check(reply, &args, DBUS_TYPE_ARRAY, __func__, __LINE__)) {
- crm_err("Call to %s failed: Message has invalid arguments", method);
+ crm_info("Call to %s failed: Message has invalid arguments", method);
goto done;
}
@@ -432,8 +432,8 @@ set_result_from_method_error(svc_action_t *op, const DBusError *error)
return;
}
- crm_err("DBus request for %s of Upstart job %s for resource %s failed: %s",
- op->action, op->agent, crm_str(op->rsc), error->message);
+ crm_info("DBus request for %s of Upstart job %s for resource %s failed: %s",
+ op->action, op->agent, crm_str(op->rsc), error->message);
}
/*!
@@ -468,7 +468,7 @@ job_method_complete(DBusPendingCall *pending, void *user_data)
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
- crm_warn("DBus request for %s of %s succeeded but "
+ crm_info("DBus request for %s of %s succeeded but "
"return type was unexpected", op->action, crm_str(op->rsc));
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
@@ -667,7 +667,8 @@ services__execute_upstart(svc_action_t *op)
} else if (!pcmk_dbus_type_check(reply, NULL, DBUS_TYPE_OBJECT_PATH,
__func__, __LINE__)) {
- crm_warn("Call to %s passed but return type was unexpected", op->action);
+ crm_info("Call to %s passed but return type was unexpected",
+ op->action);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
} else {
@@ -675,7 +676,7 @@ services__execute_upstart(svc_action_t *op)
dbus_message_get_args(reply, NULL, DBUS_TYPE_OBJECT_PATH, &path,
DBUS_TYPE_INVALID);
- crm_info("Call to %s passed: %s", op->action, path);
+ crm_debug("Call to %s passed: %s", op->action, path);
services__set_result(op, PCMK_OCF_OK, PCMK_EXEC_DONE, NULL);
}
--
2.27.0
From 39f6861c72eb9dd76d2cf3da287fe7485615631b Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 8 Nov 2021 09:43:38 -0600
Subject: [PATCH 12/12] Low: fencing: avoid use-after-free with new result
object
itnroduced by 153c9b552 (not released)
---
lib/fencing/st_rhcs.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/fencing/st_rhcs.c b/lib/fencing/st_rhcs.c
index 23e694975..6c8cbedc7 100644
--- a/lib/fencing/st_rhcs.c
+++ b/lib/fencing/st_rhcs.c
@@ -143,15 +143,17 @@ stonith__rhcs_get_metadata(const char *agent, int timeout, xmlNode **metadata)
if (result->execution_status != PCMK_EXEC_DONE) {
crm_warn("Could not execute metadata action for %s: %s",
agent, pcmk_exec_status_str(result->execution_status));
+ rc = pcmk_rc2legacy(stonith__result2rc(result));
stonith__destroy_action(action);
- return pcmk_rc2legacy(stonith__result2rc(result));
+ return rc;
}
if (result->exit_status != CRM_EX_OK) {
crm_warn("Metadata action for %s returned error code %d",
agent, result->exit_status);
+ rc = pcmk_rc2legacy(stonith__result2rc(result));
stonith__destroy_action(action);
- return pcmk_rc2legacy(stonith__result2rc(result));
+ return rc;
}
if (result->action_stdout == NULL) {
--
2.27.0