import pacemaker-2.1.2-4.el9
This commit is contained in:
parent
f997a33b7b
commit
7d8673c267
2985
SOURCES/009-fencing-reasons.patch
Normal file
2985
SOURCES/009-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
4157
SOURCES/010-probe-failures.patch
Normal file
4157
SOURCES/010-probe-failures.patch
Normal file
File diff suppressed because it is too large
Load Diff
1450
SOURCES/011-fencing-reasons.patch
Normal file
1450
SOURCES/011-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
65
SOURCES/012-notify-crash.patch
Normal file
65
SOURCES/012-notify-crash.patch
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
From ed8b2c86ab77aaa3d7fd688c049ad5e1b922a9c6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
Date: Thu, 13 Jan 2022 02:56:55 -0800
|
||||||
|
Subject: [PATCH] Fix: liblrmd: Avoid double-free during notify operation
|
||||||
|
|
||||||
|
This commit fixes a regression introduced by 31c7fa8a, which caused a
|
||||||
|
double-free in notify operations. lrmd_dispatch_internal() assigns the
|
||||||
|
exit_reason string directly from an XML node to a new lrmd_event_data_t
|
||||||
|
object (without duplicating), and this string gets freed twice.
|
||||||
|
|
||||||
|
Free #1: pcmk__create_history_xml() (reached via callback) calls
|
||||||
|
lrmd__set_result(), which frees event.exit_reason and sets it to NULL.
|
||||||
|
Free #2: lrmd_ipc_dispatch() frees the XML node, which contains a
|
||||||
|
pointer to the exit_reason string just freed, after
|
||||||
|
lrmd_dispatch_internal() returns.
|
||||||
|
|
||||||
|
Prior to 31c7fa8a, pcmk__create_history_xml() reset event.rc and
|
||||||
|
event.op_status but **not** event.exit_reason.
|
||||||
|
|
||||||
|
In this commit we simply make a copy of event.exit_reason in
|
||||||
|
lrmd_dispatch_internal() before the callback. This way we don't have to
|
||||||
|
worry about whatever happens in the callback, and we can continue to
|
||||||
|
unset the exit_reason alongside the rc and op_status. The added overhead
|
||||||
|
should be minimal.
|
||||||
|
|
||||||
|
This commit also makes a copy of output. That's not strictly necessary
|
||||||
|
but adds some futureproofing and allows us to call lrmd__reset_result()
|
||||||
|
at the end of lrmd_dispatch_internal().
|
||||||
|
|
||||||
|
Resolves: RHBZ#2039675
|
||||||
|
|
||||||
|
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
---
|
||||||
|
lib/lrmd/lrmd_client.c | 8 +++++---
|
||||||
|
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/lrmd/lrmd_client.c b/lib/lrmd/lrmd_client.c
|
||||||
|
index ee31bb5ae9..5131a648b7 100644
|
||||||
|
--- a/lib/lrmd/lrmd_client.c
|
||||||
|
+++ b/lib/lrmd/lrmd_client.c
|
||||||
|
@@ -305,9 +305,10 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
|
||||||
|
event.user_data = crm_element_value(msg, F_LRMD_RSC_USERDATA_STR);
|
||||||
|
event.type = lrmd_event_exec_complete;
|
||||||
|
|
||||||
|
- // No need to duplicate the memory, so don't use setter functions
|
||||||
|
- event.output = crm_element_value(msg, F_LRMD_RSC_OUTPUT);
|
||||||
|
- event.exit_reason = crm_element_value(msg, F_LRMD_RSC_EXIT_REASON);
|
||||||
|
+ /* output and exit_reason may be freed by a callback */
|
||||||
|
+ event.output = crm_element_value_copy(msg, F_LRMD_RSC_OUTPUT);
|
||||||
|
+ lrmd__set_result(&event, event.rc, event.op_status,
|
||||||
|
+ crm_element_value(msg, F_LRMD_RSC_EXIT_REASON));
|
||||||
|
|
||||||
|
event.params = xml2list(msg);
|
||||||
|
} else if (pcmk__str_eq(type, LRMD_OP_NEW_CLIENT, pcmk__str_none)) {
|
||||||
|
@@ -324,6 +325,7 @@ lrmd_dispatch_internal(lrmd_t * lrmd, xmlNode * msg)
|
||||||
|
if (event.params) {
|
||||||
|
g_hash_table_destroy(event.params);
|
||||||
|
}
|
||||||
|
+ lrmd__reset_result(&event);
|
||||||
|
}
|
||||||
|
|
||||||
|
// \return Always 0, to indicate that IPC mainloop source should be kept
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
26
SOURCES/013-probe-failures.patch
Normal file
26
SOURCES/013-probe-failures.patch
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
From 186d5a02fba919c455fd6eeb050b4be107f82159 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chris Lumens <clumens@redhat.com>
|
||||||
|
Date: Thu, 13 Jan 2022 17:02:47 -0500
|
||||||
|
Subject: [PATCH] Low: scheduler: Use the old RC code to log maskable probe
|
||||||
|
failures.
|
||||||
|
|
||||||
|
---
|
||||||
|
lib/pengine/unpack.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
|
||||||
|
index 8a2d2a6d6d..b01f86257a 100644
|
||||||
|
--- a/lib/pengine/unpack.c
|
||||||
|
+++ b/lib/pengine/unpack.c
|
||||||
|
@@ -3780,7 +3780,7 @@ unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
|
||||||
|
|
||||||
|
if (maskable_probe_failure) {
|
||||||
|
crm_notice("Treating probe result '%s' for %s on %s as 'not running'",
|
||||||
|
- services_ocf_exitcode_str(rc), rsc->id, node->details->uname);
|
||||||
|
+ services_ocf_exitcode_str(old_rc), rsc->id, node->details->uname);
|
||||||
|
update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure,
|
||||||
|
on_fail, data_set);
|
||||||
|
crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
43
SOURCES/014-pcmk_delay_base.patch
Normal file
43
SOURCES/014-pcmk_delay_base.patch
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
From 9d812b0401d4cedef53a3cc3653ec782a5c49e37 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Thu, 13 Jan 2022 10:42:02 -0600
|
||||||
|
Subject: [PATCH] Doc: fencer: improve pcmk_delay_base meta-data
|
||||||
|
|
||||||
|
Update its type, since its value can now be a node map as well as a string,
|
||||||
|
and add more detail to its description.
|
||||||
|
---
|
||||||
|
daemons/fenced/pacemaker-fenced.c | 18 +++++++++++-------
|
||||||
|
1 file changed, 11 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/fenced/pacemaker-fenced.c b/daemons/fenced/pacemaker-fenced.c
|
||||||
|
index 1b954be5a4..12f331496c 100644
|
||||||
|
--- a/daemons/fenced/pacemaker-fenced.c
|
||||||
|
+++ b/daemons/fenced/pacemaker-fenced.c
|
||||||
|
@@ -1548,13 +1548,17 @@ main(int argc, char **argv)
|
||||||
|
PCMK_STONITH_DELAY_BASE);
|
||||||
|
printf(" <shortdesc lang=\"en\">Enable a base delay for "
|
||||||
|
"fencing actions and specify base delay value.</shortdesc>\n");
|
||||||
|
- printf(" <longdesc lang=\"en\">This prevents double fencing when "
|
||||||
|
- "different delays are configured on the nodes.\nUse this to "
|
||||||
|
- "enable a static delay for fencing actions.\nThe overall delay "
|
||||||
|
- "is derived from a random delay value adding this static delay "
|
||||||
|
- "so that the sum is kept below the maximum delay.\nSet to eg. "
|
||||||
|
- "node1:1s;node2:5 to set different value per node.</longdesc>\n");
|
||||||
|
- printf(" <content type=\"time\" default=\"0s\"/>\n");
|
||||||
|
+ printf(" <longdesc lang=\"en\">This enables a static delay for "
|
||||||
|
+ "fencing actions, which can help avoid \"death matches\" where "
|
||||||
|
+ "two nodes try to fence each other at the same time. If "
|
||||||
|
+ PCMK_STONITH_DELAY_MAX " is also used, a random delay will be "
|
||||||
|
+ "added such that the total delay is kept below that value.\n"
|
||||||
|
+ "This can be set to a single time value to apply to any node "
|
||||||
|
+ "targeted by this device (useful if a separate device is "
|
||||||
|
+ "configured for each target), or to a node map (for example, "
|
||||||
|
+ "\"node1:1s;node2:5\") to set a different value per target.\n"
|
||||||
|
+ " </longdesc>\n");
|
||||||
|
+ printf(" <content type=\"string\" default=\"0s\"/>\n");
|
||||||
|
printf(" </parameter>\n");
|
||||||
|
|
||||||
|
printf(" <parameter name=\"%s\" unique=\"0\">\n",
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
1093
SOURCES/015-fencing-reasons.patch
Normal file
1093
SOURCES/015-fencing-reasons.patch
Normal file
File diff suppressed because it is too large
Load Diff
56
SOURCES/016-fencing-crash.patch
Normal file
56
SOURCES/016-fencing-crash.patch
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
From e330568504ec379ea42460d21a2e20b1652d9445 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
Date: Fri, 14 Jan 2022 01:35:35 -0800
|
||||||
|
Subject: [PATCH] Fix: fencing: Don't set stonith action to pending if fork
|
||||||
|
fails
|
||||||
|
|
||||||
|
Currently, we set a stonith action to pending if
|
||||||
|
services_action_async_fork_notify() returns true. However, "true" means
|
||||||
|
that the svc_action should not be freed. This might be because the
|
||||||
|
svc_action forked successfully and is pending, or it might be because
|
||||||
|
the svc_action has already been freed.
|
||||||
|
|
||||||
|
In the case of stonith actions, if we fail to fork, the stonith_action_t
|
||||||
|
object stored in svc_action->cb_data gets freed by the done callback,
|
||||||
|
and services_action_async_fork_notify() returns true. If we try to set
|
||||||
|
the action to pending, it causes a segfault.
|
||||||
|
|
||||||
|
This commit moves the "set to pending" step to the
|
||||||
|
stonith_action_async_forked() callback. We avoid the segfault and only
|
||||||
|
set it to pending if it's actually pending.
|
||||||
|
|
||||||
|
A slight difference in ordering was required to achieve this. Now, the
|
||||||
|
action gets set to pending immediately before being added to the
|
||||||
|
mainloop, instead of immediately after.
|
||||||
|
|
||||||
|
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
---
|
||||||
|
lib/fencing/st_actions.c | 5 +++--
|
||||||
|
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/fencing/st_actions.c b/lib/fencing/st_actions.c
|
||||||
|
index e4e43225cd..306001af69 100644
|
||||||
|
--- a/lib/fencing/st_actions.c
|
||||||
|
+++ b/lib/fencing/st_actions.c
|
||||||
|
@@ -550,6 +550,9 @@ stonith_action_async_forked(svc_action_t *svc_action)
|
||||||
|
(action->fork_cb) (svc_action->pid, action->userdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
+ pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN, PCMK_EXEC_PENDING,
|
||||||
|
+ NULL);
|
||||||
|
+
|
||||||
|
crm_trace("Child process %d performing action '%s' successfully forked",
|
||||||
|
action->pid, action->action);
|
||||||
|
}
|
||||||
|
@@ -619,8 +622,6 @@ internal_stonith_action_execute(stonith_action_t * action)
|
||||||
|
if (services_action_async_fork_notify(svc_action,
|
||||||
|
&stonith_action_async_done,
|
||||||
|
&stonith_action_async_forked)) {
|
||||||
|
- pcmk__set_result(&(action->result), PCMK_OCF_UNKNOWN,
|
||||||
|
- PCMK_EXEC_PENDING, NULL);
|
||||||
|
return pcmk_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
875
SOURCES/017-fencing-reasons.patch
Normal file
875
SOURCES/017-fencing-reasons.patch
Normal file
@ -0,0 +1,875 @@
|
|||||||
|
From 523f62eb235836a01ea039c23ada261a494f7b32 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 10 Nov 2021 15:22:47 -0600
|
||||||
|
Subject: [PATCH 01/11] Feature: libpacemaker: improve result for high-level
|
||||||
|
fencing API
|
||||||
|
|
||||||
|
Previously, pcmk__fencing_action()'s helpers for asynchronous fencing actions
|
||||||
|
initialized the result to a generic error, and then overrode that only on
|
||||||
|
success.
|
||||||
|
|
||||||
|
Now, set a detailed result for early failures, and use the full result when
|
||||||
|
available from the fencing API.
|
||||||
|
|
||||||
|
A standard return code is still returned to callers at this point.
|
||||||
|
---
|
||||||
|
lib/pacemaker/pcmk_fence.c | 31 ++++++++++++++++++-------------
|
||||||
|
1 file changed, 18 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index 7d6acd0de6..125e1b268b 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -32,8 +32,8 @@ static struct {
|
||||||
|
unsigned int timeout;
|
||||||
|
unsigned int tolerance;
|
||||||
|
int delay;
|
||||||
|
- int rc;
|
||||||
|
-} async_fence_data;
|
||||||
|
+ pcmk__action_result_t result;
|
||||||
|
+} async_fence_data = { NULL, };
|
||||||
|
|
||||||
|
static int
|
||||||
|
handle_level(stonith_t *st, char *target, int fence_level,
|
||||||
|
@@ -76,14 +76,13 @@ handle_level(stonith_t *st, char *target, int fence_level,
|
||||||
|
static void
|
||||||
|
notify_callback(stonith_t * st, stonith_event_t * e)
|
||||||
|
{
|
||||||
|
- if (e->result != pcmk_ok) {
|
||||||
|
- return;
|
||||||
|
- }
|
||||||
|
+ if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
|
||||||
|
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||||
|
|
||||||
|
- if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei) &&
|
||||||
|
- pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||||
|
-
|
||||||
|
- async_fence_data.rc = e->result;
|
||||||
|
+ pcmk__set_result(&async_fence_data.result,
|
||||||
|
+ stonith__event_exit_status(e),
|
||||||
|
+ stonith__event_execution_status(e),
|
||||||
|
+ stonith__event_exit_reason(e));
|
||||||
|
g_main_loop_quit(mainloop);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -91,8 +90,9 @@ notify_callback(stonith_t * st, stonith_event_t * e)
|
||||||
|
static void
|
||||||
|
fence_callback(stonith_t * stonith, stonith_callback_data_t * data)
|
||||||
|
{
|
||||||
|
- async_fence_data.rc = data->rc;
|
||||||
|
-
|
||||||
|
+ pcmk__set_result(&async_fence_data.result, stonith__exit_status(data),
|
||||||
|
+ stonith__execution_status(data),
|
||||||
|
+ stonith__exit_reason(data));
|
||||||
|
g_main_loop_quit(mainloop);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -106,6 +106,8 @@ async_fence_helper(gpointer user_data)
|
||||||
|
if (rc != pcmk_ok) {
|
||||||
|
fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
|
||||||
|
g_main_loop_quit(mainloop);
|
||||||
|
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||||
|
+ PCMK_EXEC_NOT_CONNECTED, NULL);
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -121,6 +123,8 @@ async_fence_helper(gpointer user_data)
|
||||||
|
|
||||||
|
if (call_id < 0) {
|
||||||
|
g_main_loop_quit(mainloop);
|
||||||
|
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||||
|
+ PCMK_EXEC_ERROR, pcmk_strerror(call_id));
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -146,7 +150,8 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
async_fence_data.timeout = timeout;
|
||||||
|
async_fence_data.tolerance = tolerance;
|
||||||
|
async_fence_data.delay = delay;
|
||||||
|
- async_fence_data.rc = pcmk_err_generic;
|
||||||
|
+ pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR, PCMK_EXEC_UNKNOWN,
|
||||||
|
+ NULL);
|
||||||
|
|
||||||
|
trig = mainloop_add_trigger(G_PRIORITY_HIGH, async_fence_helper, NULL);
|
||||||
|
mainloop_set_trigger(trig);
|
||||||
|
@@ -156,7 +161,7 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
|
||||||
|
free(async_fence_data.name);
|
||||||
|
|
||||||
|
- return pcmk_legacy2rc(async_fence_data.rc);
|
||||||
|
+ return stonith__result2rc(&async_fence_data.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 008868fae5d1b0d6d8dc61f7acfb3856801ddd52 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 15:36:10 -0600
|
||||||
|
Subject: [PATCH 02/11] Refactor: libpacemaker: add exit reason to high-level
|
||||||
|
fencing API
|
||||||
|
|
||||||
|
Nothing uses it as of this commit
|
||||||
|
---
|
||||||
|
include/pacemaker.h | 5 ++++-
|
||||||
|
include/pcmki/pcmki_fence.h | 5 ++++-
|
||||||
|
lib/pacemaker/pcmk_fence.c | 10 +++++++---
|
||||||
|
tools/stonith_admin.c | 6 +++---
|
||||||
|
4 files changed, 18 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||||
|
index a8523c969e..0daa4c5945 100644
|
||||||
|
--- a/include/pacemaker.h
|
||||||
|
+++ b/include/pacemaker.h
|
||||||
|
@@ -189,12 +189,15 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||||
|
* again.
|
||||||
|
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||||
|
* static/random fencing delays from pcmk_delay_base/max.
|
||||||
|
+ * \param[out] reason If not NULL, where to put descriptive failure reason
|
||||||
|
*
|
||||||
|
* \return Standard Pacemaker return code
|
||||||
|
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||||
|
+ * returned value.
|
||||||
|
*/
|
||||||
|
int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay);
|
||||||
|
+ int delay, char **reason);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief List the fencing operations that have occurred for a specific node.
|
||||||
|
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||||
|
index d4cef68f5c..c3da0361d7 100644
|
||||||
|
--- a/include/pcmki/pcmki_fence.h
|
||||||
|
+++ b/include/pcmki/pcmki_fence.h
|
||||||
|
@@ -28,12 +28,15 @@
|
||||||
|
* again.
|
||||||
|
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||||
|
* static/random fencing delays from pcmk_delay_base/max
|
||||||
|
+ * \param[out] reason If not NULL, where to put descriptive failure reason
|
||||||
|
*
|
||||||
|
* \return Standard Pacemaker return code
|
||||||
|
+ * \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||||
|
+ * returned value.
|
||||||
|
*/
|
||||||
|
int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay);
|
||||||
|
+ int delay, char **reason);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief List the fencing operations that have occurred for a specific node.
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index 125e1b268b..dbf084fb6b 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -139,7 +139,7 @@ async_fence_helper(gpointer user_data)
|
||||||
|
int
|
||||||
|
pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay)
|
||||||
|
+ int delay, char **reason)
|
||||||
|
{
|
||||||
|
crm_trigger_t *trig;
|
||||||
|
|
||||||
|
@@ -161,6 +161,9 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
|
||||||
|
free(async_fence_data.name);
|
||||||
|
|
||||||
|
+ if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
|
||||||
|
+ *reason = strdup(async_fence_data.result.exit_reason);
|
||||||
|
+ }
|
||||||
|
return stonith__result2rc(&async_fence_data.result);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -168,9 +171,10 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
int
|
||||||
|
pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay)
|
||||||
|
+ int delay, char **reason)
|
||||||
|
{
|
||||||
|
- return pcmk__fence_action(st, target, action, name, timeout, tolerance, delay);
|
||||||
|
+ return pcmk__fence_action(st, target, action, name, timeout, tolerance,
|
||||||
|
+ delay, reason);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index 2d48326e1b..fdc7c46d49 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -571,17 +571,17 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
|
case 'B':
|
||||||
|
rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay);
|
||||||
|
+ options.tolerance*1000, options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'F':
|
||||||
|
rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay);
|
||||||
|
+ options.tolerance*1000, options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay);
|
||||||
|
+ options.tolerance*1000, options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'h':
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 7570510f9985ba75ef73fb824f28109e135ace0a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 15:40:48 -0600
|
||||||
|
Subject: [PATCH 03/11] Refactor: libpacemaker: rename high-level fencing API
|
||||||
|
|
||||||
|
Rename pcmk_fence_action() to pcmk_request_fencing(), and its internal
|
||||||
|
equivalent pcmk__fence_action() to pcmk__request_fencing(). The change is
|
||||||
|
backward-compatible because pcmk_fence_action() has not been exposed publicly
|
||||||
|
yet.
|
||||||
|
|
||||||
|
"Fence action" can be easily confused with libcrmservice actions, liblrmd
|
||||||
|
actions, libstonithd actions, scheduler actions, and so forth.
|
||||||
|
|
||||||
|
Also, the new name makes it clearer that the caller is requesting that the
|
||||||
|
cluster perform fencing, and not directly performing fencing.
|
||||||
|
---
|
||||||
|
include/pacemaker.h | 20 ++++++++++----------
|
||||||
|
include/pcmki/pcmki_fence.h | 16 ++++++++--------
|
||||||
|
lib/pacemaker/pcmk_fence.c | 16 ++++++++--------
|
||||||
|
tools/stonith_admin.c | 18 ++++++++++++------
|
||||||
|
4 files changed, 38 insertions(+), 32 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||||
|
index 0daa4c5945..e581f975a9 100644
|
||||||
|
--- a/include/pacemaker.h
|
||||||
|
+++ b/include/pacemaker.h
|
||||||
|
@@ -177,27 +177,27 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||||
|
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||||
|
|
||||||
|
/*!
|
||||||
|
- * \brief Perform a STONITH action.
|
||||||
|
+ * \brief Ask the cluster to perform fencing
|
||||||
|
*
|
||||||
|
- * \param[in] st A connection to the STONITH API.
|
||||||
|
- * \param[in] target The node receiving the action.
|
||||||
|
- * \param[in] action The action to perform.
|
||||||
|
+ * \param[in] st A connection to the fencer API
|
||||||
|
+ * \param[in] target The node that should be fenced
|
||||||
|
+ * \param[in] action The fencing action (on, off, reboot) to perform
|
||||||
|
* \param[in] name Who requested the fence action?
|
||||||
|
- * \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||||
|
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
|
||||||
|
* \param[in] tolerance If a successful action for \p target happened within
|
||||||
|
* this many ms, return 0 without performing the action
|
||||||
|
- * again.
|
||||||
|
+ * again
|
||||||
|
* \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||||
|
- * static/random fencing delays from pcmk_delay_base/max.
|
||||||
|
+ * static/random fencing delays from pcmk_delay_base/max
|
||||||
|
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||||
|
*
|
||||||
|
* \return Standard Pacemaker return code
|
||||||
|
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||||
|
* returned value.
|
||||||
|
*/
|
||||||
|
-int pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay, char **reason);
|
||||||
|
+int pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
+ const char *name, unsigned int timeout,
|
||||||
|
+ unsigned int tolerance, int delay, char **reason);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief List the fencing operations that have occurred for a specific node.
|
||||||
|
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||||
|
index c3da0361d7..e3a7e27264 100644
|
||||||
|
--- a/include/pcmki/pcmki_fence.h
|
||||||
|
+++ b/include/pcmki/pcmki_fence.h
|
||||||
|
@@ -13,14 +13,14 @@
|
||||||
|
# include <crm/common/output_internal.h>
|
||||||
|
|
||||||
|
/*!
|
||||||
|
- * \brief Perform a STONITH action.
|
||||||
|
+ * \brief Ask the cluster to perform fencing
|
||||||
|
*
|
||||||
|
- * \note This is the internal version of pcmk_fence_action(). External users
|
||||||
|
+ * \note This is the internal version of pcmk_request_fencing(). External users
|
||||||
|
* of the pacemaker API should use that function instead.
|
||||||
|
*
|
||||||
|
- * \param[in] st A connection to the STONITH API.
|
||||||
|
- * \param[in] target The node receiving the action.
|
||||||
|
- * \param[in] action The action to perform.
|
||||||
|
+ * \param[in] st A connection to the fencer API
|
||||||
|
+ * \param[in] target The node that should be fenced
|
||||||
|
+ * \param[in] action The fencing action (on, off, reboot) to perform
|
||||||
|
* \param[in] name Who requested the fence action?
|
||||||
|
* \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||||
|
* \param[in] tolerance If a successful action for \p target happened within
|
||||||
|
@@ -34,9 +34,9 @@
|
||||||
|
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||||
|
* returned value.
|
||||||
|
*/
|
||||||
|
-int pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay, char **reason);
|
||||||
|
+int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
+ const char *name, unsigned int timeout,
|
||||||
|
+ unsigned int tolerance, int delay, char **reason);
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief List the fencing operations that have occurred for a specific node.
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index dbf084fb6b..1b7feb54b2 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -137,9 +137,9 @@ async_fence_helper(gpointer user_data)
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
-pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay, char **reason)
|
||||||
|
+pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
+ const char *name, unsigned int timeout,
|
||||||
|
+ unsigned int tolerance, int delay, char **reason)
|
||||||
|
{
|
||||||
|
crm_trigger_t *trig;
|
||||||
|
|
||||||
|
@@ -169,12 +169,12 @@ pcmk__fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
|
||||||
|
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||||
|
int
|
||||||
|
-pcmk_fence_action(stonith_t *st, const char *target, const char *action,
|
||||||
|
- const char *name, unsigned int timeout, unsigned int tolerance,
|
||||||
|
- int delay, char **reason)
|
||||||
|
+pcmk_request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
+ const char *name, unsigned int timeout,
|
||||||
|
+ unsigned int tolerance, int delay, char **reason)
|
||||||
|
{
|
||||||
|
- return pcmk__fence_action(st, target, action, name, timeout, tolerance,
|
||||||
|
- delay, reason);
|
||||||
|
+ return pcmk__request_fencing(st, target, action, name, timeout, tolerance,
|
||||||
|
+ delay, reason);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index fdc7c46d49..56948b3875 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -570,18 +570,24 @@ main(int argc, char **argv)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'B':
|
||||||
|
- rc = pcmk__fence_action(st, target, "reboot", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay, NULL);
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "reboot", name,
|
||||||
|
+ options.timeout * 1000,
|
||||||
|
+ options.tolerance * 1000,
|
||||||
|
+ options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'F':
|
||||||
|
- rc = pcmk__fence_action(st, target, "off", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay, NULL);
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "off", name,
|
||||||
|
+ options.timeout * 1000,
|
||||||
|
+ options.tolerance * 1000,
|
||||||
|
+ options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
- rc = pcmk__fence_action(st, target, "on", name, options.timeout*1000,
|
||||||
|
- options.tolerance*1000, options.delay, NULL);
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "on", name,
|
||||||
|
+ options.timeout * 1000,
|
||||||
|
+ options.tolerance * 1000,
|
||||||
|
+ options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'h':
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 247eb303df934944c0b72b162bb661cee6e0ed8b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 15:52:37 -0600
|
||||||
|
Subject: [PATCH 04/11] Refactor: tools: drop unnecessary string duplication in
|
||||||
|
stonith_admin
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/stonith_admin.c | 11 ++++-------
|
||||||
|
1 file changed, 4 insertions(+), 7 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index 56948b3875..c11e302e76 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -360,8 +360,6 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
|
pcmk__cli_init_logging("stonith_admin", args->verbosity);
|
||||||
|
|
||||||
|
- name = strdup(crm_system_name);
|
||||||
|
-
|
||||||
|
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
|
||||||
|
if (rc != pcmk_rc_ok) {
|
||||||
|
exit_code = CRM_EX_ERROR;
|
||||||
|
@@ -496,7 +494,7 @@ main(int argc, char **argv)
|
||||||
|
if (st == NULL) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
} else if (!no_connect) {
|
||||||
|
- rc = st->cmds->connect(st, name, NULL);
|
||||||
|
+ rc = st->cmds->connect(st, crm_system_name, NULL);
|
||||||
|
}
|
||||||
|
if (rc < 0) {
|
||||||
|
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
|
||||||
|
@@ -570,21 +568,21 @@ main(int argc, char **argv)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'B':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "reboot", name,
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
|
||||||
|
options.timeout * 1000,
|
||||||
|
options.tolerance * 1000,
|
||||||
|
options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'F':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "off", name,
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "off", crm_system_name,
|
||||||
|
options.timeout * 1000,
|
||||||
|
options.tolerance * 1000,
|
||||||
|
options.delay, NULL);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "on", name,
|
||||||
|
+ rc = pcmk__request_fencing(st, target, "on", crm_system_name,
|
||||||
|
options.timeout * 1000,
|
||||||
|
options.tolerance * 1000,
|
||||||
|
options.delay, NULL);
|
||||||
|
@@ -619,7 +617,6 @@ main(int argc, char **argv)
|
||||||
|
out->finish(out, exit_code, true, NULL);
|
||||||
|
pcmk__output_free(out);
|
||||||
|
}
|
||||||
|
- free(name);
|
||||||
|
stonith_key_value_freeall(options.params, 1, 1);
|
||||||
|
|
||||||
|
if (st != NULL) {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From a7888bf6868d8d9d9c77f65ae9983cf748bb0548 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 15:56:34 -0600
|
||||||
|
Subject: [PATCH 05/11] Refactor: tools: functionize requesting fencing in
|
||||||
|
stonith_admin
|
||||||
|
|
||||||
|
... to reduce code duplication and improve readability
|
||||||
|
---
|
||||||
|
tools/stonith_admin.c | 27 +++++++++++++++------------
|
||||||
|
1 file changed, 15 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index c11e302e76..f738a9c888 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -331,6 +331,18 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
|
||||||
|
return context;
|
||||||
|
}
|
||||||
|
|
||||||
|
+// \return Standard Pacemaker return code
|
||||||
|
+static int
|
||||||
|
+request_fencing(stonith_t *st, const char *target, const char *command)
|
||||||
|
+{
|
||||||
|
+ int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||||
|
+ options.timeout * 1000,
|
||||||
|
+ options.tolerance * 1000,
|
||||||
|
+ options.delay, NULL);
|
||||||
|
+
|
||||||
|
+ return rc;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
int
|
||||||
|
main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
@@ -568,24 +580,15 @@ main(int argc, char **argv)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'B':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "reboot", crm_system_name,
|
||||||
|
- options.timeout * 1000,
|
||||||
|
- options.tolerance * 1000,
|
||||||
|
- options.delay, NULL);
|
||||||
|
+ rc = request_fencing(st, target, "reboot");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'F':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "off", crm_system_name,
|
||||||
|
- options.timeout * 1000,
|
||||||
|
- options.tolerance * 1000,
|
||||||
|
- options.delay, NULL);
|
||||||
|
+ rc = request_fencing(st, target, "off");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
- rc = pcmk__request_fencing(st, target, "on", crm_system_name,
|
||||||
|
- options.timeout * 1000,
|
||||||
|
- options.tolerance * 1000,
|
||||||
|
- options.delay, NULL);
|
||||||
|
+ rc = request_fencing(st, target, "on");
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'h':
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 2da32df780983ec1197e857eed5eeb5bf1101889 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 16:05:19 -0600
|
||||||
|
Subject: [PATCH 06/11] Feature: tools: display failure reasons for
|
||||||
|
stonith_admin fencing commands
|
||||||
|
|
||||||
|
Previously, stonith_admin's --fence/--unfence/--reboot options did not output
|
||||||
|
any error message on failure. Now, they do, including the exit reason, if
|
||||||
|
available.
|
||||||
|
---
|
||||||
|
tools/stonith_admin.c | 30 +++++++++++++++++++++++++-----
|
||||||
|
1 file changed, 25 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index f738a9c888..5590faf11e 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -333,13 +333,33 @@ build_arg_context(pcmk__common_args_t *args, GOptionGroup **group) {
|
||||||
|
|
||||||
|
// \return Standard Pacemaker return code
|
||||||
|
static int
|
||||||
|
-request_fencing(stonith_t *st, const char *target, const char *command)
|
||||||
|
+request_fencing(stonith_t *st, const char *target, const char *command,
|
||||||
|
+ GError **error)
|
||||||
|
{
|
||||||
|
+ char *reason = NULL;
|
||||||
|
int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||||
|
options.timeout * 1000,
|
||||||
|
options.tolerance * 1000,
|
||||||
|
- options.delay, NULL);
|
||||||
|
+ options.delay, &reason);
|
||||||
|
|
||||||
|
+ if (rc != pcmk_rc_ok) {
|
||||||
|
+ const char *rc_str = pcmk_rc_str(rc);
|
||||||
|
+
|
||||||
|
+ // If reason is identical to return code string, don't display it twice
|
||||||
|
+ if (pcmk__str_eq(rc_str, reason, pcmk__str_none)) {
|
||||||
|
+ free(reason);
|
||||||
|
+ reason = NULL;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ g_set_error(error, PCMK__RC_ERROR, rc,
|
||||||
|
+ "Couldn't %sfence %s: %s%s%s%s",
|
||||||
|
+ ((strcmp(command, "on") == 0)? "un" : ""),
|
||||||
|
+ target, pcmk_rc_str(rc),
|
||||||
|
+ ((reason == NULL)? "" : " ("),
|
||||||
|
+ ((reason == NULL)? "" : reason),
|
||||||
|
+ ((reason == NULL)? "" : ")"));
|
||||||
|
+ }
|
||||||
|
+ free(reason);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -580,15 +600,15 @@ main(int argc, char **argv)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'B':
|
||||||
|
- rc = request_fencing(st, target, "reboot");
|
||||||
|
+ rc = request_fencing(st, target, "reboot", &error);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'F':
|
||||||
|
- rc = request_fencing(st, target, "off");
|
||||||
|
+ rc = request_fencing(st, target, "off", &error);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'U':
|
||||||
|
- rc = request_fencing(st, target, "on");
|
||||||
|
+ rc = request_fencing(st, target, "on", &error);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'h':
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 2d99eba4c326d3b13dbbe446971ea5febd5d05be Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Fri, 10 Dec 2021 16:08:49 -0600
|
||||||
|
Subject: [PATCH 07/11] Feature: libpacemaker: return exit reason for fencer
|
||||||
|
connection failures
|
||||||
|
|
||||||
|
... instead of outputting to stderr directly, so that the caller (i.e.
|
||||||
|
stonith_admin) can output the error in the correct output format.
|
||||||
|
---
|
||||||
|
lib/pacemaker/pcmk_fence.c | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index 1b7feb54b2..d17b07cda2 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -104,10 +104,9 @@ async_fence_helper(gpointer user_data)
|
||||||
|
int rc = stonith_api_connect_retry(st, async_fence_data.name, 10);
|
||||||
|
|
||||||
|
if (rc != pcmk_ok) {
|
||||||
|
- fprintf(stderr, "Could not connect to fencer: %s\n", pcmk_strerror(rc));
|
||||||
|
g_main_loop_quit(mainloop);
|
||||||
|
pcmk__set_result(&async_fence_data.result, CRM_EX_ERROR,
|
||||||
|
- PCMK_EXEC_NOT_CONNECTED, NULL);
|
||||||
|
+ PCMK_EXEC_NOT_CONNECTED, pcmk_strerror(rc));
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 4480ef0602f47450bdddfbde360a6a8327710927 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 17 Jan 2022 09:39:39 -0600
|
||||||
|
Subject: [PATCH 08/11] Low: libpacemaker: compare fence action names
|
||||||
|
case-sensitively
|
||||||
|
|
||||||
|
---
|
||||||
|
lib/pacemaker/pcmk_fence.c | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index d17b07cda2..2a8f50a555 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- * Copyright 2009-2021 the Pacemaker project contributors
|
||||||
|
+ * Copyright 2009-2022 the Pacemaker project contributors
|
||||||
|
*
|
||||||
|
* The version control history for this file may have further details.
|
||||||
|
*
|
||||||
|
@@ -77,7 +77,7 @@ static void
|
||||||
|
notify_callback(stonith_t * st, stonith_event_t * e)
|
||||||
|
{
|
||||||
|
if (pcmk__str_eq(async_fence_data.target, e->target, pcmk__str_casei)
|
||||||
|
- && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_casei)) {
|
||||||
|
+ && pcmk__str_eq(async_fence_data.action, e->action, pcmk__str_none)) {
|
||||||
|
|
||||||
|
pcmk__set_result(&async_fence_data.result,
|
||||||
|
stonith__event_exit_status(e),
|
||||||
|
@@ -549,7 +549,7 @@ pcmk__reduce_fence_history(stonith_history_t *history)
|
||||||
|
if ((hp->state == st_done) || (hp->state == st_failed)) {
|
||||||
|
/* action not in progress */
|
||||||
|
if (pcmk__str_eq(hp->target, np->target, pcmk__str_casei) &&
|
||||||
|
- pcmk__str_eq(hp->action, np->action, pcmk__str_casei) &&
|
||||||
|
+ pcmk__str_eq(hp->action, np->action, pcmk__str_none) &&
|
||||||
|
(hp->state == np->state) &&
|
||||||
|
((hp->state == st_done) ||
|
||||||
|
pcmk__str_eq(hp->delegate, np->delegate, pcmk__str_casei))) {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From fe4c65a3b9e715c2b535709f989f2369d3637b78 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 17 Jan 2022 09:45:24 -0600
|
||||||
|
Subject: [PATCH 09/11] Refactor: libpacemaker: avoid unnecessary string
|
||||||
|
duplication
|
||||||
|
|
||||||
|
... and don't leave any dynamic memory hanging around
|
||||||
|
---
|
||||||
|
lib/pacemaker/pcmk_fence.c | 11 ++++++++---
|
||||||
|
1 file changed, 8 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pacemaker/pcmk_fence.c b/lib/pacemaker/pcmk_fence.c
|
||||||
|
index 2a8f50a555..260fa5ab8e 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_fence.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_fence.c
|
||||||
|
@@ -141,6 +141,7 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
unsigned int tolerance, int delay, char **reason)
|
||||||
|
{
|
||||||
|
crm_trigger_t *trig;
|
||||||
|
+ int rc = pcmk_rc_ok;
|
||||||
|
|
||||||
|
async_fence_data.st = st;
|
||||||
|
async_fence_data.name = strdup(name);
|
||||||
|
@@ -160,10 +161,14 @@ pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
|
||||||
|
free(async_fence_data.name);
|
||||||
|
|
||||||
|
- if ((reason != NULL) && (async_fence_data.result.exit_reason != NULL)) {
|
||||||
|
- *reason = strdup(async_fence_data.result.exit_reason);
|
||||||
|
+ if (reason != NULL) {
|
||||||
|
+ // Give the caller ownership of the exit reason
|
||||||
|
+ *reason = async_fence_data.result.exit_reason;
|
||||||
|
+ async_fence_data.result.exit_reason = NULL;
|
||||||
|
}
|
||||||
|
- return stonith__result2rc(&async_fence_data.result);
|
||||||
|
+ rc = stonith__result2rc(&async_fence_data.result);
|
||||||
|
+ pcmk__reset_result(&async_fence_data.result);
|
||||||
|
+ return rc;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef BUILD_PUBLIC_LIBPACEMAKER
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 7b7af07796f05a1adabdac655582be2e17106f81 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 17 Jan 2022 10:07:10 -0600
|
||||||
|
Subject: [PATCH 10/11] Doc: libpacemaker: improve pcmk__request_fencing()
|
||||||
|
doxygen block
|
||||||
|
|
||||||
|
---
|
||||||
|
include/pacemaker.h | 6 ++++--
|
||||||
|
include/pcmki/pcmki_fence.h | 15 +++++++++------
|
||||||
|
2 files changed, 13 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/pacemaker.h b/include/pacemaker.h
|
||||||
|
index e581f975a9..266a844892 100644
|
||||||
|
--- a/include/pacemaker.h
|
||||||
|
+++ b/include/pacemaker.h
|
||||||
|
@@ -187,8 +187,10 @@ int pcmk_list_nodes(xmlNodePtr *xml, char *node_types);
|
||||||
|
* \param[in] tolerance If a successful action for \p target happened within
|
||||||
|
* this many ms, return 0 without performing the action
|
||||||
|
* again
|
||||||
|
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||||
|
- * static/random fencing delays from pcmk_delay_base/max
|
||||||
|
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
|
||||||
|
+ * fencing action (a value of -1 applies no delay and also
|
||||||
|
+ * disables any fencing delay from pcmk_delay_base and
|
||||||
|
+ * pcmk_delay_max)
|
||||||
|
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||||
|
*
|
||||||
|
* \return Standard Pacemaker return code
|
||||||
|
diff --git a/include/pcmki/pcmki_fence.h b/include/pcmki/pcmki_fence.h
|
||||||
|
index e3a7e27264..4a2fe3c481 100644
|
||||||
|
--- a/include/pcmki/pcmki_fence.h
|
||||||
|
+++ b/include/pcmki/pcmki_fence.h
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- * Copyright 2019-2021 the Pacemaker project contributors
|
||||||
|
+ * Copyright 2019-2022 the Pacemaker project contributors
|
||||||
|
*
|
||||||
|
* The version control history for this file may have further details.
|
||||||
|
*
|
||||||
|
@@ -22,17 +22,20 @@
|
||||||
|
* \param[in] target The node that should be fenced
|
||||||
|
* \param[in] action The fencing action (on, off, reboot) to perform
|
||||||
|
* \param[in] name Who requested the fence action?
|
||||||
|
- * \param[in] timeout How long to wait for the operation to complete (in ms).
|
||||||
|
+ * \param[in] timeout How long to wait for the operation to complete (in ms)
|
||||||
|
* \param[in] tolerance If a successful action for \p target happened within
|
||||||
|
- * this many ms, return 0 without performing the action
|
||||||
|
- * again.
|
||||||
|
- * \param[in] delay Apply a fencing delay. Value -1 means disable also any
|
||||||
|
- * static/random fencing delays from pcmk_delay_base/max
|
||||||
|
+ * this many milliseconds, return success without
|
||||||
|
+ * performing the action again
|
||||||
|
+ * \param[in] delay Apply this delay (in milliseconds) before initiating the
|
||||||
|
+ * fencing action (a value of -1 applies no delay and also
|
||||||
|
+ * disables any fencing delay from pcmk_delay_base and
|
||||||
|
+ * pcmk_delay_max)
|
||||||
|
* \param[out] reason If not NULL, where to put descriptive failure reason
|
||||||
|
*
|
||||||
|
* \return Standard Pacemaker return code
|
||||||
|
* \note If \p reason is not NULL, the caller is responsible for freeing its
|
||||||
|
* returned value.
|
||||||
|
+ * \todo delay is eventually used with g_timeout_add() and should be guint
|
||||||
|
*/
|
||||||
|
int pcmk__request_fencing(stonith_t *st, const char *target, const char *action,
|
||||||
|
const char *name, unsigned int timeout,
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 61fb7271712e1246eb6d9472dc1afc7cd10e0a79 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 17 Jan 2022 10:18:02 -0600
|
||||||
|
Subject: [PATCH 11/11] Fix: tools: get stonith_admin -T option working again
|
||||||
|
|
||||||
|
Regression introduced in 2.0.3 by 3910b6fec
|
||||||
|
|
||||||
|
This reverts commit 247eb303df934944c0b72b162bb661cee6e0ed8b
|
||||||
|
("Refactor: tools: drop unnecessary string duplication in stonith_admin")
|
||||||
|
and fixes a regression introduced when stonith_admin was converted to use
|
||||||
|
GOption.
|
||||||
|
|
||||||
|
The -T option is intended to override the client name passed to the fencer API,
|
||||||
|
but the client name was set to the default (crm_system_name) after option
|
||||||
|
processing had already been done, so any value for -T was overwritten by the
|
||||||
|
default, and its memory was leaked.
|
||||||
|
|
||||||
|
This commit sets the default only if -T was not used.
|
||||||
|
---
|
||||||
|
tools/stonith_admin.c | 15 ++++++++++-----
|
||||||
|
1 file changed, 10 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/stonith_admin.c b/tools/stonith_admin.c
|
||||||
|
index 5590faf11e..54774b6fee 100644
|
||||||
|
--- a/tools/stonith_admin.c
|
||||||
|
+++ b/tools/stonith_admin.c
|
||||||
|
@@ -337,10 +337,10 @@ request_fencing(stonith_t *st, const char *target, const char *command,
|
||||||
|
GError **error)
|
||||||
|
{
|
||||||
|
char *reason = NULL;
|
||||||
|
- int rc = pcmk__request_fencing(st, target, command, crm_system_name,
|
||||||
|
- options.timeout * 1000,
|
||||||
|
- options.tolerance * 1000,
|
||||||
|
- options.delay, &reason);
|
||||||
|
+ int rc = pcmk__request_fencing(st, target, command, name,
|
||||||
|
+ options.timeout * 1000,
|
||||||
|
+ options.tolerance * 1000,
|
||||||
|
+ options.delay, &reason);
|
||||||
|
|
||||||
|
if (rc != pcmk_rc_ok) {
|
||||||
|
const char *rc_str = pcmk_rc_str(rc);
|
||||||
|
@@ -392,6 +392,10 @@ main(int argc, char **argv)
|
||||||
|
|
||||||
|
pcmk__cli_init_logging("stonith_admin", args->verbosity);
|
||||||
|
|
||||||
|
+ if (name == NULL) {
|
||||||
|
+ name = strdup(crm_system_name);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
rc = pcmk__output_new(&out, args->output_ty, args->output_dest, argv);
|
||||||
|
if (rc != pcmk_rc_ok) {
|
||||||
|
exit_code = CRM_EX_ERROR;
|
||||||
|
@@ -526,7 +530,7 @@ main(int argc, char **argv)
|
||||||
|
if (st == NULL) {
|
||||||
|
rc = -ENOMEM;
|
||||||
|
} else if (!no_connect) {
|
||||||
|
- rc = st->cmds->connect(st, crm_system_name, NULL);
|
||||||
|
+ rc = st->cmds->connect(st, name, NULL);
|
||||||
|
}
|
||||||
|
if (rc < 0) {
|
||||||
|
out->err(out, "Could not connect to fencer: %s", pcmk_strerror(rc));
|
||||||
|
@@ -640,6 +644,7 @@ main(int argc, char **argv)
|
||||||
|
out->finish(out, exit_code, true, NULL);
|
||||||
|
pcmk__output_free(out);
|
||||||
|
}
|
||||||
|
+ free(name);
|
||||||
|
stonith_key_value_freeall(options.params, 1, 1);
|
||||||
|
|
||||||
|
if (st != NULL) {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
796
SOURCES/018-failure-messages.patch
Normal file
796
SOURCES/018-failure-messages.patch
Normal file
@ -0,0 +1,796 @@
|
|||||||
|
From 08c3420f2c857e7b27cd960f355d787af534da7d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Tue, 18 Jan 2022 16:04:49 -0600
|
||||||
|
Subject: [PATCH 01/12] Log: libcrmcommon: improve description for "not
|
||||||
|
connected" status
|
||||||
|
|
||||||
|
PCMK_EXEC_NOT_CONNECTED was originally added to represent "No executor
|
||||||
|
connection", but it can also now mean no fencer connection, so change it to
|
||||||
|
"Internal communication failure" which is probably less mysterious to end users
|
||||||
|
anyway (especially since it should be accompanied by a more descriptive exit
|
||||||
|
reason).
|
||||||
|
---
|
||||||
|
include/crm/common/results.h | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/include/crm/common/results.h b/include/crm/common/results.h
|
||||||
|
index 873faf5c43..3d322a7ce6 100644
|
||||||
|
--- a/include/crm/common/results.h
|
||||||
|
+++ b/include/crm/common/results.h
|
||||||
|
@@ -349,7 +349,7 @@ pcmk_exec_status_str(enum pcmk_exec_status status)
|
||||||
|
case PCMK_EXEC_ERROR_HARD: return "Hard error";
|
||||||
|
case PCMK_EXEC_ERROR_FATAL: return "Fatal error";
|
||||||
|
case PCMK_EXEC_NOT_INSTALLED: return "Not installed";
|
||||||
|
- case PCMK_EXEC_NOT_CONNECTED: return "No executor connection";
|
||||||
|
+ case PCMK_EXEC_NOT_CONNECTED: return "Internal communication failure";
|
||||||
|
case PCMK_EXEC_INVALID: return "Cannot execute now";
|
||||||
|
case PCMK_EXEC_NO_FENCE_DEVICE: return "No fence device";
|
||||||
|
case PCMK_EXEC_NO_SECRETS: return "CIB secrets unavailable";
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 7c345cf8cf0cb054f5634206880df035bfef7311 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 15:12:36 -0600
|
||||||
|
Subject: [PATCH 02/12] Refactor: libcrmcommon: drop unnecessary system error
|
||||||
|
redefinitions
|
||||||
|
|
||||||
|
portability.h defines some system error codes that might not be present on
|
||||||
|
non-Linux systems.
|
||||||
|
|
||||||
|
This was a bad idea, since there's no way to ensure the defined values don't
|
||||||
|
conflict with existing system codes. However, we use a number of them, so it's
|
||||||
|
probably best to keep them, at least until we can make a backward compatibility
|
||||||
|
break.
|
||||||
|
|
||||||
|
However, we don't use EUNATCH, ENOSR, or ENOSTR, so we can delete those.
|
||||||
|
---
|
||||||
|
include/portability.h | 12 ------------
|
||||||
|
lib/common/results.c | 9 ++++++---
|
||||||
|
2 files changed, 6 insertions(+), 15 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/portability.h b/include/portability.h
|
||||||
|
index 9a60c583a7..ee065a376d 100644
|
||||||
|
--- a/include/portability.h
|
||||||
|
+++ b/include/portability.h
|
||||||
|
@@ -131,10 +131,6 @@ typedef union
|
||||||
|
# define EREMOTEIO 193
|
||||||
|
# endif
|
||||||
|
|
||||||
|
-# ifndef EUNATCH
|
||||||
|
-# define EUNATCH 194
|
||||||
|
-# endif
|
||||||
|
-
|
||||||
|
# ifndef ENOKEY
|
||||||
|
# define ENOKEY 195
|
||||||
|
# endif
|
||||||
|
@@ -147,14 +143,6 @@ typedef union
|
||||||
|
# define ETIME 197
|
||||||
|
# endif
|
||||||
|
|
||||||
|
-# ifndef ENOSR
|
||||||
|
-# define ENOSR 198
|
||||||
|
-# endif
|
||||||
|
-
|
||||||
|
-# ifndef ENOSTR
|
||||||
|
-# define ENOSTR 199
|
||||||
|
-# endif
|
||||||
|
-
|
||||||
|
# ifndef EKEYREJECTED
|
||||||
|
# define EKEYREJECTED 200
|
||||||
|
# endif
|
||||||
|
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||||
|
index 6d120694cd..96cd4e5659 100644
|
||||||
|
--- a/lib/common/results.c
|
||||||
|
+++ b/lib/common/results.c
|
||||||
|
@@ -118,9 +118,6 @@ pcmk_strerror(int rc)
|
||||||
|
case EREMOTEIO:
|
||||||
|
return "Remote I/O error";
|
||||||
|
/* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case EUNATCH:
|
||||||
|
- return "Protocol driver not attached";
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
case ENOKEY:
|
||||||
|
return "Required key not available";
|
||||||
|
}
|
||||||
|
@@ -342,8 +339,12 @@ pcmk_rc_name(int rc)
|
||||||
|
case ENOMSG: return "ENOMSG";
|
||||||
|
case ENOPROTOOPT: return "ENOPROTOOPT";
|
||||||
|
case ENOSPC: return "ENOSPC";
|
||||||
|
+#ifdef ENOSR
|
||||||
|
case ENOSR: return "ENOSR";
|
||||||
|
+#endif
|
||||||
|
+#ifdef ENOSTR
|
||||||
|
case ENOSTR: return "ENOSTR";
|
||||||
|
+#endif
|
||||||
|
case ENOSYS: return "ENOSYS";
|
||||||
|
case ENOTBLK: return "ENOTBLK";
|
||||||
|
case ENOTCONN: return "ENOTCONN";
|
||||||
|
@@ -376,7 +377,9 @@ pcmk_rc_name(int rc)
|
||||||
|
case ETIME: return "ETIME";
|
||||||
|
case ETIMEDOUT: return "ETIMEDOUT";
|
||||||
|
case ETXTBSY: return "ETXTBSY";
|
||||||
|
+#ifdef EUNATCH
|
||||||
|
case EUNATCH: return "EUNATCH";
|
||||||
|
+#endif
|
||||||
|
case EUSERS: return "EUSERS";
|
||||||
|
/* case EWOULDBLOCK: return "EWOULDBLOCK"; */
|
||||||
|
case EXDEV: return "EXDEV";
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From eac8d1ca51eac3f437e18584f7e013d976ecee2c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 15:33:12 -0600
|
||||||
|
Subject: [PATCH 03/12] Log: libcrmcommon: improve handling of portability.h
|
||||||
|
error codes
|
||||||
|
|
||||||
|
portability.h defines some system error codes that might not be present on
|
||||||
|
non-Linux systems.
|
||||||
|
|
||||||
|
Define a constant for each one (for example, PCMK__ECOMM for ECOMM) when
|
||||||
|
the system doesn't have the value, so we can detect that when relevant.
|
||||||
|
|
||||||
|
Also, make sure pcmk_rc_name() and pcmk_rc_str() handle all of these values.
|
||||||
|
---
|
||||||
|
include/portability.h | 8 ++++++++
|
||||||
|
lib/common/results.c | 32 ++++++++++++++++++++++++++++++--
|
||||||
|
2 files changed, 38 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/include/portability.h b/include/portability.h
|
||||||
|
index ee065a376d..5d5fbf21cb 100644
|
||||||
|
--- a/include/portability.h
|
||||||
|
+++ b/include/portability.h
|
||||||
|
@@ -116,34 +116,42 @@ typedef union
|
||||||
|
# include <errno.h>
|
||||||
|
|
||||||
|
# ifndef ENOTUNIQ
|
||||||
|
+# define PCMK__ENOTUNIQ
|
||||||
|
# define ENOTUNIQ 190
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef ECOMM
|
||||||
|
+# define PCMK__ECOMM
|
||||||
|
# define ECOMM 191
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef ELIBACC
|
||||||
|
+# define PCMK__ELIBACC
|
||||||
|
# define ELIBACC 192
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef EREMOTEIO
|
||||||
|
+# define PCMK__EREMOTEIO
|
||||||
|
# define EREMOTEIO 193
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef ENOKEY
|
||||||
|
+# define PCMK__ENOKEY
|
||||||
|
# define ENOKEY 195
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef ENODATA
|
||||||
|
+# define PCMK__ENODATA
|
||||||
|
# define ENODATA 196
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef ETIME
|
||||||
|
+# define PCMK__ETIME
|
||||||
|
# define ETIME 197
|
||||||
|
# endif
|
||||||
|
|
||||||
|
# ifndef EKEYREJECTED
|
||||||
|
+# define PCMK__EKEYREJECTED
|
||||||
|
# define EKEYREJECTED 200
|
||||||
|
# endif
|
||||||
|
|
||||||
|
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||||
|
index 96cd4e5659..bcf289d0d6 100644
|
||||||
|
--- a/lib/common/results.c
|
||||||
|
+++ b/lib/common/results.c
|
||||||
|
@@ -395,9 +395,9 @@ pcmk_rc_name(int rc)
|
||||||
|
#ifdef EISNAM // Not available on OS X, Illumos, Solaris
|
||||||
|
case EISNAM: return "EISNAM";
|
||||||
|
case EKEYEXPIRED: return "EKEYEXPIRED";
|
||||||
|
- case EKEYREJECTED: return "EKEYREJECTED";
|
||||||
|
case EKEYREVOKED: return "EKEYREVOKED";
|
||||||
|
#endif
|
||||||
|
+ case EKEYREJECTED: return "EKEYREJECTED";
|
||||||
|
case EL2HLT: return "EL2HLT";
|
||||||
|
case EL2NSYNC: return "EL2NSYNC";
|
||||||
|
case EL3HLT: return "EL3HLT";
|
||||||
|
@@ -443,7 +443,35 @@ pcmk_rc_str(int rc)
|
||||||
|
if (rc < 0) {
|
||||||
|
return "Unknown error";
|
||||||
|
}
|
||||||
|
- return strerror(rc);
|
||||||
|
+
|
||||||
|
+ // Handle values that could be defined by system or by portability.h
|
||||||
|
+ switch (rc) {
|
||||||
|
+#ifdef PCMK__ENOTUNIQ
|
||||||
|
+ case ENOTUNIQ: return "Name not unique on network";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__ECOMM
|
||||||
|
+ case ECOMM: return "Communication error on send";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__ELIBACC
|
||||||
|
+ case ELIBACC: return "Can not access a needed shared library";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__EREMOTEIO
|
||||||
|
+ case EREMOTEIO: return "Remote I/O error";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__ENOKEY
|
||||||
|
+ case ENOKEY: return "Required key not available";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__ENODATA
|
||||||
|
+ case ENODATA: return "No data available";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__ETIME
|
||||||
|
+ case ETIME: return "Timer expired";
|
||||||
|
+#endif
|
||||||
|
+#ifdef PCMK__EKEYREJECTED
|
||||||
|
+ case EKEYREJECTED: return "Key was rejected by service";
|
||||||
|
+#endif
|
||||||
|
+ default: return strerror(rc);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
// This returns negative values for errors
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 32a38ac6374f85c43e7f4051f5e519822cc481e6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 15:39:19 -0600
|
||||||
|
Subject: [PATCH 04/12] Log: libcrmcommon: redefine pcmk_strerror() in terms of
|
||||||
|
pcmk_rc_str()
|
||||||
|
|
||||||
|
... to reduce code duplication. This causes minor differences in the string for
|
||||||
|
a few values.
|
||||||
|
---
|
||||||
|
lib/common/results.c | 67 +-------------------------------------------
|
||||||
|
1 file changed, 1 insertion(+), 66 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||||
|
index bcf289d0d6..b2c6e8d553 100644
|
||||||
|
--- a/lib/common/results.c
|
||||||
|
+++ b/lib/common/results.c
|
||||||
|
@@ -57,72 +57,7 @@ pcmk_errorname(int rc)
|
||||||
|
const char *
|
||||||
|
pcmk_strerror(int rc)
|
||||||
|
{
|
||||||
|
- if (rc == 0) {
|
||||||
|
- return "OK";
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- rc = abs(rc);
|
||||||
|
-
|
||||||
|
- // Of course rc > 0 ... unless someone passed INT_MIN as rc
|
||||||
|
- if ((rc > 0) && (rc < PCMK_ERROR_OFFSET)) {
|
||||||
|
- return strerror(rc);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- switch (rc) {
|
||||||
|
- case pcmk_err_generic:
|
||||||
|
- return "Generic Pacemaker error";
|
||||||
|
- case pcmk_err_no_quorum:
|
||||||
|
- return "Operation requires quorum";
|
||||||
|
- case pcmk_err_schema_validation:
|
||||||
|
- return "Update does not conform to the configured schema";
|
||||||
|
- case pcmk_err_transform_failed:
|
||||||
|
- return "Schema transform failed";
|
||||||
|
- case pcmk_err_old_data:
|
||||||
|
- return "Update was older than existing configuration";
|
||||||
|
- case pcmk_err_diff_failed:
|
||||||
|
- return "Application of an update diff failed";
|
||||||
|
- case pcmk_err_diff_resync:
|
||||||
|
- return "Application of an update diff failed, requesting a full refresh";
|
||||||
|
- case pcmk_err_cib_modified:
|
||||||
|
- return "The on-disk configuration was manually modified";
|
||||||
|
- case pcmk_err_cib_backup:
|
||||||
|
- return "Could not archive the previous configuration";
|
||||||
|
- case pcmk_err_cib_save:
|
||||||
|
- return "Could not save the new configuration to disk";
|
||||||
|
- case pcmk_err_cib_corrupt:
|
||||||
|
- return "Could not parse on-disk configuration";
|
||||||
|
- case pcmk_err_multiple:
|
||||||
|
- return "Resource active on multiple nodes";
|
||||||
|
- case pcmk_err_node_unknown:
|
||||||
|
- return "Node not found";
|
||||||
|
- case pcmk_err_already:
|
||||||
|
- return "Situation already as requested";
|
||||||
|
- case pcmk_err_bad_nvpair:
|
||||||
|
- return "Bad name/value pair given";
|
||||||
|
- case pcmk_err_schema_unchanged:
|
||||||
|
- return "Schema is already the latest available";
|
||||||
|
- case pcmk_err_unknown_format:
|
||||||
|
- return "Unknown output format";
|
||||||
|
-
|
||||||
|
- /* The following cases will only be hit on systems for which they are non-standard */
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case ENOTUNIQ:
|
||||||
|
- return "Name not unique on network";
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case ECOMM:
|
||||||
|
- return "Communication error on send";
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case ELIBACC:
|
||||||
|
- return "Can not access a needed shared library";
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case EREMOTEIO:
|
||||||
|
- return "Remote I/O error";
|
||||||
|
- /* coverity[dead_error_condition] False positive on non-Linux */
|
||||||
|
- case ENOKEY:
|
||||||
|
- return "Required key not available";
|
||||||
|
- }
|
||||||
|
- crm_err("Unknown error code: %d", rc);
|
||||||
|
- return "Unknown error";
|
||||||
|
+ return pcmk_rc_str(pcmk_legacy2rc(rc));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Standard Pacemaker API return codes
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 7c331d7e2275ffebbfd5e2f6432a6137a66ee5db Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 15:41:24 -0600
|
||||||
|
Subject: [PATCH 05/12] Log: libcrmcommon: don't say "Unknown error"
|
||||||
|
|
||||||
|
... which is unhelpful and annoying to users
|
||||||
|
---
|
||||||
|
lib/common/results.c | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/common/results.c b/lib/common/results.c
|
||||||
|
index b2c6e8d553..5ffac76549 100644
|
||||||
|
--- a/lib/common/results.c
|
||||||
|
+++ b/lib/common/results.c
|
||||||
|
@@ -376,7 +376,7 @@ pcmk_rc_str(int rc)
|
||||||
|
return pcmk__rcs[pcmk_rc_error - rc].desc;
|
||||||
|
}
|
||||||
|
if (rc < 0) {
|
||||||
|
- return "Unknown error";
|
||||||
|
+ return "Error";
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle values that could be defined by system or by portability.h
|
||||||
|
@@ -768,7 +768,7 @@ bz2_strerror(int rc)
|
||||||
|
case BZ_OUTBUFF_FULL:
|
||||||
|
return "output data will not fit into the buffer provided";
|
||||||
|
}
|
||||||
|
- return "Unknown error";
|
||||||
|
+ return "Data compression error";
|
||||||
|
}
|
||||||
|
|
||||||
|
crm_exit_t
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 26883b4edda7d81bfcb79bd7b33bb3210beff110 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 16:01:39 -0600
|
||||||
|
Subject: [PATCH 06/12] Log: fencing: don't warn if cluster has no watchdog
|
||||||
|
device
|
||||||
|
|
||||||
|
---
|
||||||
|
lib/fencing/st_client.c | 7 ++++++-
|
||||||
|
1 file changed, 6 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||||||
|
index b1de912b2a..a0f3119f3b 100644
|
||||||
|
--- a/lib/fencing/st_client.c
|
||||||
|
+++ b/lib/fencing/st_client.c
|
||||||
|
@@ -187,7 +187,12 @@ stonith__watchdog_fencing_enabled_for_node_api(stonith_t *st, const char *node)
|
||||||
|
* we drop in here - so as not to make remote nodes
|
||||||
|
* panic on that answer
|
||||||
|
*/
|
||||||
|
- crm_warn("watchdog-fencing-query failed");
|
||||||
|
+ if (rc == -ENODEV) {
|
||||||
|
+ crm_notice("Cluster does not have watchdog fencing device");
|
||||||
|
+ } else {
|
||||||
|
+ crm_warn("Could not check for watchdog fencing device: %s",
|
||||||
|
+ pcmk_strerror(rc));
|
||||||
|
+ }
|
||||||
|
} else if (list[0] == '\0') {
|
||||||
|
rv = TRUE;
|
||||||
|
} else {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 72b3c42232deaca64ffba9582598c59331203761 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Mon, 20 Dec 2021 16:22:49 -0600
|
||||||
|
Subject: [PATCH 07/12] Test: libcrmcommon: update pcmk_rc_str() unit test for
|
||||||
|
recent change
|
||||||
|
|
||||||
|
---
|
||||||
|
lib/common/tests/results/pcmk__results_test.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/lib/common/tests/results/pcmk__results_test.c b/lib/common/tests/results/pcmk__results_test.c
|
||||||
|
index 57a520c501..e08d4b6261 100644
|
||||||
|
--- a/lib/common/tests/results/pcmk__results_test.c
|
||||||
|
+++ b/lib/common/tests/results/pcmk__results_test.c
|
||||||
|
@@ -30,7 +30,7 @@ static void
|
||||||
|
test_for_pcmk_rc_str(void **state) {
|
||||||
|
assert_string_equal(pcmk_rc_str(pcmk_rc_error-1), "Unknown output format");
|
||||||
|
assert_string_equal(pcmk_rc_str(pcmk_rc_ok), "OK");
|
||||||
|
- assert_string_equal(pcmk_rc_str(-1), "Unknown error");
|
||||||
|
+ assert_string_equal(pcmk_rc_str(-1), "Error");
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From c1ad3d6640f695321a83183c95fae2f105adc429 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Tue, 21 Dec 2021 10:20:38 -0600
|
||||||
|
Subject: [PATCH 08/12] Test: cts-lab: update expected patterns for recent
|
||||||
|
changes
|
||||||
|
|
||||||
|
---
|
||||||
|
cts/lab/CTStests.py | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/cts/lab/CTStests.py b/cts/lab/CTStests.py
|
||||||
|
index 62c832eb45..f4be998cfb 100644
|
||||||
|
--- a/cts/lab/CTStests.py
|
||||||
|
+++ b/cts/lab/CTStests.py
|
||||||
|
@@ -3055,7 +3055,7 @@ class RemoteStonithd(RemoteDriver):
|
||||||
|
r"pacemaker-controld.*:\s+error.*: Operation remote-.*_monitor",
|
||||||
|
r"pacemaker-controld.*:\s+error.*: Result of monitor operation for remote-.*",
|
||||||
|
r"schedulerd.*:\s+Recover remote-.*\s*\(.*\)",
|
||||||
|
- r"error: Result of monitor operation for .* on remote-.*: No executor connection",
|
||||||
|
+ r"error: Result of monitor operation for .* on remote-.*: Internal communication failure",
|
||||||
|
]
|
||||||
|
|
||||||
|
ignore_pats.extend(RemoteDriver.errorstoignore(self))
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From f272e2f526633c707e894b39c7c7bce3c14de898 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Tue, 21 Dec 2021 15:40:49 -0600
|
||||||
|
Subject: [PATCH 09/12] Log: controller,libpacemaker: make history XML creation
|
||||||
|
less chatty
|
||||||
|
|
||||||
|
Other messages with the same info will already be logged at higher severity
|
||||||
|
---
|
||||||
|
daemons/controld/controld_execd.c | 3 +--
|
||||||
|
daemons/controld/controld_te_actions.c | 7 ++-----
|
||||||
|
include/pcmki/pcmki_sched_utils.h | 3 +--
|
||||||
|
lib/pacemaker/pcmk_injections.c | 3 +--
|
||||||
|
lib/pacemaker/pcmk_sched_actions.c | 12 +++++-------
|
||||||
|
5 files changed, 10 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
|
||||||
|
index 15784e7687..52157fa5d4 100644
|
||||||
|
--- a/daemons/controld/controld_execd.c
|
||||||
|
+++ b/daemons/controld/controld_execd.c
|
||||||
|
@@ -693,9 +693,8 @@ build_operation_update(xmlNode * parent, lrmd_rsc_info_t * rsc, lrmd_event_data_
|
||||||
|
caller_version = CRM_FEATURE_SET;
|
||||||
|
}
|
||||||
|
|
||||||
|
- crm_trace("Building %s operation update with originator version: %s", op->rsc_id, caller_version);
|
||||||
|
xml_op = pcmk__create_history_xml(parent, op, caller_version, target_rc,
|
||||||
|
- fsa_our_uname, src, LOG_DEBUG);
|
||||||
|
+ fsa_our_uname, src);
|
||||||
|
if (xml_op == NULL) {
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
|
||||||
|
index 63b7c72359..b0bcb8b2e4 100644
|
||||||
|
--- a/daemons/controld/controld_te_actions.c
|
||||||
|
+++ b/daemons/controld/controld_te_actions.c
|
||||||
|
@@ -181,7 +181,6 @@ controld_record_action_timeout(crm_action_t *action)
|
||||||
|
lrmd_event_data_t *op = NULL;
|
||||||
|
xmlNode *state = NULL;
|
||||||
|
xmlNode *rsc = NULL;
|
||||||
|
- xmlNode *xml_op = NULL;
|
||||||
|
xmlNode *action_rsc = NULL;
|
||||||
|
|
||||||
|
int rc = pcmk_ok;
|
||||||
|
@@ -245,12 +244,10 @@ controld_record_action_timeout(crm_action_t *action)
|
||||||
|
op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||||
|
target_rc, te_uuid);
|
||||||
|
|
||||||
|
- xml_op = pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc,
|
||||||
|
- target, __func__, LOG_INFO);
|
||||||
|
+ pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
|
||||||
|
+ __func__);
|
||||||
|
lrmd_free_event(op);
|
||||||
|
|
||||||
|
- crm_log_xml_trace(xml_op, "Action timeout");
|
||||||
|
-
|
||||||
|
rc = fsa_cib_conn->cmds->update(fsa_cib_conn, XML_CIB_TAG_STATUS, state, call_options);
|
||||||
|
fsa_register_cib_callback(rc, FALSE, NULL, cib_action_updated);
|
||||||
|
free_xml(state);
|
||||||
|
diff --git a/include/pcmki/pcmki_sched_utils.h b/include/pcmki/pcmki_sched_utils.h
|
||||||
|
index 68d60fc7db..144424a609 100644
|
||||||
|
--- a/include/pcmki/pcmki_sched_utils.h
|
||||||
|
+++ b/include/pcmki/pcmki_sched_utils.h
|
||||||
|
@@ -52,8 +52,7 @@ extern void process_utilization(pe_resource_t * rsc, pe_node_t ** prefer, pe_wor
|
||||||
|
|
||||||
|
xmlNode *pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *event,
|
||||||
|
const char *caller_version, int target_rc,
|
||||||
|
- const char *node, const char *origin,
|
||||||
|
- int level);
|
||||||
|
+ const char *node, const char *origin);
|
||||||
|
|
||||||
|
# define LOAD_STOPPED "load_stopped"
|
||||||
|
|
||||||
|
diff --git a/lib/pacemaker/pcmk_sched_transition.c b/lib/pacemaker/pcmk_sched_transition.c
|
||||||
|
index 678c3f5dd2..1aa90a5a0b 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_sched_transition.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_sched_transition.c
|
||||||
|
@@ -201,8 +201,7 @@ inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
|
||||||
|
inject_op(xmlNode * cib_resource, lrmd_event_data_t * op, int target_rc)
|
||||||
|
{
|
||||||
|
return pcmk__create_history_xml(cib_resource, op, CRM_FEATURE_SET,
|
||||||
|
- target_rc, NULL, crm_system_name,
|
||||||
|
- LOG_TRACE);
|
||||||
|
+ target_rc, NULL, crm_system_name);
|
||||||
|
}
|
||||||
|
|
||||||
|
static xmlNode *
|
||||||
|
diff --git a/lib/pacemaker/pcmk_sched_utils.c b/lib/pacemaker/pcmk_sched_utils.c
|
||||||
|
index f8200b0efc..4f63d3374d 100644
|
||||||
|
--- a/lib/pacemaker/pcmk_sched_utils.c
|
||||||
|
+++ b/lib/pacemaker/pcmk_sched_utils.c
|
||||||
|
@@ -892,14 +892,13 @@ add_op_digest_to_xml(lrmd_event_data_t *op, xmlNode *update)
|
||||||
|
* \param[in] target_rc Expected result of operation
|
||||||
|
* \param[in] node Name of node on which operation was performed
|
||||||
|
* \param[in] origin Arbitrary description of update source
|
||||||
|
- * \param[in] level A log message will be logged at this level
|
||||||
|
*
|
||||||
|
* \return Newly created XML node for history update
|
||||||
|
*/
|
||||||
|
xmlNode *
|
||||||
|
pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
|
||||||
|
const char *caller_version, int target_rc,
|
||||||
|
- const char *node, const char *origin, int level)
|
||||||
|
+ const char *node, const char *origin)
|
||||||
|
{
|
||||||
|
char *key = NULL;
|
||||||
|
char *magic = NULL;
|
||||||
|
@@ -912,11 +911,10 @@ pcmk__create_history_xml(xmlNode *parent, lrmd_event_data_t *op,
|
||||||
|
const char *task = NULL;
|
||||||
|
|
||||||
|
CRM_CHECK(op != NULL, return NULL);
|
||||||
|
- do_crm_log(level, "%s: Updating resource %s after %s op %s (interval=%u)",
|
||||||
|
- origin, op->rsc_id, op->op_type,
|
||||||
|
- pcmk_exec_status_str(op->op_status), op->interval_ms);
|
||||||
|
-
|
||||||
|
- crm_trace("DC version: %s", caller_version);
|
||||||
|
+ crm_trace("Creating history XML for %s-interval %s action for %s on %s "
|
||||||
|
+ "(DC version: %s, origin: %s)",
|
||||||
|
+ pcmk__readable_interval(op->interval_ms), op->op_type, op->rsc_id,
|
||||||
|
+ ((node == NULL)? "no node" : node), caller_version, origin);
|
||||||
|
|
||||||
|
task = op->op_type;
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 06b1da9e5345e0d1571042c11646fd7157961279 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Tue, 21 Dec 2021 17:09:44 -0600
|
||||||
|
Subject: [PATCH 10/12] Feature: controller: improve exit reason for internal
|
||||||
|
timeouts
|
||||||
|
|
||||||
|
Functionize the part of controld_record_action_timeout() that creates a fake
|
||||||
|
executor event, into a new function synthesize_timeout_event(), and have it set
|
||||||
|
a more detailed exit reason describing what timed out.
|
||||||
|
---
|
||||||
|
daemons/controld/controld_te_actions.c | 61 ++++++++++++++++++++------
|
||||||
|
1 file changed, 48 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_te_actions.c b/daemons/controld/controld_te_actions.c
|
||||||
|
index b0bcb8b2e4..de2fbb82bf 100644
|
||||||
|
--- a/daemons/controld/controld_te_actions.c
|
||||||
|
+++ b/daemons/controld/controld_te_actions.c
|
||||||
|
@@ -175,6 +175,53 @@ te_crm_command(crm_graph_t * graph, crm_action_t * action)
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/*!
|
||||||
|
+ * \internal
|
||||||
|
+ * \brief Synthesize an executor event for a resource action timeout
|
||||||
|
+ *
|
||||||
|
+ * \param[in] action Resource action that timed out
|
||||||
|
+ * \param[in] target_rc Expected result of action that timed out
|
||||||
|
+ *
|
||||||
|
+ * Synthesize an executor event for a resource action timeout. (If the executor
|
||||||
|
+ * gets a timeout while waiting for a resource action to complete, that will be
|
||||||
|
+ * reported via the usual callback. This timeout means we didn't hear from the
|
||||||
|
+ * executor itself or the controller that relayed the action to the executor.)
|
||||||
|
+ *
|
||||||
|
+ * \return Newly created executor event for result of \p action
|
||||||
|
+ * \note The caller is responsible for freeing the return value using
|
||||||
|
+ * lrmd_free_event().
|
||||||
|
+ */
|
||||||
|
+static lrmd_event_data_t *
|
||||||
|
+synthesize_timeout_event(crm_action_t *action, int target_rc)
|
||||||
|
+{
|
||||||
|
+ lrmd_event_data_t *op = NULL;
|
||||||
|
+ const char *target = crm_element_value(action->xml, XML_LRM_ATTR_TARGET);
|
||||||
|
+ const char *reason = NULL;
|
||||||
|
+ char *dynamic_reason = NULL;
|
||||||
|
+
|
||||||
|
+ if (pcmk__str_eq(target, get_local_node_name(), pcmk__str_casei)) {
|
||||||
|
+ reason = "Local executor did not return result in time";
|
||||||
|
+ } else {
|
||||||
|
+ const char *router_node = NULL;
|
||||||
|
+
|
||||||
|
+ router_node = crm_element_value(action->xml, XML_LRM_ATTR_ROUTER_NODE);
|
||||||
|
+ if (router_node == NULL) {
|
||||||
|
+ router_node = target;
|
||||||
|
+ }
|
||||||
|
+ dynamic_reason = crm_strdup_printf("Controller on %s did not return "
|
||||||
|
+ "result in time", router_node);
|
||||||
|
+ reason = dynamic_reason;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
|
||||||
|
+ PCMK_OCF_UNKNOWN_ERROR, reason);
|
||||||
|
+ op->call_id = -1;
|
||||||
|
+ op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||||
|
+ target_rc, te_uuid);
|
||||||
|
+ free(dynamic_reason);
|
||||||
|
+ return op;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
void
|
||||||
|
controld_record_action_timeout(crm_action_t *action)
|
||||||
|
{
|
||||||
|
@@ -231,19 +278,7 @@ controld_record_action_timeout(crm_action_t *action)
|
||||||
|
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_CLASS);
|
||||||
|
crm_copy_xml_element(action_rsc, rsc, XML_AGENT_ATTR_PROVIDER);
|
||||||
|
|
||||||
|
- /* If the executor gets a timeout while waiting for the action to complete,
|
||||||
|
- * that will be reported via the usual callback. This timeout means that we
|
||||||
|
- * didn't hear from the executor or the controller that relayed the action
|
||||||
|
- * to the executor.
|
||||||
|
- */
|
||||||
|
- op = pcmk__event_from_graph_action(NULL, action, PCMK_EXEC_TIMEOUT,
|
||||||
|
- PCMK_OCF_UNKNOWN_ERROR,
|
||||||
|
- "Cluster communication timeout "
|
||||||
|
- "(no response from executor)");
|
||||||
|
- op->call_id = -1;
|
||||||
|
- op->user_data = pcmk__transition_key(transition_graph->id, action->id,
|
||||||
|
- target_rc, te_uuid);
|
||||||
|
-
|
||||||
|
+ op = synthesize_timeout_event(action, target_rc);
|
||||||
|
pcmk__create_history_xml(rsc, op, CRM_FEATURE_SET, target_rc, target,
|
||||||
|
__func__);
|
||||||
|
lrmd_free_event(op);
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From be620d206faefab967d4c8567d6554d10c9e72ba Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 22 Dec 2021 16:35:06 -0600
|
||||||
|
Subject: [PATCH 11/12] Feature: fencing: improve exit reason for fencing
|
||||||
|
timeouts
|
||||||
|
|
||||||
|
Troubleshooting timeouts is one of the more difficult aspects of cluster
|
||||||
|
maintenance. We want to give as much of a hint as possible, but for fencing in
|
||||||
|
particular it is difficult because an operation might involve multiple retries
|
||||||
|
of multiple devices.
|
||||||
|
|
||||||
|
Barring another major project to track exactly which devices, retries, etc.,
|
||||||
|
were used in a given operation, these changes in wording are probably the best
|
||||||
|
we can do.
|
||||||
|
---
|
||||||
|
daemons/fenced/fenced_remote.c | 8 +++++---
|
||||||
|
lib/fencing/st_client.c | 2 +-
|
||||||
|
2 files changed, 6 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
|
||||||
|
index 1e237150c5..6eebb7381e 100644
|
||||||
|
--- a/daemons/fenced/fenced_remote.c
|
||||||
|
+++ b/daemons/fenced/fenced_remote.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- * Copyright 2009-2021 the Pacemaker project contributors
|
||||||
|
+ * Copyright 2009-2022 the Pacemaker project contributors
|
||||||
|
*
|
||||||
|
* The version control history for this file may have further details.
|
||||||
|
*
|
||||||
|
@@ -715,8 +715,10 @@ remote_op_timeout(gpointer userdata)
|
||||||
|
CRM_XS " id=%.8s",
|
||||||
|
op->action, op->target, op->client_name, op->id);
|
||||||
|
} else {
|
||||||
|
- finalize_timed_out_op(userdata, "Fencing could not be completed "
|
||||||
|
- "within overall timeout");
|
||||||
|
+ finalize_timed_out_op(userdata, "Fencing did not complete within a "
|
||||||
|
+ "total timeout based on the "
|
||||||
|
+ "configured timeout and retries for "
|
||||||
|
+ "any devices attempted");
|
||||||
|
}
|
||||||
|
return G_SOURCE_REMOVE;
|
||||||
|
}
|
||||||
|
diff --git a/lib/fencing/st_client.c b/lib/fencing/st_client.c
|
||||||
|
index a0f3119f3b..718739b321 100644
|
||||||
|
--- a/lib/fencing/st_client.c
|
||||||
|
+++ b/lib/fencing/st_client.c
|
||||||
|
@@ -906,7 +906,7 @@ invoke_registered_callbacks(stonith_t *stonith, xmlNode *msg, int call_id)
|
||||||
|
if (msg == NULL) {
|
||||||
|
// Fencer didn't reply in time
|
||||||
|
pcmk__set_result(&result, CRM_EX_ERROR, PCMK_EXEC_TIMEOUT,
|
||||||
|
- "Timeout waiting for reply from fencer");
|
||||||
|
+ "Fencer accepted request but did not reply in time");
|
||||||
|
CRM_LOG_ASSERT(call_id > 0);
|
||||||
|
|
||||||
|
} else {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 0fe8ede2f8e838e335fe42846bdf147111ce9955 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 22 Dec 2021 17:09:09 -0600
|
||||||
|
Subject: [PATCH 12/12] Feature: libcrmservice: improve exit reason for
|
||||||
|
timeouts
|
||||||
|
|
||||||
|
The services library doesn't have enough information about an action to say
|
||||||
|
(for example) what configuration parameters might be relevant, but we can at
|
||||||
|
least distinguish what kind of agent timed out.
|
||||||
|
---
|
||||||
|
lib/services/services_linux.c | 12 +++++++++++-
|
||||||
|
lib/services/systemd.c | 2 +-
|
||||||
|
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/services/services_linux.c b/lib/services/services_linux.c
|
||||||
|
index f15eee860e..d6aafcfe46 100644
|
||||||
|
--- a/lib/services/services_linux.c
|
||||||
|
+++ b/lib/services/services_linux.c
|
||||||
|
@@ -677,9 +677,19 @@ async_action_complete(mainloop_child_t *p, pid_t pid, int core, int signo,
|
||||||
|
parse_exit_reason_from_stderr(op);
|
||||||
|
|
||||||
|
} else if (mainloop_child_timeout(p)) {
|
||||||
|
+ const char *reason = NULL;
|
||||||
|
+
|
||||||
|
+ if (op->rsc != NULL) {
|
||||||
|
+ reason = "Resource agent did not complete in time";
|
||||||
|
+ } else if (pcmk__str_eq(op->standard, PCMK_RESOURCE_CLASS_STONITH,
|
||||||
|
+ pcmk__str_none)) {
|
||||||
|
+ reason = "Fence agent did not complete in time";
|
||||||
|
+ } else {
|
||||||
|
+ reason = "Process did not complete in time";
|
||||||
|
+ }
|
||||||
|
crm_info("%s[%d] timed out after %dms", op->id, op->pid, op->timeout);
|
||||||
|
services__set_result(op, services__generic_error(op), PCMK_EXEC_TIMEOUT,
|
||||||
|
- "Process did not exit within specified timeout");
|
||||||
|
+ reason);
|
||||||
|
|
||||||
|
} else if (op->cancel) {
|
||||||
|
/* If an in-flight recurring operation was killed because it was
|
||||||
|
diff --git a/lib/services/systemd.c b/lib/services/systemd.c
|
||||||
|
index 27a3b376db..d87b287424 100644
|
||||||
|
--- a/lib/services/systemd.c
|
||||||
|
+++ b/lib/services/systemd.c
|
||||||
|
@@ -995,7 +995,7 @@ systemd_timeout_callback(gpointer p)
|
||||||
|
crm_info("%s action for systemd unit %s named '%s' timed out",
|
||||||
|
op->action, op->agent, op->rsc);
|
||||||
|
services__set_result(op, PCMK_OCF_UNKNOWN_ERROR, PCMK_EXEC_TIMEOUT,
|
||||||
|
- "Systemd action did not complete within specified timeout");
|
||||||
|
+ "Systemd unit action did not complete in time");
|
||||||
|
services__finalize_async_op(op);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
29
SOURCES/019-corosync-tracking.patch
Normal file
29
SOURCES/019-corosync-tracking.patch
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
From e8bf0161b872267f1bb7143a9866fdc15ec218f2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jan Friesse <jfriesse@redhat.com>
|
||||||
|
Date: Tue, 18 Jan 2022 16:35:24 +0100
|
||||||
|
Subject: [PATCH] Fix: corosync: Repeat corosync_cfg_trackstart
|
||||||
|
|
||||||
|
corosync_cfg_trackstart can fail with CS_ERR_TRY_AGAIN, so
|
||||||
|
(as with corosync_cfg_local_get, ...) handle the failure by
|
||||||
|
using the cs_repeat macro.
|
||||||
|
---
|
||||||
|
daemons/pacemakerd/pcmkd_corosync.c | 3 ++-
|
||||||
|
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/pacemakerd/pcmkd_corosync.c b/daemons/pacemakerd/pcmkd_corosync.c
|
||||||
|
index 7990bc43c5..cd7a40321d 100644
|
||||||
|
--- a/daemons/pacemakerd/pcmkd_corosync.c
|
||||||
|
+++ b/daemons/pacemakerd/pcmkd_corosync.c
|
||||||
|
@@ -186,7 +186,8 @@ cluster_connect_cfg(void)
|
||||||
|
crm_debug("Corosync reports local node ID is %lu", (unsigned long) nodeid);
|
||||||
|
|
||||||
|
#ifdef HAVE_COROSYNC_CFG_TRACKSTART
|
||||||
|
- rc = corosync_cfg_trackstart(cfg_handle, 0);
|
||||||
|
+ retries = 0;
|
||||||
|
+ cs_repeat(retries, 30, rc = corosync_cfg_trackstart(cfg_handle, 0));
|
||||||
|
if (rc != CS_OK) {
|
||||||
|
crm_crit("Could not enable Corosync CFG shutdown tracker: %s " CRM_XS " rc=%d",
|
||||||
|
cs_strerror(rc), rc);
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
41
SOURCES/020-systemd-unit.patch
Normal file
41
SOURCES/020-systemd-unit.patch
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
From e316840a7e1d2a72e3089ee194334244c959905a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 19 Jan 2022 09:53:53 -0600
|
||||||
|
Subject: [PATCH] Fix: pacemakerd: tweak systemd unit respawn settings
|
||||||
|
|
||||||
|
If pacemaker exits immediately after starting, wait 1 second before trying to
|
||||||
|
respawn, since the default of 100ms is a bit aggressive for a Pacemaker
|
||||||
|
cluster.
|
||||||
|
|
||||||
|
Also, allow 5 attempts in 25 seconds before giving up.
|
||||||
|
---
|
||||||
|
daemons/pacemakerd/pacemaker.service.in | 6 ++++++
|
||||||
|
1 file changed, 6 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/daemons/pacemakerd/pacemaker.service.in b/daemons/pacemakerd/pacemaker.service.in
|
||||||
|
index 0363a2259c..3fd53d9ffb 100644
|
||||||
|
--- a/daemons/pacemakerd/pacemaker.service.in
|
||||||
|
+++ b/daemons/pacemakerd/pacemaker.service.in
|
||||||
|
@@ -31,6 +31,9 @@ After=rsyslog.service
|
||||||
|
After=corosync.service
|
||||||
|
Requires=corosync.service
|
||||||
|
|
||||||
|
+# If Pacemaker respawns repeatedly, give up after this many tries in this time
|
||||||
|
+StartLimitBurst=5
|
||||||
|
+StartLimitIntervalSec=25s
|
||||||
|
|
||||||
|
[Install]
|
||||||
|
WantedBy=multi-user.target
|
||||||
|
@@ -57,6 +60,9 @@ TasksMax=infinity
|
||||||
|
# resource. Sending -KILL will just get the node fenced
|
||||||
|
SendSIGKILL=no
|
||||||
|
|
||||||
|
+# Systemd's default of respawning a failed service after 100ms is too aggressive
|
||||||
|
+RestartSec=1s
|
||||||
|
+
|
||||||
|
# If we ever hit the StartLimitInterval/StartLimitBurst limit, and the
|
||||||
|
# admin wants to stop the cluster while pacemakerd is not running, it
|
||||||
|
# might be a good idea to enable the ExecStopPost directive below.
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
354
SOURCES/021-daemon-tracking.patch
Normal file
354
SOURCES/021-daemon-tracking.patch
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
From 9ee9fd6b98d8a5ff5eac57a14cbc0ce1009b10e4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||||
|
Date: Thu, 18 Nov 2021 13:23:34 +0100
|
||||||
|
Subject: [PATCH 1/2] Feature: pacemakerd: keep tracking pacemakerd for
|
||||||
|
liveness
|
||||||
|
|
||||||
|
---
|
||||||
|
daemons/pacemakerd/pacemakerd.c | 2 +
|
||||||
|
daemons/pacemakerd/pacemakerd.h | 3 +-
|
||||||
|
daemons/pacemakerd/pcmkd_messages.c | 6 +-
|
||||||
|
daemons/pacemakerd/pcmkd_subdaemons.c | 139 +++++++++++++++++---------
|
||||||
|
4 files changed, 98 insertions(+), 52 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/pacemakerd/pacemakerd.c b/daemons/pacemakerd/pacemakerd.c
|
||||||
|
index 34d64c4053..062c2d5326 100644
|
||||||
|
--- a/daemons/pacemakerd/pacemakerd.c
|
||||||
|
+++ b/daemons/pacemakerd/pacemakerd.c
|
||||||
|
@@ -259,6 +259,8 @@ main(int argc, char **argv)
|
||||||
|
pcmk_ipc_api_t *old_instance = NULL;
|
||||||
|
qb_ipcs_service_t *ipcs = NULL;
|
||||||
|
|
||||||
|
+ subdaemon_check_progress = time(NULL);
|
||||||
|
+
|
||||||
|
crm_log_preinit(NULL, argc, argv);
|
||||||
|
mainloop_add_signal(SIGHUP, pcmk_ignore);
|
||||||
|
mainloop_add_signal(SIGQUIT, pcmk_sigquit);
|
||||||
|
diff --git a/daemons/pacemakerd/pacemakerd.h b/daemons/pacemakerd/pacemakerd.h
|
||||||
|
index 7c541bbf9e..424dbbcc5d 100644
|
||||||
|
--- a/daemons/pacemakerd/pacemakerd.h
|
||||||
|
+++ b/daemons/pacemakerd/pacemakerd.h
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- * Copyright 2010-2021 the Pacemaker project contributors
|
||||||
|
+ * Copyright 2010-2022 the Pacemaker project contributors
|
||||||
|
*
|
||||||
|
* The version control history for this file may have further details.
|
||||||
|
*
|
||||||
|
@@ -21,6 +21,7 @@ extern unsigned int shutdown_complete_state_reported_to;
|
||||||
|
extern gboolean shutdown_complete_state_reported_client_closed;
|
||||||
|
extern crm_trigger_t *shutdown_trigger;
|
||||||
|
extern crm_trigger_t *startup_trigger;
|
||||||
|
+extern time_t subdaemon_check_progress;
|
||||||
|
|
||||||
|
gboolean mcp_read_config(void);
|
||||||
|
|
||||||
|
diff --git a/daemons/pacemakerd/pcmkd_messages.c b/daemons/pacemakerd/pcmkd_messages.c
|
||||||
|
index 0439986ecf..f2cddc353e 100644
|
||||||
|
--- a/daemons/pacemakerd/pcmkd_messages.c
|
||||||
|
+++ b/daemons/pacemakerd/pcmkd_messages.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/*
|
||||||
|
- * Copyright 2010-2021 the Pacemaker project contributors
|
||||||
|
+ * Copyright 2010-2022 the Pacemaker project contributors
|
||||||
|
*
|
||||||
|
* The version control history for this file may have further details.
|
||||||
|
*
|
||||||
|
@@ -25,7 +25,6 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
|
||||||
|
const char *value = NULL;
|
||||||
|
xmlNode *ping = NULL;
|
||||||
|
xmlNode *reply = NULL;
|
||||||
|
- time_t pinged = time(NULL);
|
||||||
|
const char *from = crm_element_value(msg, F_CRM_SYS_FROM);
|
||||||
|
|
||||||
|
/* Pinged for status */
|
||||||
|
@@ -36,7 +35,8 @@ pcmk_handle_ping_request(pcmk__client_t *c, xmlNode *msg, uint32_t id)
|
||||||
|
value = crm_element_value(msg, F_CRM_SYS_TO);
|
||||||
|
crm_xml_add(ping, XML_PING_ATTR_SYSFROM, value);
|
||||||
|
crm_xml_add(ping, XML_PING_ATTR_PACEMAKERDSTATE, pacemakerd_state);
|
||||||
|
- crm_xml_add_ll(ping, XML_ATTR_TSTAMP, (long long) pinged);
|
||||||
|
+ crm_xml_add_ll(ping, XML_ATTR_TSTAMP,
|
||||||
|
+ (long long) subdaemon_check_progress);
|
||||||
|
crm_xml_add(ping, XML_PING_ATTR_STATUS, "ok");
|
||||||
|
reply = create_reply(msg, ping);
|
||||||
|
free_xml(ping);
|
||||||
|
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
index a54fcce1ba..c03903c99e 100644
|
||||||
|
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
@@ -32,14 +32,16 @@ typedef struct pcmk_child_s {
|
||||||
|
const char *command;
|
||||||
|
const char *endpoint; /* IPC server name */
|
||||||
|
bool needs_cluster;
|
||||||
|
+ int check_count;
|
||||||
|
|
||||||
|
/* Anything below here will be dynamically initialized */
|
||||||
|
bool needs_retry;
|
||||||
|
bool active_before_startup;
|
||||||
|
} pcmk_child_t;
|
||||||
|
|
||||||
|
-#define PCMK_PROCESS_CHECK_INTERVAL 5
|
||||||
|
-#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
|
||||||
|
+#define PCMK_PROCESS_CHECK_INTERVAL 1
|
||||||
|
+#define PCMK_PROCESS_CHECK_RETRIES 5
|
||||||
|
+#define SHUTDOWN_ESCALATION_PERIOD 180000 /* 3m */
|
||||||
|
|
||||||
|
/* Index into the array below */
|
||||||
|
#define PCMK_CHILD_CONTROLD 5
|
||||||
|
@@ -82,6 +84,7 @@ static char *opts_vgrind[] = { NULL, NULL, NULL, NULL, NULL };
|
||||||
|
|
||||||
|
crm_trigger_t *shutdown_trigger = NULL;
|
||||||
|
crm_trigger_t *startup_trigger = NULL;
|
||||||
|
+time_t subdaemon_check_progress = 0;
|
||||||
|
|
||||||
|
/* When contacted via pacemakerd-api by a client having sbd in
|
||||||
|
* the name we assume it is sbd-daemon which wants to know
|
||||||
|
@@ -103,7 +106,6 @@ gboolean running_with_sbd = FALSE; /* local copy */
|
||||||
|
GMainLoop *mainloop = NULL;
|
||||||
|
|
||||||
|
static gboolean fatal_error = FALSE;
|
||||||
|
-static bool global_keep_tracking = false;
|
||||||
|
|
||||||
|
static gboolean check_active_before_startup_processes(gpointer user_data);
|
||||||
|
static int child_liveness(pcmk_child_t *child);
|
||||||
|
@@ -127,44 +129,94 @@ pcmkd_cluster_connected(void)
|
||||||
|
static gboolean
|
||||||
|
check_active_before_startup_processes(gpointer user_data)
|
||||||
|
{
|
||||||
|
- gboolean keep_tracking = FALSE;
|
||||||
|
-
|
||||||
|
- for (int i = 0; i < PCMK__NELEM(pcmk_children); i++) {
|
||||||
|
- if (!pcmk_children[i].active_before_startup) {
|
||||||
|
- /* we are already tracking it as a child process. */
|
||||||
|
- continue;
|
||||||
|
- } else {
|
||||||
|
- int rc = child_liveness(&pcmk_children[i]);
|
||||||
|
-
|
||||||
|
- switch (rc) {
|
||||||
|
- case pcmk_rc_ok:
|
||||||
|
- break;
|
||||||
|
- case pcmk_rc_ipc_unresponsive:
|
||||||
|
- case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||||
|
- if (pcmk_children[i].respawn) {
|
||||||
|
- crm_err("%s[%lld] terminated%s", pcmk_children[i].name,
|
||||||
|
- (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
|
||||||
|
- (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
|
||||||
|
- } else {
|
||||||
|
- /* orderly shutdown */
|
||||||
|
- crm_notice("%s[%lld] terminated%s", pcmk_children[i].name,
|
||||||
|
- (long long) PCMK__SPECIAL_PID_AS_0(pcmk_children[i].pid),
|
||||||
|
- (rc == pcmk_rc_ipc_pid_only)? " as IPC server" : "");
|
||||||
|
- }
|
||||||
|
- pcmk_process_exit(&(pcmk_children[i]));
|
||||||
|
- continue;
|
||||||
|
- default:
|
||||||
|
- crm_exit(CRM_EX_FATAL);
|
||||||
|
- break; /* static analysis/noreturn */
|
||||||
|
+ static int next_child = 0;
|
||||||
|
+ int rc = child_liveness(&pcmk_children[next_child]);
|
||||||
|
+
|
||||||
|
+ crm_trace("%s[%lld] checked as %d",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid),
|
||||||
|
+ rc);
|
||||||
|
+
|
||||||
|
+ switch (rc) {
|
||||||
|
+ case pcmk_rc_ok:
|
||||||
|
+ pcmk_children[next_child].check_count = 0;
|
||||||
|
+ next_child++;
|
||||||
|
+ subdaemon_check_progress = time(NULL);
|
||||||
|
+ break;
|
||||||
|
+ case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||||
|
+ pcmk_children[next_child].check_count++;
|
||||||
|
+ if (pcmk_children[next_child].check_count >= PCMK_PROCESS_CHECK_RETRIES) {
|
||||||
|
+ crm_err("%s[%lld] is unresponsive to ipc after %d tries but "
|
||||||
|
+ "we found the pid so have it killed that we can restart",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid),
|
||||||
|
+ pcmk_children[next_child].check_count);
|
||||||
|
+ stop_child(&pcmk_children[next_child], SIGKILL);
|
||||||
|
+ if (pcmk_children[next_child].respawn) {
|
||||||
|
+ /* as long as the respawn-limit isn't reached
|
||||||
|
+ give it another round of check retries
|
||||||
|
+ */
|
||||||
|
+ pcmk_children[next_child].check_count = 0;
|
||||||
|
+ }
|
||||||
|
+ } else {
|
||||||
|
+ crm_notice("%s[%lld] is unresponsive to ipc after %d tries",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid),
|
||||||
|
+ pcmk_children[next_child].check_count);
|
||||||
|
+ if (pcmk_children[next_child].respawn) {
|
||||||
|
+ /* as long as the respawn-limit isn't reached
|
||||||
|
+ and we haven't run out of connect retries
|
||||||
|
+ we account this as progress we are willing
|
||||||
|
+ to tell to sbd
|
||||||
|
+ */
|
||||||
|
+ subdaemon_check_progress = time(NULL);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
- }
|
||||||
|
- /* at least one of the processes found at startup
|
||||||
|
- * is still going, so keep this recurring timer around */
|
||||||
|
- keep_tracking = TRUE;
|
||||||
|
+ /* go to the next child and see if
|
||||||
|
+ we can make progress there
|
||||||
|
+ */
|
||||||
|
+ next_child++;
|
||||||
|
+ break;
|
||||||
|
+ case pcmk_rc_ipc_unresponsive:
|
||||||
|
+ if (pcmk_children[next_child].respawn) {
|
||||||
|
+ crm_err("%s[%lld] terminated",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid));
|
||||||
|
+ } else {
|
||||||
|
+ /* orderly shutdown */
|
||||||
|
+ crm_notice("%s[%lld] terminated",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid));
|
||||||
|
+ }
|
||||||
|
+ pcmk_process_exit(&(pcmk_children[next_child]));
|
||||||
|
+ if (!pcmk_children[next_child].respawn) {
|
||||||
|
+ /* if a subdaemon is down and we don't want it
|
||||||
|
+ to be restarted this is a success during
|
||||||
|
+ shutdown. if it isn't restarted anymore
|
||||||
|
+ due to MAX_RESPAWN it is
|
||||||
|
+ rather no success.
|
||||||
|
+ */
|
||||||
|
+ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||||
|
+ subdaemon_check_progress = time(NULL);
|
||||||
|
+ }
|
||||||
|
+ next_child++;
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ crm_exit(CRM_EX_FATAL);
|
||||||
|
+ break; /* static analysis/noreturn */
|
||||||
|
}
|
||||||
|
|
||||||
|
- global_keep_tracking = keep_tracking;
|
||||||
|
- return keep_tracking;
|
||||||
|
+ if (next_child >= PCMK__NELEM(pcmk_children)) {
|
||||||
|
+ next_child = 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return G_SOURCE_CONTINUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
@@ -257,11 +309,6 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||||
|
child->name, child->endpoint);
|
||||||
|
/* need to monitor how it evolves, and start new process if badly */
|
||||||
|
child->active_before_startup = true;
|
||||||
|
- if (!global_keep_tracking) {
|
||||||
|
- global_keep_tracking = true;
|
||||||
|
- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||||
|
- check_active_before_startup_processes, NULL);
|
||||||
|
- }
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (child->needs_cluster && !pcmkd_cluster_connected()) {
|
||||||
|
@@ -648,7 +695,6 @@ child_liveness(pcmk_child_t *child)
|
||||||
|
int
|
||||||
|
find_and_track_existing_processes(void)
|
||||||
|
{
|
||||||
|
- bool tracking = false;
|
||||||
|
bool wait_in_progress;
|
||||||
|
int rc;
|
||||||
|
size_t i, rounds;
|
||||||
|
@@ -716,7 +762,6 @@ find_and_track_existing_processes(void)
|
||||||
|
pcmk_children[i].pid));
|
||||||
|
pcmk_children[i].respawn_count = -1; /* 0~keep watching */
|
||||||
|
pcmk_children[i].active_before_startup = true;
|
||||||
|
- tracking = true;
|
||||||
|
break;
|
||||||
|
case pcmk_rc_ipc_pid_only:
|
||||||
|
if (pcmk_children[i].respawn_count == WAIT_TRIES) {
|
||||||
|
@@ -751,10 +796,8 @@ find_and_track_existing_processes(void)
|
||||||
|
pcmk_children[i].respawn_count = 0; /* restore pristine state */
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (tracking) {
|
||||||
|
- g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||||
|
+ g_timeout_add_seconds(PCMK_PROCESS_CHECK_INTERVAL,
|
||||||
|
check_active_before_startup_processes, NULL);
|
||||||
|
- }
|
||||||
|
return pcmk_rc_ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
||||||
|
|
||||||
|
From 4b60aa100669ff494dd3f1303ca9586dc52e95e4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||||
|
Date: Thu, 9 Dec 2021 11:25:22 +0100
|
||||||
|
Subject: [PATCH 2/2] Fix: ipc_client: use libqb async API for connect
|
||||||
|
|
||||||
|
---
|
||||||
|
configure.ac | 3 +++
|
||||||
|
lib/common/ipc_client.c | 22 ++++++++++++++++++++++
|
||||||
|
2 files changed, 25 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index f43fb724c7..c747fe1193 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -1309,6 +1309,9 @@ PKG_CHECK_MODULES(libqb, libqb >= 0.17)
|
||||||
|
CPPFLAGS="$libqb_CFLAGS $CPPFLAGS"
|
||||||
|
LIBS="$libqb_LIBS $LIBS"
|
||||||
|
|
||||||
|
+dnl libqb 2.0.3 + ipc-connect-async-API (2022-01)
|
||||||
|
+AC_CHECK_FUNCS([qb_ipcc_connect_async])
|
||||||
|
+
|
||||||
|
dnl libqb 2.0.2+ (2020-10)
|
||||||
|
AC_CHECK_FUNCS(qb_ipcc_auth_get,
|
||||||
|
AC_DEFINE(HAVE_IPCC_AUTH_GET, 1,
|
||||||
|
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
|
||||||
|
index c5afdf3a3d..417b9ef175 100644
|
||||||
|
--- a/lib/common/ipc_client.c
|
||||||
|
+++ b/lib/common/ipc_client.c
|
||||||
|
@@ -1407,13 +1407,35 @@ pcmk__ipc_is_authentic_process_active(const char *name, uid_t refuid,
|
||||||
|
int32_t qb_rc;
|
||||||
|
pid_t found_pid = 0; uid_t found_uid = 0; gid_t found_gid = 0;
|
||||||
|
qb_ipcc_connection_t *c;
|
||||||
|
+#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
|
||||||
|
+ struct pollfd pollfd = { 0, };
|
||||||
|
+ int poll_rc;
|
||||||
|
|
||||||
|
+ c = qb_ipcc_connect_async(name, 0,
|
||||||
|
+ &(pollfd.fd));
|
||||||
|
+#else
|
||||||
|
c = qb_ipcc_connect(name, 0);
|
||||||
|
+#endif
|
||||||
|
if (c == NULL) {
|
||||||
|
crm_info("Could not connect to %s IPC: %s", name, strerror(errno));
|
||||||
|
rc = pcmk_rc_ipc_unresponsive;
|
||||||
|
goto bail;
|
||||||
|
}
|
||||||
|
+#ifdef HAVE_QB_IPCC_CONNECT_ASYNC
|
||||||
|
+ pollfd.events = POLLIN;
|
||||||
|
+ do {
|
||||||
|
+ poll_rc = poll(&pollfd, 1, 2000);
|
||||||
|
+ } while ((poll_rc == -1) && (errno == EINTR));
|
||||||
|
+ if ((poll_rc <= 0) || (qb_ipcc_connect_continue(c) != 0)) {
|
||||||
|
+ crm_info("Could not connect to %s IPC: %s", name,
|
||||||
|
+ (poll_rc == 0)?"timeout":strerror(errno));
|
||||||
|
+ rc = pcmk_rc_ipc_unresponsive;
|
||||||
|
+ if (poll_rc > 0) {
|
||||||
|
+ c = NULL; // qb_ipcc_connect_continue cleaned up for us
|
||||||
|
+ }
|
||||||
|
+ goto bail;
|
||||||
|
+ }
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
qb_rc = qb_ipcc_fd_get(c, &fd);
|
||||||
|
if (qb_rc != 0) {
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
1338
SOURCES/022-failure-messages.patch
Normal file
1338
SOURCES/022-failure-messages.patch
Normal file
File diff suppressed because it is too large
Load Diff
82
SOURCES/023-memory-leak.patch
Normal file
82
SOURCES/023-memory-leak.patch
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
From 8034a203bbff0aa3b53f2946dc58e409bd7246c9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Thu, 20 Jan 2022 15:03:31 -0600
|
||||||
|
Subject: [PATCH] Fix: scheduler: avoid memory leak when displaying clones
|
||||||
|
|
||||||
|
Previously, pe__clone_default() unconditionally created a hash table for
|
||||||
|
stopped instances, but didn't free it in every code path.
|
||||||
|
|
||||||
|
Now, only create the table when we have something to put in it and might
|
||||||
|
actually use it, and ensure it always gets freed.
|
||||||
|
---
|
||||||
|
lib/pengine/clone.c | 18 +++++++++++++-----
|
||||||
|
1 file changed, 13 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/lib/pengine/clone.c b/lib/pengine/clone.c
|
||||||
|
index 742e2920b0..920a04c32c 100644
|
||||||
|
--- a/lib/pengine/clone.c
|
||||||
|
+++ b/lib/pengine/clone.c
|
||||||
|
@@ -761,7 +761,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
GList *only_node = va_arg(args, GList *);
|
||||||
|
GList *only_rsc = va_arg(args, GList *);
|
||||||
|
|
||||||
|
- GHashTable *stopped = pcmk__strkey_table(free, free);
|
||||||
|
+ GHashTable *stopped = NULL;
|
||||||
|
|
||||||
|
char *list_text = NULL;
|
||||||
|
size_t list_text_len = 0;
|
||||||
|
@@ -818,7 +818,11 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
} else if (partially_active == FALSE) {
|
||||||
|
// List stopped instances when requested (except orphans)
|
||||||
|
if (!pcmk_is_set(child_rsc->flags, pe_rsc_orphan)
|
||||||
|
+ && !pcmk_is_set(show_opts, pcmk_show_clone_detail)
|
||||||
|
&& pcmk_is_set(show_opts, pcmk_show_inactive_rscs)) {
|
||||||
|
+ if (stopped == NULL) {
|
||||||
|
+ stopped = pcmk__strkey_table(free, free);
|
||||||
|
+ }
|
||||||
|
g_hash_table_insert(stopped, strdup(child_rsc->id), strdup("Stopped"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -873,7 +877,6 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pcmk_is_set(show_opts, pcmk_show_clone_detail)) {
|
||||||
|
- g_hash_table_destroy(stopped);
|
||||||
|
PCMK__OUTPUT_LIST_FOOTER(out, rc);
|
||||||
|
return pcmk_rc_ok;
|
||||||
|
}
|
||||||
|
@@ -948,8 +951,10 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
GList *list = g_hash_table_get_values(rsc->allowed_nodes);
|
||||||
|
|
||||||
|
/* Custom stopped table for non-unique clones */
|
||||||
|
- g_hash_table_destroy(stopped);
|
||||||
|
- stopped = pcmk__strkey_table(free, free);
|
||||||
|
+ if (stopped != NULL) {
|
||||||
|
+ g_hash_table_destroy(stopped);
|
||||||
|
+ stopped = NULL;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
if (list == NULL) {
|
||||||
|
/* Clusters with symmetrical=false haven't calculated allowed_nodes yet
|
||||||
|
@@ -972,6 +977,9 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
state = "Stopped (disabled)";
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (stopped == NULL) {
|
||||||
|
+ stopped = pcmk__strkey_table(free, free);
|
||||||
|
+ }
|
||||||
|
if (probe_op != NULL) {
|
||||||
|
int rc;
|
||||||
|
|
||||||
|
@@ -987,7 +995,7 @@ pe__clone_default(pcmk__output_t *out, va_list args)
|
||||||
|
g_list_free(list);
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (g_hash_table_size(stopped) > 0) {
|
||||||
|
+ if (stopped != NULL) {
|
||||||
|
GList *list = sorted_hash_table_values(stopped);
|
||||||
|
|
||||||
|
clone_header(out, &rc, rsc, clone_data);
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
108
SOURCES/024-daemon-tracking.patch
Normal file
108
SOURCES/024-daemon-tracking.patch
Normal file
@ -0,0 +1,108 @@
|
|||||||
|
From ac92690d8426ec4d1c8be1e0eb4b9289411afe75 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Klaus Wenninger <klaus.wenninger@aon.at>
|
||||||
|
Date: Mon, 24 Jan 2022 12:18:42 +0100
|
||||||
|
Subject: [PATCH] Fix: pacemakerd: have signal-handler take care of lost
|
||||||
|
processes
|
||||||
|
|
||||||
|
regression from introduction of periodic subdaemon checking
|
||||||
|
in cases where they are pacemakerd children - previously it was either
|
||||||
|
periodic checking or signal-handler per process.
|
||||||
|
---
|
||||||
|
daemons/pacemakerd/pcmkd_subdaemons.c | 38 ++++++++++++++++-----------
|
||||||
|
1 file changed, 22 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/pacemakerd/pcmkd_subdaemons.c b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
index c03903c99e..84ecdc1ee8 100644
|
||||||
|
--- a/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
+++ b/daemons/pacemakerd/pcmkd_subdaemons.c
|
||||||
|
@@ -141,7 +141,6 @@ check_active_before_startup_processes(gpointer user_data)
|
||||||
|
switch (rc) {
|
||||||
|
case pcmk_rc_ok:
|
||||||
|
pcmk_children[next_child].check_count = 0;
|
||||||
|
- next_child++;
|
||||||
|
subdaemon_check_progress = time(NULL);
|
||||||
|
break;
|
||||||
|
case pcmk_rc_ipc_pid_only: // This case: it was previously OK
|
||||||
|
@@ -178,9 +177,27 @@ check_active_before_startup_processes(gpointer user_data)
|
||||||
|
/* go to the next child and see if
|
||||||
|
we can make progress there
|
||||||
|
*/
|
||||||
|
- next_child++;
|
||||||
|
break;
|
||||||
|
case pcmk_rc_ipc_unresponsive:
|
||||||
|
+ if (!pcmk_children[next_child].respawn) {
|
||||||
|
+ /* if a subdaemon is down and we don't want it
|
||||||
|
+ to be restarted this is a success during
|
||||||
|
+ shutdown. if it isn't restarted anymore
|
||||||
|
+ due to MAX_RESPAWN it is
|
||||||
|
+ rather no success.
|
||||||
|
+ */
|
||||||
|
+ if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||||
|
+ subdaemon_check_progress = time(NULL);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (!pcmk_children[next_child].active_before_startup) {
|
||||||
|
+ crm_trace("found %s[%lld] missing - signal-handler "
|
||||||
|
+ "will take care of it",
|
||||||
|
+ pcmk_children[next_child].name,
|
||||||
|
+ (long long) PCMK__SPECIAL_PID_AS_0(
|
||||||
|
+ pcmk_children[next_child].pid));
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
if (pcmk_children[next_child].respawn) {
|
||||||
|
crm_err("%s[%lld] terminated",
|
||||||
|
pcmk_children[next_child].name,
|
||||||
|
@@ -194,24 +211,13 @@ check_active_before_startup_processes(gpointer user_data)
|
||||||
|
pcmk_children[next_child].pid));
|
||||||
|
}
|
||||||
|
pcmk_process_exit(&(pcmk_children[next_child]));
|
||||||
|
- if (!pcmk_children[next_child].respawn) {
|
||||||
|
- /* if a subdaemon is down and we don't want it
|
||||||
|
- to be restarted this is a success during
|
||||||
|
- shutdown. if it isn't restarted anymore
|
||||||
|
- due to MAX_RESPAWN it is
|
||||||
|
- rather no success.
|
||||||
|
- */
|
||||||
|
- if (pcmk_children[next_child].respawn_count <= MAX_RESPAWN) {
|
||||||
|
- subdaemon_check_progress = time(NULL);
|
||||||
|
- }
|
||||||
|
- next_child++;
|
||||||
|
- }
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
crm_exit(CRM_EX_FATAL);
|
||||||
|
break; /* static analysis/noreturn */
|
||||||
|
}
|
||||||
|
|
||||||
|
+ next_child++;
|
||||||
|
if (next_child >= PCMK__NELEM(pcmk_children)) {
|
||||||
|
next_child = 0;
|
||||||
|
}
|
||||||
|
@@ -285,6 +291,7 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||||
|
{
|
||||||
|
child->pid = 0;
|
||||||
|
child->active_before_startup = false;
|
||||||
|
+ child->check_count = 0;
|
||||||
|
|
||||||
|
child->respawn_count += 1;
|
||||||
|
if (child->respawn_count > MAX_RESPAWN) {
|
||||||
|
@@ -307,8 +314,6 @@ pcmk_process_exit(pcmk_child_t * child)
|
||||||
|
crm_warn("One-off suppressing strict respawning of a child process %s,"
|
||||||
|
" appears alright per %s IPC end-point",
|
||||||
|
child->name, child->endpoint);
|
||||||
|
- /* need to monitor how it evolves, and start new process if badly */
|
||||||
|
- child->active_before_startup = true;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
if (child->needs_cluster && !pcmkd_cluster_connected()) {
|
||||||
|
@@ -422,6 +427,7 @@ start_child(pcmk_child_t * child)
|
||||||
|
const char *env_callgrind = getenv("PCMK_callgrind_enabled");
|
||||||
|
|
||||||
|
child->active_before_startup = false;
|
||||||
|
+ child->check_count = 0;
|
||||||
|
|
||||||
|
if (child->command == NULL) {
|
||||||
|
crm_info("Nothing to do for child \"%s\"", child->name);
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
30
SOURCES/025-regression.patch
Normal file
30
SOURCES/025-regression.patch
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
From 16928cfc69136bc56b1574bee9966e0d5de73abd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Ken Gaillot <kgaillot@redhat.com>
|
||||||
|
Date: Wed, 26 Jan 2022 09:15:43 -0600
|
||||||
|
Subject: [PATCH] Fix: controller: correctly match "node down" events
|
||||||
|
|
||||||
|
regression introduced in 2.1.2 by 03ce7376e
|
||||||
|
|
||||||
|
The symptom that led to this was that removing a remote node connection
|
||||||
|
resource would lead to the remote node getting fenced when the connection stop
|
||||||
|
was not recognized as an expected down event.
|
||||||
|
---
|
||||||
|
daemons/controld/controld_te_events.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/daemons/controld/controld_te_events.c b/daemons/controld/controld_te_events.c
|
||||||
|
index 36fd832ba0..1fd7129922 100644
|
||||||
|
--- a/daemons/controld/controld_te_events.c
|
||||||
|
+++ b/daemons/controld/controld_te_events.c
|
||||||
|
@@ -304,7 +304,7 @@ match_down_event(const char *target)
|
||||||
|
gIter2 = gIter2->next) {
|
||||||
|
|
||||||
|
match = (crm_action_t*)gIter2->data;
|
||||||
|
- if (pcmk_is_set(match->flags, pcmk__graph_action_confirmed)) {
|
||||||
|
+ if (pcmk_is_set(match->flags, pcmk__graph_action_executed)) {
|
||||||
|
xpath_ret = xpath_search(match->xml, xpath);
|
||||||
|
if (numXpathResults(xpath_ret) < 1) {
|
||||||
|
match = NULL;
|
||||||
|
--
|
||||||
|
2.27.0
|
||||||
|
|
@ -36,7 +36,7 @@
|
|||||||
## can be incremented to build packages reliably considered "newer"
|
## can be incremented to build packages reliably considered "newer"
|
||||||
## than previously built packages with the same pcmkversion)
|
## than previously built packages with the same pcmkversion)
|
||||||
%global pcmkversion 2.1.2
|
%global pcmkversion 2.1.2
|
||||||
%global specversion 2
|
%global specversion 4
|
||||||
|
|
||||||
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
|
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
|
||||||
%global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175
|
%global commit ada5c3b36e2adf1703d54d39f40a4b8628eca175
|
||||||
@ -250,6 +250,23 @@ Patch5: 005-fencing-reasons.patch
|
|||||||
Patch6: 006-stateful-metadata.patch
|
Patch6: 006-stateful-metadata.patch
|
||||||
Patch7: 007-memory-leak.patch
|
Patch7: 007-memory-leak.patch
|
||||||
Patch8: 008-fencing-history.patch
|
Patch8: 008-fencing-history.patch
|
||||||
|
Patch9: 009-fencing-reasons.patch
|
||||||
|
Patch10: 010-probe-failures.patch
|
||||||
|
Patch11: 011-fencing-reasons.patch
|
||||||
|
Patch12: 012-notify-crash.patch
|
||||||
|
Patch13: 013-probe-failures.patch
|
||||||
|
Patch14: 014-pcmk_delay_base.patch
|
||||||
|
Patch15: 015-fencing-reasons.patch
|
||||||
|
Patch16: 016-fencing-crash.patch
|
||||||
|
Patch17: 017-fencing-reasons.patch
|
||||||
|
Patch18: 018-failure-messages.patch
|
||||||
|
Patch19: 019-corosync-tracking.patch
|
||||||
|
Patch20: 020-systemd-unit.patch
|
||||||
|
Patch21: 021-daemon-tracking.patch
|
||||||
|
Patch22: 022-failure-messages.patch
|
||||||
|
Patch23: 023-memory-leak.patch
|
||||||
|
Patch24: 024-daemon-tracking.patch
|
||||||
|
Patch25: 025-regression.patch
|
||||||
|
|
||||||
Requires: resource-agents
|
Requires: resource-agents
|
||||||
Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
|
Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release}
|
||||||
@ -269,8 +286,9 @@ Requires: %{python_path}
|
|||||||
BuildRequires: %{python_name}-devel
|
BuildRequires: %{python_name}-devel
|
||||||
|
|
||||||
# Pacemaker requires a minimum libqb functionality
|
# Pacemaker requires a minimum libqb functionality
|
||||||
Requires: libqb >= 0.17.0
|
# RHEL requires a higher version than upstream, for qb_ipcc_connect_async()
|
||||||
BuildRequires: libqb-devel >= 0.17.0
|
Requires: libqb >= 2.0.3-7
|
||||||
|
BuildRequires: libqb-devel >= 2.0.3-7
|
||||||
|
|
||||||
# Required basic build tools
|
# Required basic build tools
|
||||||
BuildRequires: autoconf
|
BuildRequires: autoconf
|
||||||
@ -855,6 +873,24 @@ exit 0
|
|||||||
%license %{nagios_name}-%{nagios_hash}/COPYING
|
%license %{nagios_name}-%{nagios_hash}/COPYING
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Jan 26 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-4
|
||||||
|
- Fix regression in down event detection that affects remote nodes
|
||||||
|
- Resolves: rhbz2039399
|
||||||
|
|
||||||
|
* Mon Jan 24 2022 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-3
|
||||||
|
- Detect an unresponsive subdaemon
|
||||||
|
- Handle certain probe failures as stopped instead of failed
|
||||||
|
- Update pcmk_delay_base option meta-data
|
||||||
|
- Avoid crash when using clone notifications
|
||||||
|
- Retry Corosync shutdown tracking if first attempt fails
|
||||||
|
- Improve display of failed actions
|
||||||
|
- Resolves: rhbz1707851
|
||||||
|
- Resolves: rhbz2039982
|
||||||
|
- Resolves: rhbz2032032
|
||||||
|
- Resolves: rhbz2040443
|
||||||
|
- Resolves: rhbz2042367
|
||||||
|
- Resolves: rhbz2042546
|
||||||
|
|
||||||
* Thu Dec 16 2021 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-2
|
* Thu Dec 16 2021 Ken Gaillot <kgaillot@redhat.com> - 2.1.2-2
|
||||||
- Correctly get metadata for systemd agent names that end in '@'
|
- Correctly get metadata for systemd agent names that end in '@'
|
||||||
- Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data
|
- Use correct OCF 1.1 syntax in ocf:pacemaker:Stateful meta-data
|
||||||
|
Loading…
Reference in New Issue
Block a user