284 lines
10 KiB
Diff
284 lines
10 KiB
Diff
From e2db52ba7f9cb2d976771897435324c2f1637581 Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Thu, 19 Mar 2026 11:24:07 -0400
|
|
Subject: [PATCH 1/3] Refactor: tools: Move pending xpath query into its own
|
|
define.
|
|
|
|
---
|
|
tools/crm_resource_runtime.c | 17 +++++++++--------
|
|
1 file changed, 9 insertions(+), 8 deletions(-)
|
|
|
|
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
|
|
index 286c10c..6655c85 100644
|
|
--- a/tools/crm_resource_runtime.c
|
|
+++ b/tools/crm_resource_runtime.c
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * Copyright 2004-2024 the Pacemaker project contributors
|
|
+ * Copyright 2004-2026 the Pacemaker project contributors
|
|
*
|
|
* The version control history for this file may have further details.
|
|
*
|
|
@@ -1946,6 +1946,13 @@ print_pending_actions(pcmk__output_t *out, GList *actions)
|
|
/* For --wait, how long to sleep between cluster state checks */
|
|
#define WAIT_SLEEP_S (2)
|
|
|
|
+#define XPATH_PENDING_ACTION "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
|
|
+ "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM \
|
|
+ "/" PCMK__XE_LRM_RESOURCES \
|
|
+ "/" PCMK__XE_LRM_RESOURCE \
|
|
+ "/" PCMK__XE_LRM_RSC_OP \
|
|
+ "[@" PCMK__XA_RC_CODE "='%d']"
|
|
+
|
|
/*!
|
|
* \internal
|
|
* \brief Wait until all pending cluster actions are complete
|
|
@@ -1985,13 +1992,7 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib)
|
|
return ENOMEM;
|
|
}
|
|
|
|
- xpath = crm_strdup_printf("/" PCMK_XE_CIB "/" PCMK_XE_STATUS
|
|
- "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM
|
|
- "/" PCMK__XE_LRM_RESOURCES
|
|
- "/" PCMK__XE_LRM_RESOURCE
|
|
- "/" PCMK__XE_LRM_RSC_OP
|
|
- "[@" PCMK__XA_RC_CODE "='%d']",
|
|
- PCMK_OCF_UNKNOWN);
|
|
+ xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN);
|
|
do {
|
|
/* Abort if timeout is reached */
|
|
time_diff = expire_time - time(NULL);
|
|
--
|
|
2.53.0
|
|
|
|
From f6915f09ec3412aaaf824fe1fdd3f7c9dc4a27da Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Thu, 19 Mar 2026 11:28:47 -0400
|
|
Subject: [PATCH 2/3] Refactor: tools: Add pending_actions_in_cib to
|
|
crm_resource.
|
|
|
|
This just refactors a little bit of code into its own function to reduce
|
|
complexity in wait_till_stable and to give me a place to add a bunch
|
|
more code later.
|
|
---
|
|
tools/crm_resource_runtime.c | 28 +++++++++++++++++++---------
|
|
1 file changed, 19 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
|
|
index 6655c85..06ff68d 100644
|
|
--- a/tools/crm_resource_runtime.c
|
|
+++ b/tools/crm_resource_runtime.c
|
|
@@ -1953,6 +1953,23 @@ print_pending_actions(pcmk__output_t *out, GList *actions)
|
|
"/" PCMK__XE_LRM_RSC_OP \
|
|
"[@" PCMK__XA_RC_CODE "='%d']"
|
|
|
|
+static bool
|
|
+pending_actions_in_cib(pcmk_scheduler_t *scheduler)
|
|
+{
|
|
+ xmlXPathObject *search = NULL;
|
|
+ bool pending = false;
|
|
+ char *xpath = NULL;
|
|
+
|
|
+ xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN);
|
|
+ search = xpath_search(scheduler->input, xpath);
|
|
+ pending = (numXpathResults(search) > 0);
|
|
+
|
|
+ xmlXPathFreeObject(search);
|
|
+ free(xpath);
|
|
+
|
|
+ return pending;
|
|
+}
|
|
+
|
|
/*!
|
|
* \internal
|
|
* \brief Wait until all pending cluster actions are complete
|
|
@@ -1973,13 +1990,10 @@ int
|
|
wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib)
|
|
{
|
|
pcmk_scheduler_t *scheduler = NULL;
|
|
- xmlXPathObjectPtr search;
|
|
int rc = pcmk_rc_ok;
|
|
- bool pending_unknown_state_resources;
|
|
time_t expire_time = time(NULL);
|
|
time_t time_diff;
|
|
bool printed_version_warning = out->is_quiet(out); // i.e. don't print if quiet
|
|
- char *xpath = NULL;
|
|
|
|
if (timeout_ms == 0) {
|
|
expire_time += WAIT_DEFAULT_TIMEOUT_S;
|
|
@@ -1992,7 +2006,6 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib)
|
|
return ENOMEM;
|
|
}
|
|
|
|
- xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN);
|
|
do {
|
|
/* Abort if timeout is reached */
|
|
time_diff = expire_time - time(NULL);
|
|
@@ -2038,13 +2051,10 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib)
|
|
}
|
|
}
|
|
|
|
- search = xpath_search(scheduler->input, xpath);
|
|
- pending_unknown_state_resources = (numXpathResults(search) > 0);
|
|
- freeXpathObject(search);
|
|
- } while (actions_are_pending(scheduler->actions) || pending_unknown_state_resources);
|
|
+ } while (actions_are_pending(scheduler->actions)
|
|
+ || pending_actions_in_cib(scheduler));
|
|
|
|
pe_free_working_set(scheduler);
|
|
- free(xpath);
|
|
return rc;
|
|
}
|
|
|
|
--
|
|
2.53.0
|
|
|
|
From 5783c35095c8f8b06550ea71174183af87473b4e Mon Sep 17 00:00:00 2001
|
|
From: Chris Lumens <clumens@redhat.com>
|
|
Date: Mon, 11 May 2026 12:43:09 -0400
|
|
Subject: [PATCH 3/3] Low: tools: Don't wait on certain "pending" monitor
|
|
actions.
|
|
|
|
d253cdf introduced a change where `crm_resource --wait` will wait on
|
|
pending actions in the CIB before returning. Most of the time this is
|
|
fine, but there's a very specific case where it's not.
|
|
|
|
It's possible to end up in a situation where you have a resource that is
|
|
disabled and constrained such that it can't run where it wants to. In
|
|
that case, the CIB will still contain a pending lrm_rsc_op history entry
|
|
for a recurring monitor operation even after the resource fails to
|
|
start. It will look something like this:
|
|
|
|
<lrm_rsc_op id="dummy1_monitor_10000" operation_key="dummy1_monitor_10000" operation="monitor" call-id="-1" rc-code="193" op-status="-1" interval="10000" last-rc-change="1773083270" exec-time="0" .../>
|
|
|
|
By design, pacemaker doesn't replace these pending monitor entries with
|
|
a new entry when they fail. The scheduler requires this for remote
|
|
connection resources at least. See bbadfe553.
|
|
|
|
So instead, we'll fix this in the tools. If the pending monitor action
|
|
has failed, it will also have a history entry like this:
|
|
|
|
<lrm_rsc_op id="dummy1_last_failure_0" operation_key="dummy1_monitor_10000" operation="monitor" call-id="-1" rc-code="193" op-status="-1" interval="10000" last-rc-change="1773083270" exec-time="0" queue-time="0" .../>
|
|
|
|
We can look through the history entries for one with an operation_key
|
|
matching the pending recurring monitor entry, and see if its ID ends in
|
|
_last_failure_0. If so, that's a monitor action we shouldn't wait on.
|
|
|
|
Fixes RHEL-78393
|
|
---
|
|
tools/crm_resource_runtime.c | 86 ++++++++++++++++++++++++++++++++++--
|
|
1 file changed, 83 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
|
|
index 06ff68d..e2cb94b 100644
|
|
--- a/tools/crm_resource_runtime.c
|
|
+++ b/tools/crm_resource_runtime.c
|
|
@@ -1953,21 +1953,101 @@ print_pending_actions(pcmk__output_t *out, GList *actions)
|
|
"/" PCMK__XE_LRM_RSC_OP \
|
|
"[@" PCMK__XA_RC_CODE "='%d']"
|
|
|
|
+#define XPATH_LAST_FAILURE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
|
|
+ "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM \
|
|
+ "/" PCMK__XE_LRM_RESOURCES \
|
|
+ "/" PCMK__XE_LRM_RESOURCE \
|
|
+ "/" PCMK__XE_LRM_RSC_OP \
|
|
+ "[@" PCMK__XA_OPERATION_KEY "='%s']"
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Check if there's an lrm_rsc_op last_failure entry for a given key
|
|
+ *
|
|
+ * \param[in] scheduler The scheduler object
|
|
+ * \param[in] key The operation_key attribute of some lrm_rsc_op entry
|
|
+ *
|
|
+ * \return \c true if there is an lrm_rsc_op history entry with \p key as its
|
|
+ * operation_key and with an id attribute ending in "_last_failure_0",
|
|
+ * \c false otherwise
|
|
+ */
|
|
+static bool
|
|
+action_has_matching_last_failure(pcmk_scheduler_t *scheduler, const char *key)
|
|
+{
|
|
+ xmlXPathObject *search = NULL;
|
|
+ bool retval = false;
|
|
+ char *xpath = NULL;
|
|
+
|
|
+ xpath = crm_strdup_printf(XPATH_LAST_FAILURE, key);
|
|
+ search = xpath_search(scheduler->input, xpath);
|
|
+
|
|
+ for (int i = 0; i < numXpathResults(search); i++) {
|
|
+ const xmlNode *lrm_op_xml = getXpathResult(search, i);
|
|
+
|
|
+ if (g_str_has_suffix(crm_element_value(lrm_op_xml, PCMK_XA_ID),
|
|
+ "_last_failure_0")) {
|
|
+ retval = true;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ xmlXPathFreeObject(search);
|
|
+ free(xpath);
|
|
+
|
|
+ return retval;
|
|
+}
|
|
+
|
|
+/*!
|
|
+ * \internal
|
|
+ * \brief Determine if there are certain pending actions in the CIB
|
|
+ *
|
|
+ * \param[in] scheduler The scheduler object
|
|
+ *
|
|
+ * \return \c true if there are any pending actions in the CIB, after
|
|
+ * filtering out pending recurring monitor actions with a last_failure
|
|
+ * history entry; \c false otherwise
|
|
+ *
|
|
+ * \note We filter out certain recurring monitor actions because they might
|
|
+ * always be present. The scheduler can't replace the history entry
|
|
+ * with a failure entry (see bbadfe553), but it's still not a pending
|
|
+ * action and we don't want to wait for it.
|
|
+ */
|
|
static bool
|
|
pending_actions_in_cib(pcmk_scheduler_t *scheduler)
|
|
{
|
|
xmlXPathObject *search = NULL;
|
|
- bool pending = false;
|
|
char *xpath = NULL;
|
|
+ bool any_pending = false;
|
|
|
|
xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN);
|
|
search = xpath_search(scheduler->input, xpath);
|
|
- pending = (numXpathResults(search) > 0);
|
|
+
|
|
+ for (int i = 0; i < numXpathResults(search); i++) {
|
|
+ const char *op_key = NULL;
|
|
+ const xmlNode *lrm_op_xml = getXpathResult(search, i);
|
|
+
|
|
+ if (!pcmk__str_eq(PCMK_ACTION_MONITOR,
|
|
+ crm_element_value(lrm_op_xml, PCMK_XA_OPERATION),
|
|
+ pcmk__str_none)) {
|
|
+ any_pending = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (pcmk_xe_is_probe(lrm_op_xml)) {
|
|
+ any_pending = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ op_key = crm_element_value(lrm_op_xml, PCMK__XA_OPERATION_KEY);
|
|
+ if ((op_key == NULL) || !action_has_matching_last_failure(scheduler, op_key)) {
|
|
+ any_pending = true;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
|
|
xmlXPathFreeObject(search);
|
|
free(xpath);
|
|
|
|
- return pending;
|
|
+ return any_pending;
|
|
}
|
|
|
|
/*!
|
|
--
|
|
2.53.0
|
|
|