From e2db52ba7f9cb2d976771897435324c2f1637581 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Thu, 19 Mar 2026 11:24:07 -0400 Subject: [PATCH 1/3] Refactor: tools: Move pending xpath query into its own define. --- tools/crm_resource_runtime.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 286c10c..6655c85 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1,5 +1,5 @@ /* - * Copyright 2004-2024 the Pacemaker project contributors + * Copyright 2004-2026 the Pacemaker project contributors * * The version control history for this file may have further details. * @@ -1946,6 +1946,13 @@ print_pending_actions(pcmk__output_t *out, GList *actions) /* For --wait, how long to sleep between cluster state checks */ #define WAIT_SLEEP_S (2) +#define XPATH_PENDING_ACTION "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \ + "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM \ + "/" PCMK__XE_LRM_RESOURCES \ + "/" PCMK__XE_LRM_RESOURCE \ + "/" PCMK__XE_LRM_RSC_OP \ + "[@" PCMK__XA_RC_CODE "='%d']" + /*! * \internal * \brief Wait until all pending cluster actions are complete @@ -1985,13 +1992,7 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) return ENOMEM; } - xpath = crm_strdup_printf("/" PCMK_XE_CIB "/" PCMK_XE_STATUS - "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM - "/" PCMK__XE_LRM_RESOURCES - "/" PCMK__XE_LRM_RESOURCE - "/" PCMK__XE_LRM_RSC_OP - "[@" PCMK__XA_RC_CODE "='%d']", - PCMK_OCF_UNKNOWN); + xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); -- 2.53.0 From f6915f09ec3412aaaf824fe1fdd3f7c9dc4a27da Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Thu, 19 Mar 2026 11:28:47 -0400 Subject: [PATCH 2/3] Refactor: tools: Add pending_actions_in_cib to crm_resource. 
This just refactors a little bit of code into its own function to reduce complexity in wait_till_stable and to give me a place to add a bunch more code later. --- tools/crm_resource_runtime.c | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c index 6655c85..06ff68d 100644 --- a/tools/crm_resource_runtime.c +++ b/tools/crm_resource_runtime.c @@ -1953,6 +1953,23 @@ print_pending_actions(pcmk__output_t *out, GList *actions) "/" PCMK__XE_LRM_RSC_OP \ "[@" PCMK__XA_RC_CODE "='%d']" +static bool +pending_actions_in_cib(pcmk_scheduler_t *scheduler) +{ + xmlXPathObject *search = NULL; + bool pending = false; + char *xpath = NULL; + + xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN); + search = xpath_search(scheduler->input, xpath); + pending = (numXpathResults(search) > 0); + + xmlXPathFreeObject(search); + free(xpath); + + return pending; +} + /*! * \internal * \brief Wait until all pending cluster actions are complete @@ -1973,13 +1990,10 @@ int wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) { pcmk_scheduler_t *scheduler = NULL; - xmlXPathObjectPtr search; int rc = pcmk_rc_ok; - bool pending_unknown_state_resources; time_t expire_time = time(NULL); time_t time_diff; bool printed_version_warning = out->is_quiet(out); // i.e. 
don't print if quiet - char *xpath = NULL; if (timeout_ms == 0) { expire_time += WAIT_DEFAULT_TIMEOUT_S; @@ -1992,7 +2006,6 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) return ENOMEM; } - xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN); do { /* Abort if timeout is reached */ time_diff = expire_time - time(NULL); @@ -2038,13 +2051,10 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) } } - search = xpath_search(scheduler->input, xpath); - pending_unknown_state_resources = (numXpathResults(search) > 0); - freeXpathObject(search); - } while (actions_are_pending(scheduler->actions) || pending_unknown_state_resources); + } while (actions_are_pending(scheduler->actions) + || pending_actions_in_cib(scheduler)); pe_free_working_set(scheduler); - free(xpath); return rc; } -- 2.53.0 From 5783c35095c8f8b06550ea71174183af87473b4e Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Mon, 11 May 2026 12:43:09 -0400 Subject: [PATCH 3/3] Low: tools: Don't wait on certain "pending" monitor actions. d253cdf introduced a change where `crm_resource --wait` will wait on pending actions in the CIB before returning. Most of the time this is fine, but there's a very specific case where it's not. It's possible to end up in a situation where you have a resource that is disabled and constrained such that it can't run where it wants to. In that case, the CIB will still contain a pending lrm_rsc_op history entry for a recurring monitor operation even after the resource fails to start. It will look something like this: By design, pacemaker doesn't replace these pending monitor entries with a new entry when they fail. The scheduler requires this for remote connection resources at least. See bbadfe553. So instead, we'll fix this in the tools. 
If the pending monitor action has failed, there will also be an lrm_rsc_op
history entry recording that failure.  Its operation_key matches the pending
recurring monitor entry, and its ID ends in _last_failure_0.

We can look through the history entries in the same lrm_resource element as
the pending recurring monitor entry for one with a matching operation_key,
and see if its ID ends in _last_failure_0.  If so, that's a monitor action
we shouldn't wait on.

Fixes RHEL-78393
---
 tools/crm_resource_runtime.c | 103 +++++++++++++++++++++++++++++++++--
 1 file changed, 100 insertions(+), 3 deletions(-)

diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 06ff68d..e2cb94b 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -1953,21 +1953,118 @@ print_pending_actions(pcmk__output_t *out, GList *actions)
                              "/" PCMK__XE_LRM_RSC_OP \
                              "[@" PCMK__XA_RC_CODE "='%d']"
 
+#define XPATH_LAST_FAILURE "/" PCMK_XE_CIB "/" PCMK_XE_STATUS \
+                           "/" PCMK__XE_NODE_STATE "/" PCMK__XE_LRM \
+                           "/" PCMK__XE_LRM_RESOURCES \
+                           "/" PCMK__XE_LRM_RESOURCE \
+                           "/" PCMK__XE_LRM_RSC_OP \
+                           "[@" PCMK__XA_OPERATION_KEY "='%s']"
+
+/*!
+ * \internal
+ * \brief Check if a pending lrm_rsc_op has a matching last_failure entry
+ *
+ * \param[in] scheduler   The scheduler object
+ * \param[in] pending_op  The pending lrm_rsc_op history entry
+ * \param[in] key         The operation_key attribute of \p pending_op
+ *
+ * \return \c true if the parent element of \p pending_op also contains an
+ *         lrm_rsc_op history entry with \p key as its operation_key and with
+ *         an id attribute ending in "_last_failure_0", \c false otherwise
+ *
+ * \note XPATH_LAST_FAILURE matches entries under every node_state, so results
+ *       are limited to siblings of \p pending_op. Otherwise a failure recorded
+ *       on one node would hide an operation still pending on another node.
+ */
+static bool
+action_has_matching_last_failure(pcmk_scheduler_t *scheduler,
+                                 const xmlNode *pending_op, const char *key)
+{
+    xmlXPathObject *search = NULL;
+    bool retval = false;
+    char *xpath = NULL;
+
+    xpath = crm_strdup_printf(XPATH_LAST_FAILURE, key);
+    search = xpath_search(scheduler->input, xpath);
+
+    for (int i = 0; i < numXpathResults(search); i++) {
+        const xmlNode *lrm_op_xml = getXpathResult(search, i);
+        const char *id = NULL;
+
+        // Only entries in the same resource history as pending_op count
+        if ((lrm_op_xml == NULL)
+            || (lrm_op_xml->parent != pending_op->parent)) {
+            continue;
+        }
+
+        // g_str_has_suffix() must not be given a NULL string
+        id = crm_element_value(lrm_op_xml, PCMK_XA_ID);
+        if ((id != NULL) && g_str_has_suffix(id, "_last_failure_0")) {
+            retval = true;
+            break;
+        }
+    }
+
+    xmlXPathFreeObject(search);
+    free(xpath);
+
+    return retval;
+}
+
+/*!
+ * \internal
+ * \brief Determine if there are certain pending actions in the CIB
+ *
+ * \param[in] scheduler The scheduler object
+ *
+ * \return \c true if there are any pending actions in the CIB, after
+ *         filtering out pending recurring monitor actions with a last_failure
+ *         history entry; \c false otherwise
+ *
+ * \note We filter out certain recurring monitor actions because they might
+ *       always be present. The scheduler can't replace the history entry
+ *       with a failure entry (see bbadfe553), but it's still not a pending
+ *       action and we don't want to wait for it.
+ */
 static bool
 pending_actions_in_cib(pcmk_scheduler_t *scheduler)
 {
     xmlXPathObject *search = NULL;
-    bool pending = false;
     char *xpath = NULL;
+    bool any_pending = false;
 
     xpath = crm_strdup_printf(XPATH_PENDING_ACTION, PCMK_OCF_UNKNOWN);
     search = xpath_search(scheduler->input, xpath);
-    pending = (numXpathResults(search) > 0);
+
+    for (int i = 0; i < numXpathResults(search); i++) {
+        const char *op_key = NULL;
+        const xmlNode *lrm_op_xml = getXpathResult(search, i);
+
+        if (!pcmk__str_eq(PCMK_ACTION_MONITOR,
+                          crm_element_value(lrm_op_xml, PCMK_XA_OPERATION),
+                          pcmk__str_none)) {
+            any_pending = true;
+            break;
+        }
+
+        if (pcmk_xe_is_probe(lrm_op_xml)) {
+            any_pending = true;
+            break;
+        }
+
+        op_key = crm_element_value(lrm_op_xml, PCMK__XA_OPERATION_KEY);
+        if ((op_key == NULL)
+            || !action_has_matching_last_failure(scheduler, lrm_op_xml,
+                                                 op_key)) {
+            any_pending = true;
+            break;
+        }
+    }
 
     xmlXPathFreeObject(search);
     free(xpath);
 
-    return pending;
+    return any_pending;
 }
 
 /*!
-- 
2.53.0