From 029534679b109d5f720470c0cb2934bb238c2b4b Mon Sep 17 00:00:00 2001 From: AlmaLinux RelEng Bot Date: Mon, 30 Mar 2026 10:50:56 -0400 Subject: [PATCH] import CS pacemaker-2.1.10-2.el9 --- SOURCES/001-crm_resource_wait.patch | 131 +++++++++++ SOURCES/002-ipc_connect_retry.patch | 178 +++++++++++++++ SOURCES/003-ipc_evict.patch | 330 ++++++++++++++++++++++++++++ SOURCES/004-fewer_messages.patch | 88 ++++++++ SPECS/pacemaker.spec | 16 +- 5 files changed, 741 insertions(+), 2 deletions(-) create mode 100644 SOURCES/001-crm_resource_wait.patch create mode 100644 SOURCES/002-ipc_connect_retry.patch create mode 100644 SOURCES/003-ipc_evict.patch create mode 100644 SOURCES/004-fewer_messages.patch diff --git a/SOURCES/001-crm_resource_wait.patch b/SOURCES/001-crm_resource_wait.patch new file mode 100644 index 0000000..8193769 --- /dev/null +++ b/SOURCES/001-crm_resource_wait.patch @@ -0,0 +1,131 @@ +From 17a2d7acffc86e06b2fa8b49c96c33d72466e7f7 Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Fri, 5 Sep 2025 20:35:31 -0700 +Subject: [PATCH] Fix: tools: Handle large timeouts correctly in crm_resource + --wait + +Previously, if the --timeout value parsed to a value greater than +(UINT_MAX - 999), the wait timeout would overflow. The effective timeout +would be either 0 seconds or 1 second. This is because 999 was added to +the guint value before passing it to pcmk__timeout_ms2s(). + +Now, we simply pass the timeout in milliseconds to +pcmk__timeout_ms2s(), without adding 999. + +This implies a slight behavior change. Previously, timeouts were always +rounded up to the next greatest second. Now, they're rounded to the +nearest second. For example, previously: +* timeout values between 1ms and 500ms => wait timeout of 1 second +* timeout values between 501ms and 1500ms => wait timeout of 2 seconds +* timeout values between 1501ms and 2500ms => wait timeout of 3 seconds +* and so on + +Now: +* timeout values between 1ms and 1499ms => wait timeout of 1 second +* timeout values between 1500ms and 2499ms => wait timeout of 2 seconds +* timeout values between 2500ms and 3499ms => wait timeout of 3 seconds +* and so on + +The previous rounding behavior has existed since crm_resource --wait was +added by 424afcdf. + +Update the help text to note the granularity and rounding behavior. The +exact behavior of the restart command is confusing, and its logic should +be cleaned up in the future. + +Fixes RHEL-45869 +Fixes RHEL-86148 +Closes T841 + +Signed-off-by: Reid Wahl +--- + include/crm/common/internal.h | 1 + + lib/common/utils.c | 30 ++++++++++++++++++++++++++++++ + tools/crm_resource.c | 4 +++- + tools/crm_resource_runtime.c | 2 +- + 4 files changed, 35 insertions(+), 2 deletions(-) + +diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h +index 7d1e95f..ee26321 100644 +--- a/include/crm/common/internal.h ++++ b/include/crm/common/internal.h +@@ -249,6 +249,7 @@ void pcmk__daemonize(const char *name, const char *pidfile); + void pcmk__panic(const char *origin); + pid_t pcmk__locate_sbd(void); + void pcmk__sleep_ms(unsigned int ms); ++guint pcmk__timeout_ms2s(guint timeout_ms); + + extern int pcmk__score_red; + extern int pcmk__score_green; +diff --git a/lib/common/utils.c b/lib/common/utils.c +index 9e44c22..0a38811 100644 +--- a/lib/common/utils.c ++++ b/lib/common/utils.c +@@ -413,6 +413,36 @@ pcmk__sleep_ms(unsigned int ms) + #endif + } + ++/*! ++ * \internal ++ * \brief Convert milliseconds to seconds ++ * ++ * \param[in] timeout_ms The interval, in ms ++ * ++ * \return If \p timeout_ms is 0, return 0. Otherwise, return the number of ++ * seconds, rounded to the nearest integer, with a minimum of 1. ++ */ ++guint ++pcmk__timeout_ms2s(guint timeout_ms) ++{ ++ guint quot, rem; ++ ++ if (timeout_ms == 0) { ++ return 0; ++ } else if (timeout_ms < 1000) { ++ return 1; ++ } ++ ++ quot = timeout_ms / 1000; ++ rem = timeout_ms % 1000; ++ ++ if (rem >= 500) { ++ quot += 1; ++ } ++ ++ return quot; ++} ++ + // Deprecated functions kept only for backward API compatibility + // LCOV_EXCL_START + +diff --git a/tools/crm_resource.c b/tools/crm_resource.c +index 898c21e..ac50c5e 100644 +--- a/tools/crm_resource.c ++++ b/tools/crm_resource.c +@@ -766,7 +766,9 @@ static GOptionEntry addl_entries[] = { + "ID" }, + { "timeout", 'T', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, timeout_cb, + "(Advanced) Abort if command does not finish in this time (with\n" +- INDENT "--restart, --wait, --force-*)", ++ INDENT "--restart, --wait, --force-*). The --restart command uses a\n" ++ INDENT "two-second granularity and the --wait command uses a one-second\n" ++ INDENT "granularity, with rounding.", + "N" }, + { "all", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.all, + "List all options, including advanced and deprecated (with\n" +diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c +index 7350f07..286c10c 100644 +--- a/tools/crm_resource_runtime.c ++++ b/tools/crm_resource_runtime.c +@@ -1977,7 +1977,7 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib) + if (timeout_ms == 0) { + expire_time += WAIT_DEFAULT_TIMEOUT_S; + } else { +- expire_time += (timeout_ms + 999) / 1000; ++ expire_time += pcmk__timeout_ms2s(timeout_ms); + } + + scheduler = pe_new_working_set(); +-- +2.47.1 + diff --git a/SOURCES/002-ipc_connect_retry.patch b/SOURCES/002-ipc_connect_retry.patch new file mode 100644 index 0000000..19b8028 --- /dev/null +++ b/SOURCES/002-ipc_connect_retry.patch @@ -0,0 +1,178 @@ +From 8ff58d786907b64c32350e72e341cdd0f5026813 Mon Sep 17 00:00:00 2001 +From: Thomas Jones +Date: Fri, 30 May 2025 16:40:13 -0400 +Subject: [PATCH 1/2] Fix: libcrmcommon: Add retries on connect to avoid fatal + errors when sub-daemons communicate Add pcmk__connect_ipc_retry_conrefused() + and use it where it makes sense Add retry loop to + connect_and_send_attrd_request() that retries connect and send. + +--- + daemons/controld/controld_schedulerd.c | 2 +- + include/crm/common/ipc_internal.h | 4 ++++ + lib/common/ipc_attrd.c | 19 ++++++++++++---- + lib/common/ipc_client.c | 30 ++++++++++++++++++++++++++ + lib/pacemaker/pcmk_cluster_queries.c | 2 +- + 5 files changed, 51 insertions(+), 6 deletions(-) + +diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c +index 22b37b8..444bdef 100644 +--- a/daemons/controld/controld_schedulerd.c ++++ b/daemons/controld/controld_schedulerd.c +@@ -197,7 +197,7 @@ new_schedulerd_ipc_connection(void) + + pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL); + +- rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3); ++ rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api, pcmk_ipc_dispatch_main, 3); + if (rc != pcmk_rc_ok) { + crm_err("Error connecting to %s: %s", + pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc)); +diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h +index 27b8bfc..09145ba 100644 +--- a/include/crm/common/ipc_internal.h ++++ b/include/crm/common/ipc_internal.h +@@ -101,6 +101,10 @@ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd); + int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type, + int attempts); + ++int pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api, ++ enum pcmk_ipc_dispatch dispatch_type, ++ int attempts); ++ + /* + * Server-related + */ +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 5ab0f2d..60a92a0 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -152,6 +152,8 @@ create_attrd_op(const char *user_name) + static int + connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + { ++ static const int max_retries = 5; ++ int remaining_attempts = max_retries; + int rc = pcmk_rc_ok; + bool created_api = false; + +@@ -163,10 +165,19 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request) + created_api = true; + } + +- rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5); +- if (rc == pcmk_rc_ok) { +- rc = pcmk__send_ipc_request(api, request); +- } ++ // If attrd is killed and is being restarted we will temporarily get ++ // ECONNREFUSED on connect if it is already dead or ENOTCONN if it died ++ // after we connected to it. We should wait a bit and retry in those cases. ++ do { ++ if (rc == ENOTCONN || rc == ECONNREFUSED) { ++ sleep(max_retries - remaining_attempts); ++ } ++ rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, remaining_attempts); ++ if (rc == pcmk_rc_ok) { ++ rc = pcmk__send_ipc_request(api, request); ++ } ++ remaining_attempts--; ++ } while ((rc == ENOTCONN || rc == ECONNREFUSED) && remaining_attempts >= 0); + + if (created_api) { + pcmk_free_ipc_api(api); +diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c +index 24d7745..4ac7810 100644 +--- a/lib/common/ipc_client.c ++++ b/lib/common/ipc_client.c +@@ -488,6 +488,36 @@ connect_without_main_loop(pcmk_ipc_api_t *api) + return rc; + } + ++/*! ++ * \internal ++ * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors ++ * and ECONNREFUSED) ++ * ++ * \param[in,out] api IPC API instance ++ * \param[in] dispatch_type How IPC replies should be dispatched ++ * \param[in] attempts How many times to try (in case of soft error) ++ * ++ * \return Standard Pacemaker return code ++*/ ++int ++pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api, ++ enum pcmk_ipc_dispatch dispatch_type, ++ int attempts) ++{ ++ int remaining = attempts; ++ int rc = pcmk_rc_ok; ++ ++ do { ++ if (rc == ECONNREFUSED) { ++ pcmk__sleep_ms((attempts - remaining) * 500); ++ } ++ rc = pcmk__connect_ipc(api, dispatch_type, remaining); ++ remaining--; ++ } while (rc == ECONNREFUSED && remaining >= 0); ++ ++ return rc; ++} ++ + /*! + * \internal + * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors) +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index bbefcda..c1fcf73 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -360,7 +360,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, + pcmk_register_ipc_callback(api, cb, data); + } + +- rc = pcmk__connect_ipc(api, dispatch_type, 5); ++ rc = pcmk__connect_ipc_retry_conrefused(api, dispatch_type, 5); + if (rc != pcmk_rc_ok) { + if (rc == EREMOTEIO) { + data->pcmkd_state = pcmk_pacemakerd_state_remote; +-- +2.47.1 + +From 2e46b914fb08d346d7b022ba75302f9290034507 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Mon, 4 Aug 2025 10:38:00 -0400 +Subject: [PATCH 2/2] Med: libpacemaker: Do not retry on ECONNREFUSED in tools. + +This is a regression introduced by e438946787. In that patch, what +we're trying to do is retry IPC connections between daemons. If a +daemon gets ECONNREFUSED when it initiates an IPC connection, the most +likely reason is that another daemon has been killed and is restarting +but is not yet ready to accept connections. Waiting and retrying +repeatedly is an acceptable way to deal with this. + +However, if a command line tool gets ECONNREFUSED, it's more likely that +the problem is the cluster isn't running at all. In this case, waiting +and retrying just introduces a delay for a situation that will never be +resolved. Reverting just the part in pcmk_cluster_queries.c should fix +this problem without affecting any of the daemons - they don't call this +code. + +Fixes RHEL-106594 +--- + lib/pacemaker/pcmk_cluster_queries.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c +index c1fcf73..bbefcda 100644 +--- a/lib/pacemaker/pcmk_cluster_queries.c ++++ b/lib/pacemaker/pcmk_cluster_queries.c +@@ -360,7 +360,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb, + pcmk_register_ipc_callback(api, cb, data); + } + +- rc = pcmk__connect_ipc_retry_conrefused(api, dispatch_type, 5); ++ rc = pcmk__connect_ipc(api, dispatch_type, 5); + if (rc != pcmk_rc_ok) { + if (rc == EREMOTEIO) { + data->pcmkd_state = pcmk_pacemakerd_state_remote; +-- +2.47.1 + diff --git a/SOURCES/003-ipc_evict.patch b/SOURCES/003-ipc_evict.patch new file mode 100644 index 0000000..8df911f --- /dev/null +++ b/SOURCES/003-ipc_evict.patch @@ -0,0 +1,330 @@ +From fd592986fb50c7b07197df077d744c3e4d4aecd1 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 27 Aug 2025 10:41:13 -0400 +Subject: [PATCH 1/5] Refactor: libcrmcommon: Rearrange the queue_len check. + +Check if the queue length is 0 first and return, which allows everything +else to be un-indented one level. +--- + lib/common/ipc_server.c | 47 ++++++++++++++++++++--------------------- + 1 file changed, 23 insertions(+), 24 deletions(-) + +diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c +index 96f3cdb..dddd96f 100644 +--- a/lib/common/ipc_server.c ++++ b/lib/common/ipc_server.c +@@ -547,34 +547,33 @@ crm_ipcs_flush_events(pcmk__client_t *c) + pcmk_rc_str(rc), (long long) qb_rc); + } + +- if (queue_len) { +- +- /* Allow clients to briefly fall behind on processing incoming messages, +- * but drop completely unresponsive clients so the connection doesn't +- * consume resources indefinitely. +- */ +- if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { +- if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { +- /* Don't evict for a new or shrinking backlog */ +- crm_warn("Client with process ID %u has a backlog of %u messages " +- CRM_XS " %p", c->pid, queue_len, c->ipcs); +- } else { +- crm_err("Evicting client with process ID %u due to backlog of %u messages " +- CRM_XS " %p", c->pid, queue_len, c->ipcs); +- c->queue_backlog = 0; +- qb_ipcs_disconnect(c->ipcs); +- return rc; +- } +- } +- +- c->queue_backlog = queue_len; +- delay_next_flush(c, queue_len); +- +- } else { ++ if (queue_len == 0) { + /* Event queue is empty, there is no backlog */ + c->queue_backlog = 0; ++ return rc; + } + ++ /* Allow clients to briefly fall behind on processing incoming messages, ++ * but drop completely unresponsive clients so the connection doesn't ++ * consume resources indefinitely. ++ */ ++ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { ++ if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { ++ /* Don't evict for a new or shrinking backlog */ ++ crm_warn("Client with process ID %u has a backlog of %u messages " ++ CRM_XS " %p", c->pid, queue_len, c->ipcs); ++ } else { ++ crm_err("Evicting client with process ID %u due to backlog of %u messages " ++ CRM_XS " %p", c->pid, queue_len, c->ipcs); ++ c->queue_backlog = 0; ++ qb_ipcs_disconnect(c->ipcs); ++ return rc; ++ } ++ } ++ ++ c->queue_backlog = queue_len; ++ delay_next_flush(c, queue_len); ++ + return rc; + } + +-- +2.47.1 + +From 652e78628d9edc39fc0ec1a6ba6729b2adcaeb22 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 27 Aug 2025 11:31:37 -0400 +Subject: [PATCH 2/5] Refactor: libcrmcommon: Simplify an empty event queue + check. + +I find this just a little bit more straightforward to follow. +--- + lib/common/ipc_server.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c +index dddd96f..249b0d1 100644 +--- a/lib/common/ipc_server.c ++++ b/lib/common/ipc_server.c +@@ -512,10 +512,13 @@ crm_ipcs_flush_events(pcmk__client_t *c) + pcmk__ipc_header_t *header = NULL; + struct iovec *event = NULL; + +- if (c->event_queue) { +- // We don't pop unless send is successful +- event = g_queue_peek_head(c->event_queue); ++ if ((c->event_queue == NULL) || g_queue_is_empty(c->event_queue)) { ++ break; + } ++ ++ // We don't pop unless send is successful ++ event = g_queue_peek_head(c->event_queue); ++ + if (event == NULL) { // Queue is empty + break; + } +-- +2.47.1 + +From e5aa53eda1675a2930527f8af93455e9a6fbecb5 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 27 Aug 2025 11:35:38 -0400 +Subject: [PATCH 3/5] Refactor: libcrmcommon: Rearrange a few tests in + crm_ipcs_flush_events. + +Again, no important code changes here. I just find these a little +easier to follow. +--- + lib/common/ipc_server.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c +index 249b0d1..e47f461 100644 +--- a/lib/common/ipc_server.c ++++ b/lib/common/ipc_server.c +@@ -498,16 +498,18 @@ crm_ipcs_flush_events(pcmk__client_t *c) + + if (c == NULL) { + return rc; ++ } + +- } else if (c->event_timer) { ++ if (c->event_timer != 0) { + /* There is already a timer, wait until it goes off */ + crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer); + return rc; + } + +- if (c->event_queue) { ++ if (c->event_queue != NULL) { + queue_len = g_queue_get_length(c->event_queue); + } ++ + while (sent < 100) { + pcmk__ipc_header_t *header = NULL; + struct iovec *event = NULL; +-- +2.47.1 + +From d61b6184dbfcd7b534804d95ad3d38707aab2880 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Wed, 27 Aug 2025 13:14:54 -0400 +Subject: [PATCH 4/5] Feature: libcrmcommon: Be more lenient in evicting IPC + clients. + +Each IPC connection has a message queue. If the client is unable to +process messages faster than the server is sending them, that queue +start to back up. pacemaker enforces a cap on the queue size, and +that's adjustable with the cluster-ipc-limit parameter. Once the queue +grows beyond that size, the client is assumed to be dead and is evicted +so it can be restarted and the queue resources freed. + +However, it's possible that the client is not dead. On clusters with +very large numbers of resources (I've tried with 300, but fewer might +also cause problems), certain actions can happen that cause a spike in +IPC messages. In RHEL-76276, the action that causes this is moving +nodes in and out of standby. This spike in messages causes the server +to overwhelm the client, which is then evicted. + +My multi-part IPC patches made this even worse, as now if the CIB is so +large that it needs to split an IPC message up, there will be more +messages than before. + +What this fix does is get rid of the cap on the queue size for pacemaker +daemons. As long as the server has been able to send messages to the +client, the client is still doing work and shouldn't be evicted. It may +just be processing messages slower than the server is sending them. +Note that this could lead the queue to grow without bound, eventually +crashing the server. For this reason, we're only allowing pacemaker +daemons to ignore the queue size limit. + +Potential problems with this approach: + +* If the client is so busy that it can't receive even a single message + that crm_ipcs_flush_events tries to send, it will still be evicted. + However, the flush operation does retry with a delay several times + giving the client time to finish up what it's doing. + +* We have timers all over the place with daemons waiting on replies. + It's possible that because we are no longer just evicting the clients, + we will now see those timers expire which will just lead to different + problems. If so, these fixes would probably need to take place in the + client code. + +Fixes T38 +--- + lib/common/ipc_server.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c +index e47f461..83c501b 100644 +--- a/lib/common/ipc_server.c ++++ b/lib/common/ipc_server.c +@@ -563,10 +563,20 @@ crm_ipcs_flush_events(pcmk__client_t *c) + * consume resources indefinitely. + */ + if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) { +- if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) { +- /* Don't evict for a new or shrinking backlog */ ++ /* Don't evict: ++ * - Clients with a new backlog. ++ * - Clients with a shrinking backlog (the client is processing ++ * messages faster than the server is sending them). ++ * - Clients that are pacemaker daemons and have had any messages sent ++ * to them in this flush call (the server is sending messages faster ++ * than the client is processing them, but the client is not dead). ++ */ ++ if ((c->queue_backlog <= 1) ++ || (queue_len < c->queue_backlog) ++ || ((sent > 0) && crm_is_daemon_name(c->name))) { + crm_warn("Client with process ID %u has a backlog of %u messages " + CRM_XS " %p", c->pid, queue_len, c->ipcs); ++ + } else { + crm_err("Evicting client with process ID %u due to backlog of %u messages " + CRM_XS " %p", c->pid, queue_len, c->ipcs); +-- +2.47.1 + +From 0336f2c961470ad5afbfe18e2a90a0f2c0614e68 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 30 Sep 2025 13:50:53 -0400 +Subject: [PATCH 5/5] Feature: libcrmcommon: Update documentation for + cluster-ipc-limit. + +Clarify that this no longer applies to pacemaker daemons. +--- + cts/cli/regression.daemons.exp | 4 ++-- + cts/cli/regression.tools.exp | 16 ++++++++-------- + lib/common/options.c | 6 +++--- + 3 files changed, 13 insertions(+), 13 deletions(-) + +diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp +index fe53044..a16056f 100644 +--- a/cts/cli/regression.daemons.exp ++++ b/cts/cli/regression.daemons.exp +@@ -21,10 +21,10 @@ + + + +- Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). ++ Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). + + +- Maximum IPC message backlog before disconnecting a cluster daemon ++ Maximum IPC message backlog before disconnecting a client + + + +diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp +index 5448ae3..08ddd44 100644 +--- a/cts/cli/regression.tools.exp ++++ b/cts/cli/regression.tools.exp +@@ -139,8 +139,8 @@ Also known as properties, these are options that affect behavior across the enti + * migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) + * Possible values: integer (default: ) + +- * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon +- * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). ++ * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a client ++ * Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). + * Possible values: nonnegative_integer (default: ) + + * stop-all-resources: Whether the cluster should stop all active resources +@@ -385,8 +385,8 @@ Also known as properties, these are options that affect behavior across the enti + + + +- Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). +- Maximum IPC message backlog before disconnecting a cluster daemon ++ Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). ++ Maximum IPC message backlog before disconnecting a client + + + +@@ -574,8 +574,8 @@ Also known as properties, these are options that affect behavior across the enti + * migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit) + * Possible values: integer (default: ) + +- * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon +- * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). ++ * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a client ++ * Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). + * Possible values: nonnegative_integer (default: ) + + * stop-all-resources: Whether the cluster should stop all active resources +@@ -856,8 +856,8 @@ Also known as properties, these are options that affect behavior across the enti + + + +- Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). +- Maximum IPC message backlog before disconnecting a cluster daemon ++ Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes). ++ Maximum IPC message backlog before disconnecting a client + + + +diff --git a/lib/common/options.c b/lib/common/options.c +index c47a402..e7a76c5 100644 +--- a/lib/common/options.c ++++ b/lib/common/options.c +@@ -440,10 +440,10 @@ static const pcmk__cluster_option_t cluster_options[] = { + PCMK_OPT_CLUSTER_IPC_LIMIT, NULL, PCMK_VALUE_NONNEGATIVE_INTEGER, NULL, + "500", pcmk__valid_positive_int, + pcmk__opt_based, +- N_("Maximum IPC message backlog before disconnecting a cluster daemon"), ++ N_("Maximum IPC message backlog before disconnecting a client"), + N_("Raise this if log has \"Evicting client\" messages for cluster " +- "daemon PIDs (a good value is the number of resources in the " +- "cluster multiplied by the number of nodes)."), ++ "PIDs (a good value is the number of resources in the cluster " ++ "multiplied by the number of nodes)."), + }, + + // Orphans and stopping +-- +2.47.1 + diff --git a/SOURCES/004-fewer_messages.patch b/SOURCES/004-fewer_messages.patch new file mode 100644 index 0000000..846c907 --- /dev/null +++ b/SOURCES/004-fewer_messages.patch @@ -0,0 +1,88 @@ +From 468ea9851958d26c25121f201f3631dfdd709cb4 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 11 Nov 2025 15:11:58 -0500 +Subject: [PATCH] Med: daemons: Don't add repeated I_PE_CALC messages to the + fsa queue. + +Let's say you have a two node cluster, node1 and node2. For purposes of +testing, it's easiest if you use fence_dummy instead of a real fencing +agent as this will fake fencing happening but without rebooting the node +so you can see all the log files. + +Assume the DC is node1. Now do the following on node2: + +- pcs node standby node1 +- pcs resource defaults update resource-stickiness=1 +- for i in $(seq 1 300); do echo $i; pcs resource create dummy$i ocf:heartbeat:Dummy --group dummy-group; done +- pcs node unstandby node1 + +It will take a long time to create that many resources. After node1 +comes out of standby, it'll take a minute or two but eventually you'll +see that node1 was fenced. On node1, you'll see a lot of transition +abort messages happen. Each of these transition aborts causes an +I_PE_CALC message to be generated and added to the fsa queue. In my +testing, I've seen the queue grow to ~ 600 messages, all of which are +exactly the same thing. + +The FSA is triggered at G_PRIORITY_HIGH, and once it is triggered, it +will run until its queue is empty. With so many messages being added so +quickly, we've basically ensured it won't be empty any time soon. While +controld is processing the FSA messages, it will be unable to read +anything out of the IPC backlog. + +based continues to attempt to send IPC events to controld but is unable +to do so, so the backlog continues to grow. Eventually, the backlog +reaches that 500 message threshold without anything having been read by +controld, which triggers the eviction process. + +There doesn't seem to be any reason for all these I_PE_CALC messages to +be generated. They're all exactly the same, they don't appear to be +tagged with any unique data tying them to a specific query, and their +presence just slows everything down. + +Thus, the fix here is very simple: if the latest message in the queue is +an I_PE_CALC message, just don't add another one. We could also make +sure there's only ever one I_PE_CALC message in the queue, but there +could potentially be valid reasons for there to be multiple interleaved +with other message types. I am erring on the side of caution with this +minimal fix. + +Resolves: RHEL-114894 +--- + daemons/controld/controld_messages.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c +index 88c032f..ae54743 100644 +--- a/daemons/controld/controld_messages.c ++++ b/daemons/controld/controld_messages.c +@@ -71,6 +71,26 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input, + return; + } + ++ if (input == I_PE_CALC) { ++ GList *ele = NULL; ++ ++ if (prepend) { ++ ele = g_list_first(controld_globals.fsa_message_queue); ++ } else { ++ ele = g_list_last(controld_globals.fsa_message_queue); ++ } ++ ++ if (ele != NULL) { ++ fsa_data_t *message = (fsa_data_t *) ele->data; ++ ++ if (message->fsa_input == I_PE_CALC) { ++ crm_debug("%s item in fsa queue is I_PE_CALC, not adding another", ++ (prepend ? "First" : "Last")); ++ return; ++ } ++ } ++ } ++ + if (input == I_WAIT_FOR_EVENT) { + controld_set_global_flags(controld_fsa_is_stalled); + crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d", +-- +2.47.1 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index 290042e..2eade75 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -36,7 +36,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.10 -%global specversion 1 +%global specversion 2 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit 5693eaeeef06faa1622515963082b5a1731d9fc0 @@ -247,7 +247,10 @@ Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz Source2: pacemaker.sysusers # upstream commits -#Patch001: 001-xxxx.patch +Patch001: 001-crm_resource_wait.patch +Patch002: 002-ipc_connect_retry.patch +Patch003: 003-ipc_evict.patch +Patch004: 004-fewer_messages.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -914,6 +917,15 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Nov 12 2025 Chris Lumens - 2.1.10-2 +- Handle large timeouts correctly in crm_resource --wait +- Do not try to connect to subdaemons before they're respawned +- Don't evict IPC clients as long as they're still processing messages +- Don't overwhelm the FSA queue with repeated CIB queries +- Resolves: RHEL-45869 +- Resolves: RHEL-87484 +- Resolves: RHEL-114894 + * Mon Jun 23 2025 Chris Lumens - 2.1.10-1 - Rebase on upstream 2.1.10-rc1 release - Use dbus to detect completion of systemd resource start/stop actions