Compare commits

...

No commits in common. "c8" and "c9-beta" have entirely different histories.
c8 ... c9-beta

22 changed files with 1285 additions and 14570 deletions

4
.gitignore vendored
View File

@ -1,2 +1,2 @@
SOURCES/nagios-agents-metadata-105ab8a.tar.gz
SOURCES/pacemaker-0f7f88312.tar.gz
SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
SOURCES/pacemaker-5693eaeee.tar.gz

View File

@ -1,2 +1,2 @@
ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz
88946a460e3be18852861269f8837aaaf339328c SOURCES/pacemaker-0f7f88312.tar.gz
2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
1b0699e6243e4fb441229e6cab0b333c95f2d391 SOURCES/pacemaker-5693eaeee.tar.gz

View File

@ -0,0 +1,131 @@
From 17a2d7acffc86e06b2fa8b49c96c33d72466e7f7 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Fri, 5 Sep 2025 20:35:31 -0700
Subject: [PATCH] Fix: tools: Handle large timeouts correctly in crm_resource
--wait
Previously, if the --timeout value parsed to a value greater than
(UINT_MAX - 999), the wait timeout would overflow. The effective timeout
would be either 0 seconds or 1 second. This is because 999 was added to
the guint value before passing it to pcmk__timeout_ms2s().
Now, we simply pass the timeout in milliseconds to
pcmk__timeout_ms2s(), without adding 999.
This implies a slight behavior change. Previously, timeouts were always
rounded up to the next greatest second. Now, they're rounded to the
nearest second. For example, previously:
* timeout values between 1ms and 500ms => wait timeout of 1 second
* timeout values between 501ms and 1500ms => wait timeout of 2 seconds
* timeout values between 1501ms and 2500ms => wait timeout of 3 seconds
* and so on
Now:
* timeout values between 1ms and 1499ms => wait timeout of 1 second
* timeout values between 1500ms and 2499ms => wait timeout of 2 seconds
* timeout values between 2500ms and 3499ms => wait timeout of 3 seconds
* and so on
The previous rounding behavior has existed since crm_resource --wait was
added by 424afcdf.
Update the help text to note the granularity and rounding behavior. The
exact behavior of the restart command is confusing, and its logic should
be cleaned up in the future.
Fixes RHEL-45869
Fixes RHEL-86148
Closes T841
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
include/crm/common/internal.h | 1 +
lib/common/utils.c | 30 ++++++++++++++++++++++++++++++
tools/crm_resource.c | 4 +++-
tools/crm_resource_runtime.c | 2 +-
4 files changed, 35 insertions(+), 2 deletions(-)
diff --git a/include/crm/common/internal.h b/include/crm/common/internal.h
index 7d1e95f..ee26321 100644
--- a/include/crm/common/internal.h
+++ b/include/crm/common/internal.h
@@ -249,6 +249,7 @@ void pcmk__daemonize(const char *name, const char *pidfile);
void pcmk__panic(const char *origin);
pid_t pcmk__locate_sbd(void);
void pcmk__sleep_ms(unsigned int ms);
+guint pcmk__timeout_ms2s(guint timeout_ms);
extern int pcmk__score_red;
extern int pcmk__score_green;
diff --git a/lib/common/utils.c b/lib/common/utils.c
index 9e44c22..0a38811 100644
--- a/lib/common/utils.c
+++ b/lib/common/utils.c
@@ -413,6 +413,36 @@ pcmk__sleep_ms(unsigned int ms)
#endif
}
+/*!
+ * \internal
+ * \brief Convert milliseconds to seconds
+ *
+ * \param[in] timeout_ms The interval, in ms
+ *
+ * \return If \p timeout_ms is 0, return 0. Otherwise, return the number of
+ * seconds, rounded to the nearest integer, with a minimum of 1.
+ */
+guint
+pcmk__timeout_ms2s(guint timeout_ms)
+{
+ guint quot, rem;
+
+ if (timeout_ms == 0) {
+ return 0;
+ } else if (timeout_ms < 1000) {
+ return 1;
+ }
+
+ quot = timeout_ms / 1000;
+ rem = timeout_ms % 1000;
+
+ if (rem >= 500) {
+ quot += 1;
+ }
+
+ return quot;
+}
+
// Deprecated functions kept only for backward API compatibility
// LCOV_EXCL_START
diff --git a/tools/crm_resource.c b/tools/crm_resource.c
index 898c21e..ac50c5e 100644
--- a/tools/crm_resource.c
+++ b/tools/crm_resource.c
@@ -766,7 +766,9 @@ static GOptionEntry addl_entries[] = {
"ID" },
{ "timeout", 'T', G_OPTION_FLAG_NONE, G_OPTION_ARG_CALLBACK, timeout_cb,
"(Advanced) Abort if command does not finish in this time (with\n"
- INDENT "--restart, --wait, --force-*)",
+ INDENT "--restart, --wait, --force-*). The --restart command uses a\n"
+ INDENT "two-second granularity and the --wait command uses a one-second\n"
+ INDENT "granularity, with rounding.",
"N" },
{ "all", 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE, &options.all,
"List all options, including advanced and deprecated (with\n"
diff --git a/tools/crm_resource_runtime.c b/tools/crm_resource_runtime.c
index 7350f07..286c10c 100644
--- a/tools/crm_resource_runtime.c
+++ b/tools/crm_resource_runtime.c
@@ -1977,7 +1977,7 @@ wait_till_stable(pcmk__output_t *out, guint timeout_ms, cib_t * cib)
if (timeout_ms == 0) {
expire_time += WAIT_DEFAULT_TIMEOUT_S;
} else {
- expire_time += (timeout_ms + 999) / 1000;
+ expire_time += pcmk__timeout_ms2s(timeout_ms);
}
scheduler = pe_new_working_set();
--
2.47.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,178 @@
From 8ff58d786907b64c32350e72e341cdd0f5026813 Mon Sep 17 00:00:00 2001
From: Thomas Jones <thomas.jones@ibm.com>
Date: Fri, 30 May 2025 16:40:13 -0400
Subject: [PATCH 1/2] Fix: libcrmcommon: Add retries on connect to avoid fatal
errors when sub-daemons communicate Add pcmk__connect_ipc_retry_conrefused()
and use it where it makes sense Add retry loop to
connect_and_send_attrd_request() that retries connect and send.
---
daemons/controld/controld_schedulerd.c | 2 +-
include/crm/common/ipc_internal.h | 4 ++++
lib/common/ipc_attrd.c | 19 ++++++++++++----
lib/common/ipc_client.c | 30 ++++++++++++++++++++++++++
lib/pacemaker/pcmk_cluster_queries.c | 2 +-
5 files changed, 51 insertions(+), 6 deletions(-)
diff --git a/daemons/controld/controld_schedulerd.c b/daemons/controld/controld_schedulerd.c
index 22b37b8..444bdef 100644
--- a/daemons/controld/controld_schedulerd.c
+++ b/daemons/controld/controld_schedulerd.c
@@ -197,7 +197,7 @@ new_schedulerd_ipc_connection(void)
pcmk_register_ipc_callback(schedulerd_api, scheduler_event_callback, NULL);
- rc = pcmk__connect_ipc(schedulerd_api, pcmk_ipc_dispatch_main, 3);
+ rc = pcmk__connect_ipc_retry_conrefused(schedulerd_api, pcmk_ipc_dispatch_main, 3);
if (rc != pcmk_rc_ok) {
crm_err("Error connecting to %s: %s",
pcmk_ipc_name(schedulerd_api, true), pcmk_rc_str(rc));
diff --git a/include/crm/common/ipc_internal.h b/include/crm/common/ipc_internal.h
index 27b8bfc..09145ba 100644
--- a/include/crm/common/ipc_internal.h
+++ b/include/crm/common/ipc_internal.h
@@ -101,6 +101,10 @@ int pcmk__ipc_fd(crm_ipc_t *ipc, int *fd);
int pcmk__connect_ipc(pcmk_ipc_api_t *api, enum pcmk_ipc_dispatch dispatch_type,
int attempts);
+int pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api,
+ enum pcmk_ipc_dispatch dispatch_type,
+ int attempts);
+
/*
* Server-related
*/
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
index 5ab0f2d..60a92a0 100644
--- a/lib/common/ipc_attrd.c
+++ b/lib/common/ipc_attrd.c
@@ -152,6 +152,8 @@ create_attrd_op(const char *user_name)
static int
connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
{
+ static const int max_retries = 5;
+ int remaining_attempts = max_retries;
int rc = pcmk_rc_ok;
bool created_api = false;
@@ -163,10 +165,19 @@ connect_and_send_attrd_request(pcmk_ipc_api_t *api, const xmlNode *request)
created_api = true;
}
- rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, 5);
- if (rc == pcmk_rc_ok) {
- rc = pcmk__send_ipc_request(api, request);
- }
+ // If attrd is killed and is being restarted we will temporarily get
+ // ECONNREFUSED on connect if it is already dead or ENOTCONN if it died
+ // after we connected to it. We should wait a bit and retry in those cases.
+ do {
+ if (rc == ENOTCONN || rc == ECONNREFUSED) {
+ sleep(max_retries - remaining_attempts);
+ }
+ rc = pcmk__connect_ipc(api, pcmk_ipc_dispatch_sync, remaining_attempts);
+ if (rc == pcmk_rc_ok) {
+ rc = pcmk__send_ipc_request(api, request);
+ }
+ remaining_attempts--;
+ } while ((rc == ENOTCONN || rc == ECONNREFUSED) && remaining_attempts >= 0);
if (created_api) {
pcmk_free_ipc_api(api);
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 24d7745..4ac7810 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -488,6 +488,36 @@ connect_without_main_loop(pcmk_ipc_api_t *api)
return rc;
}
+/*!
+ * \internal
+ * \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors
+ * and ECONNREFUSED)
+ *
+ * \param[in,out] api IPC API instance
+ * \param[in] dispatch_type How IPC replies should be dispatched
+ * \param[in] attempts How many times to try (in case of soft error)
+ *
+ * \return Standard Pacemaker return code
+*/
+int
+pcmk__connect_ipc_retry_conrefused(pcmk_ipc_api_t *api,
+ enum pcmk_ipc_dispatch dispatch_type,
+ int attempts)
+{
+ int remaining = attempts;
+ int rc = pcmk_rc_ok;
+
+ do {
+ if (rc == ECONNREFUSED) {
+ pcmk__sleep_ms((attempts - remaining) * 500);
+ }
+ rc = pcmk__connect_ipc(api, dispatch_type, remaining);
+ remaining--;
+ } while (rc == ECONNREFUSED && remaining >= 0);
+
+ return rc;
+}
+
/*!
* \internal
* \brief Connect to a Pacemaker daemon via IPC (retrying after soft errors)
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
index bbefcda..c1fcf73 100644
--- a/lib/pacemaker/pcmk_cluster_queries.c
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -360,7 +360,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb,
pcmk_register_ipc_callback(api, cb, data);
}
- rc = pcmk__connect_ipc(api, dispatch_type, 5);
+ rc = pcmk__connect_ipc_retry_conrefused(api, dispatch_type, 5);
if (rc != pcmk_rc_ok) {
if (rc == EREMOTEIO) {
data->pcmkd_state = pcmk_pacemakerd_state_remote;
--
2.47.1
From 2e46b914fb08d346d7b022ba75302f9290034507 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Mon, 4 Aug 2025 10:38:00 -0400
Subject: [PATCH 2/2] Med: libpacemaker: Do not retry on ECONNREFUSED in tools.
This is a regression introduced by e438946787. In that patch, what
we're trying to do is retry IPC connections between daemons. If a
daemon gets ECONNREFUSED when it initiates an IPC connection, the most
likely reason is that another daemon has been killed and is restarting
but is not yet ready to accept connections. Waiting and retrying
repeatedly is an acceptable way to deal with this.
However, if a command line tool gets ECONNREFUSED, it's more likely that
the problem is the cluster isn't running at all. In this case, waiting
and retrying just introduces a delay for a situation that will never be
resolved. Reverting just the part in pcmk_cluster_queries.c should fix
this problem without affecting any of the daemons - they don't call this
code.
Fixes RHEL-106594
---
lib/pacemaker/pcmk_cluster_queries.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
index c1fcf73..bbefcda 100644
--- a/lib/pacemaker/pcmk_cluster_queries.c
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -360,7 +360,7 @@ ipc_connect(data_t *data, enum pcmk_ipc_server server, pcmk_ipc_callback_t cb,
pcmk_register_ipc_callback(api, cb, data);
}
- rc = pcmk__connect_ipc_retry_conrefused(api, dispatch_type, 5);
+ rc = pcmk__connect_ipc(api, dispatch_type, 5);
if (rc != pcmk_rc_ok) {
if (rc == EREMOTEIO) {
data->pcmkd_state = pcmk_pacemakerd_state_remote;
--
2.47.1

File diff suppressed because it is too large Load Diff

330
SOURCES/003-ipc_evict.patch Normal file
View File

@ -0,0 +1,330 @@
From fd592986fb50c7b07197df077d744c3e4d4aecd1 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 27 Aug 2025 10:41:13 -0400
Subject: [PATCH 1/5] Refactor: libcrmcommon: Rearrange the queue_len check.
Check if the queue length is 0 first and return, which allows everything
else to be un-indented one level.
---
lib/common/ipc_server.c | 47 ++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c
index 96f3cdb..dddd96f 100644
--- a/lib/common/ipc_server.c
+++ b/lib/common/ipc_server.c
@@ -547,34 +547,33 @@ crm_ipcs_flush_events(pcmk__client_t *c)
pcmk_rc_str(rc), (long long) qb_rc);
}
- if (queue_len) {
-
- /* Allow clients to briefly fall behind on processing incoming messages,
- * but drop completely unresponsive clients so the connection doesn't
- * consume resources indefinitely.
- */
- if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) {
- if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) {
- /* Don't evict for a new or shrinking backlog */
- crm_warn("Client with process ID %u has a backlog of %u messages "
- CRM_XS " %p", c->pid, queue_len, c->ipcs);
- } else {
- crm_err("Evicting client with process ID %u due to backlog of %u messages "
- CRM_XS " %p", c->pid, queue_len, c->ipcs);
- c->queue_backlog = 0;
- qb_ipcs_disconnect(c->ipcs);
- return rc;
- }
- }
-
- c->queue_backlog = queue_len;
- delay_next_flush(c, queue_len);
-
- } else {
+ if (queue_len == 0) {
/* Event queue is empty, there is no backlog */
c->queue_backlog = 0;
+ return rc;
}
+ /* Allow clients to briefly fall behind on processing incoming messages,
+ * but drop completely unresponsive clients so the connection doesn't
+ * consume resources indefinitely.
+ */
+ if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) {
+ if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) {
+ /* Don't evict for a new or shrinking backlog */
+ crm_warn("Client with process ID %u has a backlog of %u messages "
+ CRM_XS " %p", c->pid, queue_len, c->ipcs);
+ } else {
+ crm_err("Evicting client with process ID %u due to backlog of %u messages "
+ CRM_XS " %p", c->pid, queue_len, c->ipcs);
+ c->queue_backlog = 0;
+ qb_ipcs_disconnect(c->ipcs);
+ return rc;
+ }
+ }
+
+ c->queue_backlog = queue_len;
+ delay_next_flush(c, queue_len);
+
return rc;
}
--
2.47.1
From 652e78628d9edc39fc0ec1a6ba6729b2adcaeb22 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 27 Aug 2025 11:31:37 -0400
Subject: [PATCH 2/5] Refactor: libcrmcommon: Simplify an empty event queue
check.
I find this just a little bit more straightforward to follow.
---
lib/common/ipc_server.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c
index dddd96f..249b0d1 100644
--- a/lib/common/ipc_server.c
+++ b/lib/common/ipc_server.c
@@ -512,10 +512,13 @@ crm_ipcs_flush_events(pcmk__client_t *c)
pcmk__ipc_header_t *header = NULL;
struct iovec *event = NULL;
- if (c->event_queue) {
- // We don't pop unless send is successful
- event = g_queue_peek_head(c->event_queue);
+ if ((c->event_queue == NULL) || g_queue_is_empty(c->event_queue)) {
+ break;
}
+
+ // We don't pop unless send is successful
+ event = g_queue_peek_head(c->event_queue);
+
if (event == NULL) { // Queue is empty
break;
}
--
2.47.1
From e5aa53eda1675a2930527f8af93455e9a6fbecb5 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 27 Aug 2025 11:35:38 -0400
Subject: [PATCH 3/5] Refactor: libcrmcommon: Rearrange a few tests in
crm_ipcs_flush_events.
Again, no important code changes here. I just find these a little
easier to follow.
---
lib/common/ipc_server.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c
index 249b0d1..e47f461 100644
--- a/lib/common/ipc_server.c
+++ b/lib/common/ipc_server.c
@@ -498,16 +498,18 @@ crm_ipcs_flush_events(pcmk__client_t *c)
if (c == NULL) {
return rc;
+ }
- } else if (c->event_timer) {
+ if (c->event_timer != 0) {
/* There is already a timer, wait until it goes off */
crm_trace("Timer active for %p - %d", c->ipcs, c->event_timer);
return rc;
}
- if (c->event_queue) {
+ if (c->event_queue != NULL) {
queue_len = g_queue_get_length(c->event_queue);
}
+
while (sent < 100) {
pcmk__ipc_header_t *header = NULL;
struct iovec *event = NULL;
--
2.47.1
From d61b6184dbfcd7b534804d95ad3d38707aab2880 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 27 Aug 2025 13:14:54 -0400
Subject: [PATCH 4/5] Feature: libcrmcommon: Be more lenient in evicting IPC
clients.
Each IPC connection has a message queue. If the client is unable to
process messages faster than the server is sending them, that queue
start to back up. pacemaker enforces a cap on the queue size, and
that's adjustable with the cluster-ipc-limit parameter. Once the queue
grows beyond that size, the client is assumed to be dead and is evicted
so it can be restarted and the queue resources freed.
However, it's possible that the client is not dead. On clusters with
very large numbers of resources (I've tried with 300, but fewer might
also cause problems), certain actions can happen that cause a spike in
IPC messages. In RHEL-76276, the action that causes this is moving
nodes in and out of standby. This spike in messages causes the server
to overwhelm the client, which is then evicted.
My multi-part IPC patches made this even worse, as now if the CIB is so
large that it needs to split an IPC message up, there will be more
messages than before.
What this fix does is get rid of the cap on the queue size for pacemaker
daemons. As long as the server has been able to send messages to the
client, the client is still doing work and shouldn't be evicted. It may
just be processing messages slower than the server is sending them.
Note that this could lead the queue to grow without bound, eventually
crashing the server. For this reason, we're only allowing pacemaker
daemons to ignore the queue size limit.
Potential problems with this approach:
* If the client is so busy that it can't receive even a single message
that crm_ipcs_flush_events tries to send, it will still be evicted.
However, the flush operation does retry with a delay several times
giving the client time to finish up what it's doing.
* We have timers all over the place with daemons waiting on replies.
It's possible that because we are no longer just evicting the clients,
we will now see those timers expire which will just lead to different
problems. If so, these fixes would probably need to take place in the
client code.
Fixes T38
---
lib/common/ipc_server.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/lib/common/ipc_server.c b/lib/common/ipc_server.c
index e47f461..83c501b 100644
--- a/lib/common/ipc_server.c
+++ b/lib/common/ipc_server.c
@@ -563,10 +563,20 @@ crm_ipcs_flush_events(pcmk__client_t *c)
* consume resources indefinitely.
*/
if (queue_len > QB_MAX(c->queue_max, PCMK_IPC_DEFAULT_QUEUE_MAX)) {
- if ((c->queue_backlog <= 1) || (queue_len < c->queue_backlog)) {
- /* Don't evict for a new or shrinking backlog */
+ /* Don't evict:
+ * - Clients with a new backlog.
+ * - Clients with a shrinking backlog (the client is processing
+ * messages faster than the server is sending them).
+ * - Clients that are pacemaker daemons and have had any messages sent
+ * to them in this flush call (the server is sending messages faster
+ * than the client is processing them, but the client is not dead).
+ */
+ if ((c->queue_backlog <= 1)
+ || (queue_len < c->queue_backlog)
+ || ((sent > 0) && crm_is_daemon_name(c->name))) {
crm_warn("Client with process ID %u has a backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
+
} else {
crm_err("Evicting client with process ID %u due to backlog of %u messages "
CRM_XS " %p", c->pid, queue_len, c->ipcs);
--
2.47.1
From 0336f2c961470ad5afbfe18e2a90a0f2c0614e68 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 30 Sep 2025 13:50:53 -0400
Subject: [PATCH 5/5] Feature: libcrmcommon: Update documentation for
cluster-ipc-limit.
Clarify that this no longer applies to pacemaker daemons.
---
cts/cli/regression.daemons.exp | 4 ++--
cts/cli/regression.tools.exp | 16 ++++++++--------
lib/common/options.c | 6 +++---
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp
index fe53044..a16056f 100644
--- a/cts/cli/regression.daemons.exp
+++ b/cts/cli/regression.daemons.exp
@@ -21,10 +21,10 @@
</parameter>
<parameter name="cluster-ipc-limit">
<longdesc lang="en">
- Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
+ Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
</longdesc>
<shortdesc lang="en">
- Maximum IPC message backlog before disconnecting a cluster daemon
+ Maximum IPC message backlog before disconnecting a client
</shortdesc>
<content type="integer" default=""/>
</parameter>
diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp
index 5448ae3..08ddd44 100644
--- a/cts/cli/regression.tools.exp
+++ b/cts/cli/regression.tools.exp
@@ -139,8 +139,8 @@ Also known as properties, these are options that affect behavior across the enti
* migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit)
* Possible values: integer (default: )
- * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon
- * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
+ * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a client
+ * Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
* Possible values: nonnegative_integer (default: )
* stop-all-resources: Whether the cluster should stop all active resources
@@ -385,8 +385,8 @@ Also known as properties, these are options that affect behavior across the enti
<content type="integer" default=""/>
</parameter>
<parameter name="cluster-ipc-limit" advanced="0" generated="0">
- <longdesc lang="en">Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).</longdesc>
- <shortdesc lang="en">Maximum IPC message backlog before disconnecting a cluster daemon</shortdesc>
+ <longdesc lang="en">Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).</longdesc>
+ <shortdesc lang="en">Maximum IPC message backlog before disconnecting a client</shortdesc>
<content type="nonnegative_integer" default=""/>
</parameter>
<parameter name="stop-all-resources" advanced="0" generated="0">
@@ -574,8 +574,8 @@ Also known as properties, these are options that affect behavior across the enti
* migration-limit: The number of live migration actions that the cluster is allowed to execute in parallel on a node (-1 means no limit)
* Possible values: integer (default: )
- * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a cluster daemon
- * Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
+ * cluster-ipc-limit: Maximum IPC message backlog before disconnecting a client
+ * Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).
* Possible values: nonnegative_integer (default: )
* stop-all-resources: Whether the cluster should stop all active resources
@@ -856,8 +856,8 @@ Also known as properties, these are options that affect behavior across the enti
<content type="integer" default=""/>
</parameter>
<parameter name="cluster-ipc-limit" advanced="0" generated="0">
- <longdesc lang="en">Raise this if log has "Evicting client" messages for cluster daemon PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).</longdesc>
- <shortdesc lang="en">Maximum IPC message backlog before disconnecting a cluster daemon</shortdesc>
+ <longdesc lang="en">Raise this if log has "Evicting client" messages for cluster PIDs (a good value is the number of resources in the cluster multiplied by the number of nodes).</longdesc>
+ <shortdesc lang="en">Maximum IPC message backlog before disconnecting a client</shortdesc>
<content type="nonnegative_integer" default=""/>
</parameter>
<parameter name="stop-all-resources" advanced="0" generated="0">
diff --git a/lib/common/options.c b/lib/common/options.c
index c47a402..e7a76c5 100644
--- a/lib/common/options.c
+++ b/lib/common/options.c
@@ -440,10 +440,10 @@ static const pcmk__cluster_option_t cluster_options[] = {
PCMK_OPT_CLUSTER_IPC_LIMIT, NULL, PCMK_VALUE_NONNEGATIVE_INTEGER, NULL,
"500", pcmk__valid_positive_int,
pcmk__opt_based,
- N_("Maximum IPC message backlog before disconnecting a cluster daemon"),
+ N_("Maximum IPC message backlog before disconnecting a client"),
N_("Raise this if log has \"Evicting client\" messages for cluster "
- "daemon PIDs (a good value is the number of resources in the "
- "cluster multiplied by the number of nodes)."),
+ "PIDs (a good value is the number of resources in the cluster "
+ "multiplied by the number of nodes)."),
},
// Orphans and stopping
--
2.47.1

View File

@ -1,42 +0,0 @@
From a3bffc7c66bf6f796f977cffd44f223635b008c5 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 20 Dec 2023 13:33:47 -0800
Subject: [PATCH] Doc: Pacemaker Explained: Add replace for
PCMK__REMOTE_SCHEMA_DIR
So that the existing use in local-options.rst expands correctly.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
doc/sphinx/Makefile.am | 1 +
doc/sphinx/conf.py.in | 1 +
3 files changed, 2 insertions(+)
create mode 100644 doc/sphinx/conf.py.in.rej
diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am
index e48e19a..d0309ff 100644
--- a/doc/sphinx/Makefile.am
+++ b/doc/sphinx/Makefile.am
@@ -134,6 +134,7 @@ $(BOOKS:%=%/conf.py): conf.py.in
-e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \
-e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \
-e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \
+ -e 's#%PCMK__REMOTE_SCHEMA_DIR%#@PCMK__REMOTE_SCHEMA_DIR@#g' \
$(<) > "$@"
$(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst)
diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in
index 556eb72..511f029 100644
--- a/doc/sphinx/conf.py.in
+++ b/doc/sphinx/conf.py.in
@@ -40,6 +40,7 @@ rst_prolog="""
.. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env``
.. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log
.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES%
+.. |PCMK__REMOTE_SCHEMA_DIR| replace:: %PCMK__REMOTE_SCHEMA_DIR%
.. |REMOTE_DISTRO| replace:: AlmaLinux
.. |REMOTE_DISTRO_VER| replace:: 9
"""
--
2.31.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,88 @@
From 468ea9851958d26c25121f201f3631dfdd709cb4 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 11 Nov 2025 15:11:58 -0500
Subject: [PATCH] Med: daemons: Don't add repeated I_PE_CALC messages to the
fsa queue.
Let's say you have a two node cluster, node1 and node2. For purposes of
testing, it's easiest if you use fence_dummy instead of a real fencing
agent as this will fake fencing happening but without rebooting the node
so you can see all the log files.
Assume the DC is node1. Now do the following on node2:
- pcs node standby node1
- pcs resource defaults update resource-stickiness=1
- for i in $(seq 1 300); do echo $i; pcs resource create dummy$i ocf:heartbeat:Dummy --group dummy-group; done
- pcs node unstandby node1
It will take a long time to create that many resources. After node1
comes out of standby, it'll take a minute or two but eventually you'll
see that node1 was fenced. On node1, you'll see a lot of transition
abort messages happen. Each of these transition aborts causes an
I_PE_CALC message to be generated and added to the fsa queue. In my
testing, I've seen the queue grow to ~ 600 messages, all of which are
exactly the same thing.
The FSA is triggered at G_PRIORITY_HIGH, and once it is triggered, it
will run until its queue is empty. With so many messages being added so
quickly, we've basically ensured it won't be empty any time soon. While
controld is processing the FSA messages, it will be unable to read
anything out of the IPC backlog.
based continues to attempt to send IPC events to controld but is unable
to do so, so the backlog continues to grow. Eventually, the backlog
reaches that 500 message threshold without anything having been read by
controld, which triggers the eviction process.
There doesn't seem to be any reason for all these I_PE_CALC messages to
be generated. They're all exactly the same, they don't appear to be
tagged with any unique data tying them to a specific query, and their
presence just slows everything down.
Thus, the fix here is very simple: if the latest message in the queue is
an I_PE_CALC message, just don't add another one. We could also make
sure there's only ever one I_PE_CALC message in the queue, but there
could potentially be valid reasons for there to be multiple interleaved
with other message types. I am erring on the side of caution with this
minimal fix.
Resolves: RHEL-114894
---
daemons/controld/controld_messages.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index 88c032f..ae54743 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -71,6 +71,26 @@ register_fsa_input_adv(enum crmd_fsa_cause cause, enum crmd_fsa_input input,
return;
}
+ if (input == I_PE_CALC) {
+ GList *ele = NULL;
+
+ if (prepend) {
+ ele = g_list_first(controld_globals.fsa_message_queue);
+ } else {
+ ele = g_list_last(controld_globals.fsa_message_queue);
+ }
+
+ if (ele != NULL) {
+ fsa_data_t *message = (fsa_data_t *) ele->data;
+
+ if (message->fsa_input == I_PE_CALC) {
+ crm_debug("%s item in fsa queue is I_PE_CALC, not adding another",
+ (prepend ? "First" : "Last"));
+ return;
+ }
+ }
+ }
+
if (input == I_WAIT_FOR_EVENT) {
controld_set_global_flags(controld_fsa_is_stalled);
crm_debug("Stalling the FSA pending further input: source=%s cause=%s data=%p queue=%d",
--
2.47.1

File diff suppressed because it is too large Load Diff

View File

@ -1,276 +0,0 @@
From d50bbafc32428e873c0052a9defcf93d2e52667e Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 11:35:11 -0500
Subject: [PATCH 1/3] Refactor: libcrmcommon: Split feature set check into its
own function.
---
include/crm/common/cib_internal.h | 4 +++-
lib/cib/cib_utils.c | 12 ++++++------
lib/common/cib.c | 18 +++++++++++++++++-
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/include/crm/common/cib_internal.h b/include/crm/common/cib_internal.h
index c41c12e..fa65e58 100644
--- a/include/crm/common/cib_internal.h
+++ b/include/crm/common/cib_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 the Pacemaker project contributors
+ * Copyright 2023-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -16,6 +16,8 @@ extern "C" {
const char *pcmk__cib_abs_xpath_for(const char *element);
+int pcmk__check_feature_set(const char *cib_version);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
index 0082eef..bf2982c 100644
--- a/lib/cib/cib_utils.c
+++ b/lib/cib/cib_utils.c
@@ -353,7 +353,6 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
xmlNode *patchset_cib = NULL;
xmlNode *local_diff = NULL;
- const char *new_version = NULL;
const char *user = crm_element_value(req, F_CIB_USER);
bool with_digest = false;
@@ -470,12 +469,13 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
}
if (scratch) {
- new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
+ const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
- if (new_version && compare_version(new_version, CRM_FEATURE_SET) > 0) {
- crm_err("Discarding update with feature set '%s' greater than our own '%s'",
- new_version, CRM_FEATURE_SET);
- rc = -EPROTONOSUPPORT;
+ rc = pcmk__check_feature_set(new_version);
+ if (rc != pcmk_rc_ok) {
+ pcmk__config_err("Discarding update with feature set '%s' greater than our own '%s'",
+ new_version, CRM_FEATURE_SET);
+ rc = pcmk_rc2legacy(rc);
goto done;
}
}
diff --git a/lib/common/cib.c b/lib/common/cib.c
index fee7881..cbebc2e 100644
--- a/lib/common/cib.c
+++ b/lib/common/cib.c
@@ -1,6 +1,6 @@
/*
* Original copyright 2004 International Business Machines
- * Later changes copyright 2008-2023 the Pacemaker project contributors
+ * Later changes copyright 2008-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -173,3 +173,19 @@ pcmk_find_cib_element(xmlNode *cib, const char *element_name)
{
return get_xpath_object(pcmk_cib_xpath_for(element_name), cib, LOG_TRACE);
}
+
+/*!
+ * \internal
+ * \brief Check that the feature set in the CIB is supported on this node
+ *
+ * \param[in] new_version XML_ATTR_CRM_VERSION attribute from the CIB
+ */
+int
+pcmk__check_feature_set(const char *cib_version)
+{
+ if (cib_version && compare_version(cib_version, CRM_FEATURE_SET) > 0) {
+ return EPROTONOSUPPORT;
+ }
+
+ return pcmk_rc_ok;
+}
--
2.31.1
From d89fd8336ae47d892201513c99773705d57f15f0 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 13:46:42 -0500
Subject: [PATCH 2/3] Feature: scheduler: Check the CIB feature set in
cluster_status.
This adds the check that was previously only in cib_perform_op to the
scheduler code, ensuring that any daemon or tool that calls the
scheduler will check that the feature set in the CIB is supported.
---
lib/pengine/status.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/pengine/status.c b/lib/pengine/status.c
index e6ec237..1294803 100644
--- a/lib/pengine/status.c
+++ b/lib/pengine/status.c
@@ -14,6 +14,7 @@
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
+#include <crm/common/cib_internal.h>
#include <glib.h>
@@ -70,12 +71,21 @@ pe_free_working_set(pcmk_scheduler_t *scheduler)
gboolean
cluster_status(pcmk_scheduler_t * scheduler)
{
+ const char *new_version = NULL;
xmlNode *section = NULL;
if ((scheduler == NULL) || (scheduler->input == NULL)) {
return FALSE;
}
+ new_version = crm_element_value(scheduler->input, XML_ATTR_CRM_VERSION);
+
+ if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) {
+ pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'",
+ new_version, CRM_FEATURE_SET);
+ return FALSE;
+ }
+
crm_trace("Beginning unpack");
if (scheduler->failed != NULL) {
--
2.31.1
From a3428926d37af506014a6b462d1308d8541c5932 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 14:56:36 -0500
Subject: [PATCH 3/3] Low: libcib: Do not check CIB feature set for files in
cib_perform_op.
This is related to the previous feature for transferring schema files to
older remote nodes. In that case, the newer schema files may also have
a newer feature set than the node supports, so the transferred files are
still not usable.
However, the feature set only matters for the scheduler, not for most
command line tools (obviously, crm_simulate would still care). So in
those cases, we can just disable the feature set check if the CIB was
read in from a file. For the scheduler, the check is still performed as
part of cluster_status.
---
cts/cli/regression.tools.exp | 2 +-
daemons/based/based_callbacks.c | 4 ++--
include/crm/cib/internal.h | 4 ++--
lib/cib/cib_file.c | 2 +-
lib/cib/cib_utils.c | 15 +++++++++------
5 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp
index 417b5cd..c81c420 100644
--- a/cts/cli/regression.tools.exp
+++ b/cts/cli/regression.tools.exp
@@ -7939,7 +7939,7 @@ unpack_config warning: Blind faith: not fencing unseen nodes
=#=#=#= End test: Verbosely verify a file-specified invalid configuration, outputting as xml - Invalid configuration (78) =#=#=#=
* Passed: crm_verify - Verbosely verify a file-specified invalid configuration, outputting as xml
=#=#=#= Begin test: Verbosely verify another file-specified invalid configuration, outputting as xml =#=#=#=
-(cluster_status@status.c:113) warning: Fencing and resource management disabled due to lack of quorum
+(cluster_status@status.c:123) warning: Fencing and resource management disabled due to lack of quorum
<pacemaker-result api-version="X" request="crm_verify_invalid_no_stonith.xml --output-as=xml --verbose">
<status code="78" message="Invalid configuration">
<errors>
diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
index 5f3dc62..f16e4d9 100644
--- a/daemons/based/based_callbacks.c
+++ b/daemons/based/based_callbacks.c
@@ -1362,7 +1362,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation,
input = prepare_input(request, operation->type, &section);
if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) {
- rc = cib_perform_op(op, call_options, op_function, true, section,
+ rc = cib_perform_op(NULL, op, call_options, op_function, true, section,
request, input, false, &config_changed, &the_cib,
&result_cib, NULL, &output);
@@ -1395,7 +1395,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation,
}
// result_cib must not be modified after cib_perform_op() returns
- rc = cib_perform_op(op, call_options, op_function, false, section,
+ rc = cib_perform_op(NULL, op, call_options, op_function, false, section,
request, input, manage_counters, &config_changed,
&the_cib, &result_cib, cib_diff, &output);
diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h
index 9d54d52..b6d6871 100644
--- a/include/crm/cib/internal.h
+++ b/include/crm/cib/internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -206,7 +206,7 @@ int cib__get_notify_patchset(const xmlNode *msg, const xmlNode **patchset);
bool cib__element_in_patchset(const xmlNode *patchset, const char *element);
-int cib_perform_op(const char *op, int call_options, cib__op_fn_t fn,
+int cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn,
bool is_query, const char *section, xmlNode *req,
xmlNode *input, bool manage_counters, bool *config_changed,
xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff,
diff --git a/lib/cib/cib_file.c b/lib/cib/cib_file.c
index a279823..9dd952c 100644
--- a/lib/cib/cib_file.c
+++ b/lib/cib/cib_file.c
@@ -245,7 +245,7 @@ cib_file_process_request(cib_t *cib, xmlNode *request, xmlNode **output)
data = pcmk_find_cib_element(data, section);
}
- rc = cib_perform_op(op, call_options, op_function, read_only, section,
+ rc = cib_perform_op(cib, op, call_options, op_function, read_only, section,
request, data, true, &changed, &private->cib_xml,
&result_cib, &cib_diff, output);
diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
index bf2982c..9c3f9f1 100644
--- a/lib/cib/cib_utils.c
+++ b/lib/cib/cib_utils.c
@@ -339,11 +339,10 @@ should_copy_cib(const char *op, const char *section, int call_options)
}
int
-cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
- const char *section, xmlNode *req, xmlNode *input,
- bool manage_counters, bool *config_changed,
- xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff,
- xmlNode **output)
+cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn,
+ bool is_query, const char *section, xmlNode *req, xmlNode *input,
+ bool manage_counters, bool *config_changed, xmlNode **current_cib,
+ xmlNode **result_cib, xmlNode **diff, xmlNode **output)
{
int rc = pcmk_ok;
bool check_schema = true;
@@ -468,7 +467,11 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
goto done;
}
- if (scratch) {
+ /* If the CIB is from a file, we don't need to check that the feature set is
+ * supported. All we care about in that case is the schema version, which
+ * is checked elsewhere.
+ */
+ if (scratch && (cib == NULL || cib->variant != cib_file)) {
const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
rc = pcmk__check_feature_set(new_version);
--
2.31.1

File diff suppressed because it is too large Load Diff

View File

@ -1,373 +0,0 @@
From 4823643bef8801b33688167b159bb531bcdf8911 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 4 Jan 2024 17:10:08 -0600
Subject: [PATCH 1/5] Refactor: pacemaker-attrd: drop redundant argument from
update_attr_on_host()
It can check for a force-write via its xml argument, to simplify the caller
---
daemons/attrd/attrd_corosync.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 158d82f..1b56923 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -266,7 +266,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
- bool filter, int is_force_write)
+ bool filter)
{
attribute_value_t *v = NULL;
@@ -309,6 +309,10 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
}
} else {
+ int is_force_write = 0;
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
+
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
@@ -338,15 +342,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
- int is_force_write = 0;
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
- crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
-
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
@@ -361,12 +362,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
--
2.31.1
From c7a1ab819b25e3225c185c1630a7139a96fb5c71 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 9 Jan 2024 16:48:37 -0600
Subject: [PATCH 2/5] Refactor: pacemaker-attrd: drop unused argument from
attrd_peer_sync()
---
daemons/attrd/attrd_corosync.c | 10 ++++++++--
daemons/attrd/attrd_elections.c | 2 +-
daemons/attrd/attrd_messages.c | 2 +-
daemons/attrd/pacemaker-attrd.h | 2 +-
4 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 1b56923..088f00c 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -233,7 +233,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
*/
if (attrd_election_won()
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
- attrd_peer_sync(peer, NULL);
+ attrd_peer_sync(peer);
}
} else {
// Remove all attribute values associated with lost nodes
@@ -535,8 +535,14 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
}
}
+/*!
+ * \internal
+ * \brief Send all known attributes and values to a peer
+ *
+ * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
+ */
void
-attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
+attrd_peer_sync(crm_node_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 82fbe8a..9dbf133 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -23,7 +23,7 @@ attrd_election_cb(gpointer user_data)
attrd_declare_winner();
/* Update the peers after an election */
- attrd_peer_sync(NULL, NULL);
+ attrd_peer_sync(NULL);
/* After winning an election, update the CIB with the values of all
* attributes as the winner knows them.
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
index 5525d4b..13ac01f 100644
--- a/daemons/attrd/attrd_messages.c
+++ b/daemons/attrd/attrd_messages.c
@@ -180,7 +180,7 @@ handle_sync_request(pcmk__request_t *request)
crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster);
- attrd_peer_sync(peer, request->xml);
+ attrd_peer_sync(peer);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 7384188..bacaad6 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -175,7 +175,7 @@ extern GHashTable *peer_protocol_vers;
int attrd_cluster_connect(void);
void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
bool filter);
-void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
+void attrd_peer_sync(crm_node_t *peer);
void attrd_peer_remove(const char *host, bool uncache, const char *source);
void attrd_peer_clear_failure(pcmk__request_t *request);
void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won,
--
2.31.1
From abafae0068e10abb135b0496086947728365299a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 17:31:17 -0600
Subject: [PATCH 3/5] Refactor: pacemaker-attrd: de-functionize
attrd_lookup_or_create_value()
... to make planned changes easier
---
daemons/attrd/attrd_corosync.c | 62 +++++++++++++---------------------
1 file changed, 24 insertions(+), 38 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 088f00c..59e6a26 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -168,40 +168,6 @@ broadcast_local_value(const attribute_t *a)
#define state_text(state) pcmk__s((state), "in unknown state")
-/*!
- * \internal
- * \brief Return a node's value from hash table (creating one if needed)
- *
- * \param[in,out] values Hash table of values
- * \param[in] node_name Name of node to look up
- * \param[in] xml XML describing the attribute
- *
- * \return Pointer to new or existing hash table entry
- */
-static attribute_value_t *
-attrd_lookup_or_create_value(GHashTable *values, const char *node_name,
- const xmlNode *xml)
-{
- attribute_value_t *v = g_hash_table_lookup(values, node_name);
- int is_remote = 0;
-
- if (v == NULL) {
- v = calloc(1, sizeof(attribute_value_t));
- CRM_ASSERT(v != NULL);
-
- pcmk__str_update(&v->nodename, node_name);
- g_hash_table_replace(values, v->nodename, v);
- }
-
- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
- if (is_remote) {
- attrd_set_value_flags(v, attrd_value_remote);
- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
- }
-
- return(v);
-}
-
static void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
@@ -268,18 +234,38 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
bool filter)
{
+ int is_remote = 0;
+ bool changed = false;
attribute_value_t *v = NULL;
- v = attrd_lookup_or_create_value(a->values, host, xml);
+ // Create entry for value if not already existing
+ v = g_hash_table_lookup(a->values, host);
+ if (v == NULL) {
+ v = calloc(1, sizeof(attribute_value_t));
+ CRM_ASSERT(v != NULL);
+
+ pcmk__str_update(&v->nodename, host);
+ g_hash_table_replace(a->values, v->nodename, v);
+ }
+
+ // If value is for a Pacemaker Remote node, remember that
+ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
+ if (is_remote) {
+ attrd_set_value_flags(v, attrd_value_remote);
+ CRM_ASSERT(crm_remote_peer_get(host) != NULL);
+ }
+
+ // Check whether the value changed
+ changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
- if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
- && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
+ if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
+ pcmk__str_casei)) {
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, v->current, value, peer->uname);
v = broadcast_local_value(a);
- } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
+ } else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
--
2.31.1
From 72529ec512fb4938bd8dbbd2caf44bbb1a616826 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 18:04:33 -0600
Subject: [PATCH 4/5] Refactor: pacemaker-attrd: minor shuffling to make
planned changes easier
---
daemons/attrd/attrd_cib.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index bdc0a10..481fea7 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -51,6 +51,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
+ bool status_changed = false;
if (attrd_shutting_down(true)) {
return;
@@ -64,20 +65,22 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
mainloop_set_trigger(attrd_config_read);
}
- if (!attrd_election_won()) {
- // Don't write attributes if we're not the writer
- return;
- }
+ status_changed = cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS);
client_name = crm_element_value(msg, F_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
- // The CIB is still accurate
+ /* This change came from a source that ensured the CIB is consistent
+ * with our attributes table, so we don't need to write anything out.
+ */
return;
}
- if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)
- || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) {
-
+ if (!attrd_election_won()) {
+ // Don't write attributes if we're not the writer
+ return;
+ }
+
+ if (status_changed || cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)) {
/* An unsafe client modified the nodes or status section. Write
* transient attributes to ensure they're up-to-date in the CIB.
*/
--
2.31.1
From b83c2567fb450eec5b18882ded16403831d2c3c0 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 17:53:55 -0600
Subject: [PATCH 5/5] Log: pacemaker-attrd: make sure we don't try to log NULL
---
daemons/attrd/attrd_corosync.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 59e6a26..b348d52 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -229,6 +229,11 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
}
}
+#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
+
+#define readable_peer(p) \
+ (((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer"))
+
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
@@ -262,14 +267,14 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
pcmk__str_casei)) {
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
- attr, host, v->current, value, peer->uname);
+ attr, host, readable_value(v), value, peer->uname);
v = broadcast_local_value(a);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
- pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"),
+ pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->uname,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
@@ -543,12 +548,14 @@ attrd_peer_sync(crm_node_t *peer)
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
- crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
+ crm_debug("Syncing %s[%s]='%s' to %s",
+ a->id, v->nodename, readable_value(v),
+ readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
- crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
+ crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
free_xml(sync);
}
--
2.31.1

View File

@ -1,385 +0,0 @@
From 84d4a0d5f562df91baa0fece45d06ad3732f941c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 11:20:53 -0600
Subject: [PATCH 1/5] Low: pacemaker-attrd: properly validate attribute set
type
The sense of the test was accidentally reversed in 26471a52689
---
daemons/attrd/attrd_attributes.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 8f32988..f059059 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -40,9 +40,9 @@ attrd_create_attribute(xmlNode *xml)
* attributes are not written.
*/
crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private);
- if ((is_private != 0)
- && !pcmk__str_any_of(set_type, XML_TAG_ATTR_SETS, XML_TAG_UTILIZATION,
- NULL)) {
+ if (!is_private && !pcmk__str_any_of(set_type,
+ XML_TAG_ATTR_SETS,
+ XML_TAG_UTILIZATION, NULL)) {
crm_warn("Ignoring attribute %s with invalid set type %s",
pcmk__s(name, "(unidentified)"), set_type);
return NULL;
--
2.31.1
From d0d0511e71fe983a2d89589c39810b79fb48a8ca Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:13:42 -0600
Subject: [PATCH 2/5] Fix: pacemaker-attrd: sync utilization attributes to
peers correctly
Include the set type with attribute syncs.
Previously, utilization attributes would have the correct set_type on the node
where they were set, but peers would store it as a regular node attribute. If
one of those peers became writer, the attribute would get written to the wrong
set.
---
daemons/attrd/attrd_attributes.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index f059059..0ad9630 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -139,6 +139,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
xmlNode *xml = create_xml_node(parent, __func__);
crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
+ crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
--
2.31.1
From 4479ff8507dd69f5946d31cf83c7e47fe15d3bdb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:18:40 -0600
Subject: [PATCH 3/5] Refactor: pacemaker-attrd: functionize getting attribute
set ID
... for future reuse
---
daemons/attrd/attrd_attributes.c | 38 ++++++++++++++++++++++++++++++++
daemons/attrd/attrd_cib.c | 9 +-------
daemons/attrd/pacemaker-attrd.h | 3 ++-
3 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 0ad9630..5727ab8 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -210,3 +210,41 @@ attrd_populate_attribute(xmlNode *xml, const char *attr)
return a;
}
+
+/*!
+ * \internal
+ * \brief Get the XML ID used to write out an attribute set
+ *
+ * \param[in] attr Attribute to get set ID for
+ * \param[in] node_state_id XML ID of node state that attribute value is for
+ *
+ * \return Newly allocated string with XML ID to use for \p attr set
+ */
+char *
+attrd_set_id(const attribute_t *attr, const char *node_state_id)
+{
+ char *set_id = NULL;
+
+ CRM_ASSERT((attr != NULL) && (node_state_id != NULL));
+
+ if (attr->set_id == NULL) {
+ /* @COMPAT This should really take the set type into account. Currently
+ * we use the same XML ID for transient attributes and utilization
+ * attributes. It doesn't cause problems because the status section is
+ * not limited by the schema in any way, but it's still unfortunate.
+ * For backward compatibility reasons, we can't change this.
+ */
+ set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_state_id);
+ } else {
+ /* @COMPAT When the user specifies a set ID for an attribute, it is the
+ * same for every node. That is less than ideal, but again, the schema
+ * doesn't enforce anything for the status section. We couldn't change
+ * it without allowing the set ID to vary per value rather than per
+ * attribute, which would break backward compatibility, pose design
+ * challenges, and potentially cause problems in rolling upgrades.
+ */
+ pcmk__str_update(&set_id, attr->set_id);
+ }
+ crm_xml_sanitize_id(set_id);
+ return set_id;
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 481fea7..08d3425 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -423,17 +423,10 @@ add_unset_attr_update(const attribute_t *attr, const char *attr_id,
static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
- char *set_id = NULL;
+ char *set_id = attrd_set_id(attr, node_id);
char *attr_id = NULL;
int rc = pcmk_rc_ok;
- if (attr->set_id != NULL) {
- pcmk__str_update(&set_id, attr->set_id);
- } else {
- set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id);
- }
- crm_xml_sanitize_id(set_id);
-
if (attr->uuid != NULL) {
pcmk__str_update(&attr_id, attr->uuid);
} else {
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index bacaad6..3da7f8d 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2013-2023 the Pacemaker project contributors
+ * Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -195,6 +195,7 @@ void attrd_clear_value_seen(void);
void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
+char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
enum attrd_write_options {
attrd_write_changed = 0,
--
2.31.1
From eee2169ac348b8ed26ac0b78cb11ddc5cef9384e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:25:59 -0600
Subject: [PATCH 4/5] Refactor: pacemaker-attrd: functionize getting attribute
nvpair ID
... for future reuse
---
daemons/attrd/attrd_attributes.c | 28 ++++++++++++++++++++++++++++
daemons/attrd/attrd_cib.c | 17 +++++------------
daemons/attrd/pacemaker-attrd.h | 1 +
3 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 5727ab8..23de2e2 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -248,3 +248,31 @@ attrd_set_id(const attribute_t *attr, const char *node_state_id)
crm_xml_sanitize_id(set_id);
return set_id;
}
+
+/*!
+ * \internal
+ * \brief Get the XML ID used to write out an attribute value
+ *
+ * \param[in] attr Attribute to get value XML ID for
+ * \param[in] node_state_id UUID of node that attribute value is for
+ *
+ * \return Newly allocated string with XML ID of \p attr value
+ */
+char *
+attrd_nvpair_id(const attribute_t *attr, const char *node_state_id)
+{
+ char *nvpair_id = NULL;
+
+ if (attr->uuid != NULL) {
+ pcmk__str_update(&nvpair_id, attr->uuid);
+
+ } else if (attr->set_id != NULL) {
+ nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id);
+
+ } else {
+ nvpair_id = crm_strdup_printf(XML_CIB_TAG_STATUS "-%s-%s",
+ node_state_id, attr->id);
+ }
+ crm_xml_sanitize_id(nvpair_id);
+ return nvpair_id;
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 08d3425..d42345f 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -424,23 +424,16 @@ static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
char *set_id = attrd_set_id(attr, node_id);
- char *attr_id = NULL;
+ char *nvpair_id = attrd_nvpair_id(attr, node_id);
int rc = pcmk_rc_ok;
- if (attr->uuid != NULL) {
- pcmk__str_update(&attr_id, attr->uuid);
+ if (value == NULL) {
+ rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id);
} else {
- attr_id = crm_strdup_printf("%s-%s", set_id, attr->id);
- }
- crm_xml_sanitize_id(attr_id);
-
- if (value != NULL) {
- rc = add_set_attr_update(attr, attr_id, node_id, set_id, value);
- } else {
- rc = add_unset_attr_update(attr, attr_id, node_id, set_id);
+ rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value);
}
free(set_id);
- free(attr_id);
+ free(nvpair_id);
return rc;
}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 3da7f8d..deec790 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -196,6 +196,7 @@ void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
+char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id);
enum attrd_write_options {
attrd_write_changed = 0,
--
2.31.1
From 2abde6cb87d2e3d31a370c74656f6f7c0818c185 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Jan 2024 10:01:56 -0600
Subject: [PATCH 5/5] Log: pacemaker-attrd: improve some messages for debugging
---
daemons/attrd/attrd_attributes.c | 8 +++++---
daemons/attrd/attrd_cib.c | 13 +++++++++----
daemons/attrd/attrd_corosync.c | 10 ++++++----
3 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 23de2e2..68b9585 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -60,13 +60,10 @@ attrd_create_attribute(xmlNode *xml)
a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);
a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
- crm_trace("Performing all %s operations as user '%s'", a->id, a->user);
if (dampen_s != NULL) {
dampen = crm_get_msec(dampen_s);
}
- crm_trace("Created attribute %s with %s write delay", a->id,
- (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
if(dampen > 0) {
a->timeout_ms = dampen;
@@ -75,6 +72,11 @@ attrd_create_attribute(xmlNode *xml)
crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id);
}
+ crm_trace("Created attribute %s with %s write delay and %s CIB user",
+ a->id,
+ ((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"),
+ pcmk__s(a->user, "default"));
+
g_hash_table_replace(attributes, a->id, a);
return a;
}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index d42345f..cae6846 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -54,6 +54,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
bool status_changed = false;
if (attrd_shutting_down(true)) {
+ crm_debug("Ignoring CIB change during shutdown");
return;
}
@@ -278,11 +279,13 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
- do_crm_log(level, "* %s[%s]=%s",
- a->id, peer, pcmk__s(v->requested, "(null)"));
if (rc == pcmk_ok) {
+ crm_info("* Wrote %s[%s]=%s",
+ a->id, peer, pcmk__s(v->requested, "(unset)"));
pcmk__str_update(&(v->requested), NULL);
} else {
+ do_crm_log(level, "* Could not write %s[%s]=%s",
+ a->id, peer, pcmk__s(v->requested, "(unset)"));
a->changed = true; // Reattempt write below if we are still writer
}
}
@@ -292,6 +295,7 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use
/* We deferred a write of a new update because this update was in
* progress. Write out the new value without additional delay.
*/
+ crm_debug("Pending update for %s can be written now", a->id);
write_attribute(a, false);
/* We're re-attempting a write because the original failed; delay
@@ -593,8 +597,9 @@ write_attribute(attribute_t *a, bool ignore_delay)
continue;
}
- crm_debug("Updating %s[%s]=%s (node uuid=%s id=%" PRIu32 ")",
- a->id, v->nodename, v->current, uuid, v->nodeid);
+ crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
+ a->id, v->nodename, pcmk__s(v->current, "(unset)"),
+ uuid, v->nodeid);
cib_updates++;
/* Preservation of the attribute to transmit alert */
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index b348d52..6fb847b 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -293,7 +293,8 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
- crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr);
+ crm_trace("Delaying write of %s %s for dampening",
+ attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
@@ -307,11 +308,12 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
- crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)",
- attr, host, peer->uname, value);
+ crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
+ attr, host, peer->uname, pcmk__s(value, "unset"));
a->force_write = TRUE;
} else {
- crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
+ crm_trace("%s[%s] from %s is unchanged (%s)",
+ attr, host, peer->uname, pcmk__s(value, "unset"));
}
}
--
2.31.1

View File

@ -1,53 +0,0 @@
From 9c13ce6fe95812308443c188ace8f897e6bce942 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Mon, 29 Jan 2024 11:14:25 -0800
Subject: [PATCH] Fix: tools: crm_attribute emits garbage for --node localhost
or auto
This happens because pcmk__node_attr_target() returns its argument if
its argument is NULL, "auto", or "localhost" and no relevant environment
variables are found. Then crm_attribute frees the return value, makes a
copy of it, and assigns it back to options.dest_uname.
The fix is to check whether the return value is equal to the argument.
Fixes RHEL-23065
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
tools/crm_attribute.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
index d221ab85d..636d03dbd 100644
--- a/tools/crm_attribute.c
+++ b/tools/crm_attribute.c
@@ -766,8 +766,23 @@ main(int argc, char **argv)
const char *target = pcmk__node_attr_target(options.dest_uname);
if (target != NULL) {
- g_free(options.dest_uname);
- options.dest_uname = g_strdup(target);
+ /* If options.dest_uname is "auto" or "localhost", then
+ * pcmk__node_attr_target() may return it, depending on environment
+ * variables. In that case, attribute lookups will fail for "auto"
+ * (unless there's a node named "auto"). attrd maps "localhost" to
+ * the true local node name for queries.
+ *
+ * @TODO
+ * * Investigate whether "localhost" is mapped to a real node name
+ * for non-query commands. If not, possibly modify it so that it
+ * is.
+ * * Map "auto" to "localhost" (probably).
+ */
+ if (target != (const char *) options.dest_uname) {
+ g_free(options.dest_uname);
+ options.dest_uname = g_strdup(target);
+ }
+
} else if (getenv("CIB_file") != NULL && options.dest_uname == NULL) {
get_node_name_from_local();
}
--
2.41.0

View File

@ -1,31 +0,0 @@
From 8dc0d9b43343919edf4f4011ceecfd6b6765b4a4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Wed, 8 May 2024 11:18:50 -0500
Subject: [PATCH] Low: libcib: avoid memory leak in async calls
Never in a release
---
lib/cib/cib_native.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/lib/cib/cib_native.c b/lib/cib/cib_native.c
index 0e502155bc..b014223112 100644
--- a/lib/cib/cib_native.c
+++ b/lib/cib/cib_native.c
@@ -94,9 +94,10 @@ cib_native_perform_op_delegate(cib_t *cib, const char *op, const char *host,
if (!(call_options & cib_sync_call)) {
crm_trace("Async call, returning %d", cib->call_id);
- CRM_CHECK(cib->call_id != 0, return -ENOMSG);
- free_xml(op_reply);
- return cib->call_id;
+ CRM_CHECK(cib->call_id != 0,
+ rc = -ENOMSG; goto done);
+ rc = cib->call_id;
+ goto done;
}
rc = pcmk_ok;
--
2.41.0

View File

@ -1,121 +0,0 @@
From d7c233090057d4f660fa458a2ff97896b15ea951 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 11 Jul 2024 12:43:49 -0700
Subject: [PATCH] Refactor: various: Don't set cluster-layer node ID as XML ID
Currently, we call the pcmk__xe_set_id() function using a stringified
version of the numeric cluster-layer node ID. However, pcmk__xe_set_id()
tries to sanitize its input to a valid XML ID. An XML ID cannot begin
with a digit.
crm_xml_set_id() does not sanitize comprehensively, and in particular,
it does not care whether its argument begins with a digit. So the
current code doesn't cause a problem.
Still, as a best practice, set the PCMK_XA_ID attribute using
crm_xml_add_ll() instead.
Ref T848
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/controld/controld_messages.c | 6 +++++-
lib/cluster/corosync.c | 2 +-
lib/common/ipc_client.c | 2 +-
lib/common/ipc_controld.c | 9 ++++++---
tools/crm_node.c | 4 ++--
5 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/daemons/controld/controld_messages.c b/daemons/controld/controld_messages.c
index bd5237e..0b0f25b 100644
--- a/daemons/controld/controld_messages.c
+++ b/daemons/controld/controld_messages.c
@@ -893,7 +893,11 @@ handle_node_info_request(const xmlNode *msg)
pcmk_is_set(controld_globals.flags,
controld_has_quorum));
- // Check whether client requested node info by ID and/or name
+ /* Check whether client requested node info by ID and/or name
+ *
+ * @TODO A Corosync-layer node ID is of type uint32_t. We should be able to
+ * handle legitimate node IDs greater than INT_MAX, but currently we do not.
+ */
crm_element_value_int(msg, XML_ATTR_ID, &node_id);
if (node_id < 0) {
node_id = 0;
diff --git a/lib/cluster/corosync.c b/lib/cluster/corosync.c
index 374250f..fc33cce 100644
--- a/lib/cluster/corosync.c
+++ b/lib/cluster/corosync.c
@@ -650,7 +650,7 @@ pcmk__corosync_add_nodes(xmlNode *xml_parent)
if (xml_parent) {
xmlNode *node = create_xml_node(xml_parent, XML_CIB_TAG_NODE);
- crm_xml_set_id(node, "%u", nodeid);
+ crm_xml_add_ll(node, XML_ATTR_ID, (long long) nodeid);
crm_xml_add(node, XML_ATTR_UNAME, name);
}
}
diff --git a/lib/common/ipc_client.c b/lib/common/ipc_client.c
index 5e64e23..7696a69 100644
--- a/lib/common/ipc_client.c
+++ b/lib/common/ipc_client.c
@@ -769,7 +769,7 @@ create_purge_node_request(const pcmk_ipc_api_t *api, const char *node_name,
request = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL,
pcmk_ipc_name(api, false), client, NULL);
if (nodeid > 0) {
- crm_xml_set_id(request, "%lu", (unsigned long) nodeid);
+ crm_xml_add_ll(request, XML_ATTR_ID, (unsigned long) nodeid);
}
crm_xml_add(request, XML_ATTR_UNAME, node_name);
break;
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 8e2016e..e4284f5 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -9,9 +9,12 @@
#include <crm_internal.h>
-#include <stdio.h>
-#include <stdbool.h>
#include <errno.h>
+#include <inttypes.h> // PRIu32
+#include <stdbool.h>
+#include <stdint.h> // uint32_t
+#include <stdio.h>
+
#include <libxml/tree.h>
#include <crm/crm.h>
@@ -412,7 +415,7 @@ pcmk_controld_api_node_info(pcmk_ipc_api_t *api, uint32_t nodeid)
return EINVAL;
}
if (nodeid > 0) {
- crm_xml_set_id(request, "%lu", (unsigned long) nodeid);
+ crm_xml_add_ll(request, XML_ATTR_ID, (unsigned long) nodeid);
}
rc = send_controller_request(api, request, true);
diff --git a/tools/crm_node.c b/tools/crm_node.c
index 1e7ce6c..ad8c459 100644
--- a/tools/crm_node.c
+++ b/tools/crm_node.c
@@ -552,7 +552,7 @@ remove_from_section(cib_t *cib, const char *element, const char *section,
}
crm_xml_add(xml, XML_ATTR_UNAME, node_name);
if (node_id > 0) {
- crm_xml_set_id(xml, "%ld", node_id);
+ crm_xml_add_ll(xml, XML_ATTR_ID, node_id);
}
rc = cib->cmds->remove(cib, section, xml, cib_transaction);
free_xml(xml);
@@ -691,7 +691,7 @@ purge_node_from_fencer(const char *node_name, long node_id)
cmd = create_request(CRM_OP_RM_NODE_CACHE, NULL, NULL, "stonith-ng",
crm_system_name, NULL);
if (node_id > 0) {
- crm_xml_set_id(cmd, "%ld", node_id);
+ crm_xml_add_ll(cmd, XML_ATTR_ID, node_id);
}
crm_xml_add(cmd, XML_ATTR_UNAME, node_name);

View File

@ -1,89 +0,0 @@
From 22e093a5bff608c86d0ea68588078ca747a6d945 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Thu, 11 Jul 2024 12:29:34 -0700
Subject: [PATCH] Fix: tools: crm_node -i must initialize nodeid before passing
pointer
This is a regression introduced in 2.1.7 via a27f099.
Currently, crm_node -i passes a pointer to the uninitialized uint32_t
nodeid variable, to pcmk__query_node_info(). Since the pointer is
non-NULL, pcmk__query_node_info() dereferences it. Whatever garbage
value resides there gets passed as the ID to query.
The controller parses the node ID from the request as an int. If the
garbage value is greater than INT_MAX, it overflows to a negative int
value, and the controller (in handle_node_info_request()) defaults it to
0. In that case, there's no problem: we search for the local node name
instead of the garbage node ID.
If the garbage value is less than or equal to INT_MAX, we search for it
directly. We won't find a matching node unless one happens to exist with
that garbage node ID. In the case of no match, crm_node -i outputs "Node
is not known to cluster" instead of the local node's cluster-layer ID.
Thanks to Artur Novik for the report:
https://lists.clusterlabs.org/pipermail/users/2024-July/036270.html
Fixes T847
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
lib/pacemaker/pcmk_cluster_queries.c | 22 +++++++++++-----------
tools/crm_node.c | 2 +-
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/lib/pacemaker/pcmk_cluster_queries.c b/lib/pacemaker/pcmk_cluster_queries.c
index 3229fae3eff..8404584580e 100644
--- a/lib/pacemaker/pcmk_cluster_queries.c
+++ b/lib/pacemaker/pcmk_cluster_queries.c
@@ -586,25 +586,25 @@ pcmk_designated_controller(xmlNodePtr *xml, unsigned int message_timeout_ms)
* the controller
*
* \param[in,out] out Output object
- * \param[in,out] node_id ID of node whose name to get. If \p NULL
- * or 0, get the local node name. If not
- * \p NULL, store the true node ID here on
+ * \param[in,out] node_id ID of node whose info to get. If \p NULL
+ * or 0, get the local node's info. If not
+ * \c NULL, store the true node ID here on
* success.
- * \param[out] node_name If not \p NULL, where to store the node
+ * \param[out] node_name If not \c NULL, where to store the node
* name
- * \param[out] uuid If not \p NULL, where to store the node
+ * \param[out] uuid If not \c NULL, where to store the node
* UUID
- * \param[out] state If not \p NULL, where to store the
+ * \param[out] state If not \c NULL, where to store the
* membership state
- * \param[out] is_remote If not \p NULL, where to store whether the
+ * \param[out] is_remote If not \c NULL, where to store whether the
* node is a Pacemaker Remote node
- * \param[out] have_quorum If not \p NULL, where to store whether the
+ * \param[out] have_quorum If not \c NULL, where to store whether the
* node has quorum
* \param[in] show_output Whether to show the node info
* \param[in] message_timeout_ms How long to wait for a reply from the
- * \p pacemaker-controld API. If 0,
- * \p pcmk_ipc_dispatch_sync will be used.
- * Otherwise, \p pcmk_ipc_dispatch_poll will
+ * \c pacemaker-controld API. If 0,
+ * \c pcmk_ipc_dispatch_sync will be used.
+ * Otherwise, \c pcmk_ipc_dispatch_poll will
* be used.
*
* \return Standard Pacemaker return code
diff --git a/tools/crm_node.c b/tools/crm_node.c
index d4153605a69..8aa8d3d29c7 100644
--- a/tools/crm_node.c
+++ b/tools/crm_node.c
@@ -434,7 +434,7 @@ run_controller_mainloop(void)
static void
print_node_id(void)
{
- uint32_t nodeid;
+ uint32_t nodeid = 0;
int rc = pcmk__query_node_info(out, &nodeid, NULL, NULL, NULL, NULL, NULL,
false, 0);

View File

@ -1,387 +0,0 @@
From 96bb1076281bfe46caab135f24ea24fc02ead388 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Thu, 10 Jul 2025 11:15:20 -0400
Subject: [PATCH 1/5] Refactor: scheduler: Fix formatting in pe_can_fence.
---
lib/pengine/utils.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 4055d6d..19cc5a2 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -63,10 +63,10 @@ pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
} else if (scheduler->no_quorum_policy == pcmk_no_quorum_ignore) {
return true;
- } else if(node == NULL) {
+ } else if (node == NULL) {
return false;
- } else if(node->details->online) {
+ } else if (node->details->online) {
crm_notice("We can fence %s without quorum because they're in our membership",
pe__node_name(node));
return true;
--
2.43.0
From a69d33bb78458e5bb19d07dacc91754856418649 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Fri, 28 Mar 2025 15:08:56 -0400
Subject: [PATCH 2/5] Med: scheduler: Don't always fence online remote nodes.
Let's assume you have a cluster configured as follows:
* Three nodes, plus one Pacemaker Remote node.
* At least two NICs on each node.
* Multiple layers of fencing, including fence_kdump.
* The timeout for fence_kdump is set higher on the real nodes than it is
on the remote node.
* A resource is configured that can only be run on the remote node.
Now, let's assume that the node running the connection resource for the
remote node is disconnect from the rest of the cluster. In testing,
this disconnection was done by bringing one network interface down.
Due to the fence timeouts, the following things will occur:
* The node whose interface was brought down will split off into its own
cluster partition without quorum, while the other two nodes maintain
quorum.
* The partition with quorum will restart the remote node resource on
another real node in the partition.
* The node by itself will be fenced. However, due to the long
fence_kdump timeout, it will continue to make decisions regarding
resources.
* The node by itself will re-assign resources, including the remote
connection resource. This resource will be assigned back to the same
node again.
* The node by itself will decide to fence the remote node, which will
hit the "in our membership" clause of pe_can_fence. This is because
remote nodes are marked as online when they are assigned, not when
they are actually running.
* When the fence_kdump timeout expires, the node by itself will fence
the remote node. This succeeds because there is still a secondary
network connection it can use. This fencing will succeed, causing the
remote node to reboot and then causing a loss of service.
* The node by itself will then be fenced.
The bug to me seems to be that the remote resource is marked as online
when it isn't yet. I think with that changed, all the other remote
fencing related code would then work as intended. However, it probably
has to remain as-is in order to schedule resources on the remote node -
resources probably can't be assigned to an offline node. Making changes
in pe_can_fence seems like the least invasive way to deal with this
problem.
I also think this probably has probably been here for a very long time -
perhaps always - but we just haven't seen it due to the number of things
that have to be configured before it can show up. In particular, the
fencing timeouts and secondary network connection are what allow this
behavior to happen.
I can't think of a good reason why a node without quorum would ever want
to fence a remote node, especially if the connection resource has been
moved to the quorate node.
My fix here therefore is just to test whether there is another node it
could have been moved to and if so, don't fence it.
Fixes T978
Fixes RHEL-84018
---
lib/pengine/utils.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 19cc5a2..402cecd 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -67,6 +67,39 @@ pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
return false;
} else if (node->details->online) {
+ /* Remote nodes are marked online when we assign their resource to a
+ * node, not when they are actually started (see remote_connection_assigned)
+ * so the above test by itself isn't good enough.
+ */
+ if (pe__is_remote_node(node)) {
+ /* If we're on a system without quorum, it's entirely possible that
+ * the remote resource was automatically moved to a node on the
+ * partition with quorum. We can't tell that from this node - the
+ * best we can do is check if it's possible for the resource to run
+ * on another node in the partition with quorum. If so, it has
+ * likely been moved and we shouldn't fence it.
+ *
+ * NOTE: This condition appears to only come up in very limited
+ * circumstances. It at least requires some very lengthy fencing
+ * timeouts set, some way for fencing to still take place (a second
+ * NIC is how I've reproduced it in testing, but fence_scsi or
+ * sbd could work too), and a resource that runs on the remote node.
+ */
+ pcmk_resource_t *rsc = node->details->remote_rsc;
+ pcmk_node_t *n = NULL;
+ GHashTableIter iter;
+
+ g_hash_table_iter_init(&iter, rsc->allowed_nodes);
+ while (g_hash_table_iter_next(&iter, NULL, (void **) &n)) {
+ /* A node that's not online according to this non-quorum node
+ * is a node that's in another partition.
+ */
+ if (!n->details->online) {
+ return false;
+ }
+ }
+ }
+
crm_notice("We can fence %s without quorum because they're in our membership",
pe__node_name(node));
return true;
--
2.43.0
From 7d503928c48b67da0bd06bccaa080f0309f7be90 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Thu, 10 Jul 2025 11:25:05 -0400
Subject: [PATCH 3/5] Med: scheduler: Require a cluster option for new remote
fencing behavior.
We don't have a ton of confidence that the previous patch is the right
thing to do for everyone, so we are going to hide it behind this
undocumented cluster config option. By default, if the option is
missing (or is set to "true"), the existing remote fencing behavior will
be what happens. That is, a node without quorum will be allowed to
fence remote nodes in the same partition even if they've been restarted
elsewhere.
However, with fence-remote-without-quorum="false", we will check to see
if the remote node could possibly have been started on another node and
if so, it will not be fenced.
---
cts/cli/regression.daemons.exp | 5 +++++
include/crm/common/options_internal.h | 5 ++++-
include/crm/common/scheduler.h | 7 ++++++-
lib/common/options.c | 13 ++++++++++++-
lib/pengine/unpack.c | 12 +++++++++++-
lib/pengine/utils.c | 6 +++++-
6 files changed, 43 insertions(+), 5 deletions(-)
diff --git a/cts/cli/regression.daemons.exp b/cts/cli/regression.daemons.exp
index 543d62f..678cb62 100644
--- a/cts/cli/regression.daemons.exp
+++ b/cts/cli/regression.daemons.exp
@@ -292,6 +292,11 @@
<shortdesc lang="en">Whether the cluster should check for active resources during start-up</shortdesc>
<content type="boolean" default=""/>
</parameter>
+ <parameter name="fence-remote-without-quorum">
+ <longdesc lang="en">By default, inquorate nodes can fence Pacemaker Remote nodes that are part of its partition regardless of whether the resource was successfully restarted elsewhere. If false, an additional check will be added to only fence remote nodes if the cluster thinks they were unable to be restarted.</longdesc>
+ <shortdesc lang="en">*** Advanced Use Only *** Whether remote nodes can be fenced without quorum</shortdesc>
+ <content type="boolean" default=""/>
+ </parameter>
<parameter name="stonith-enabled">
<longdesc lang="en">If false, unresponsive nodes are immediately assumed to be harmless, and resources that were active on them may be recovered elsewhere. This can result in a &quot;split-brain&quot; situation, potentially leading to data loss and/or service unavailability.</longdesc>
<shortdesc lang="en">*** Advanced Use Only *** Whether nodes may be fenced as part of recovery</shortdesc>
diff --git a/include/crm/common/options_internal.h b/include/crm/common/options_internal.h
index b727a58..95edc53 100644
--- a/include/crm/common/options_internal.h
+++ b/include/crm/common/options_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2006-2023 the Pacemaker project contributors
+ * Copyright 2006-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -167,5 +167,8 @@ bool pcmk__valid_sbd_timeout(const char *value);
#define PCMK__VALUE_RED "red"
#define PCMK__VALUE_UNFENCING "unfencing"
#define PCMK__VALUE_YELLOW "yellow"
+
+// Cluster options
+#define PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM "fence-remote-without-quorum"
#endif // PCMK__OPTIONS_INTERNAL__H
diff --git a/include/crm/common/scheduler.h b/include/crm/common/scheduler.h
index 96f9a62..259fa5b 100644
--- a/include/crm/common/scheduler.h
+++ b/include/crm/common/scheduler.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -184,6 +184,11 @@ struct pe_working_set_s {
int stonith_timeout; //!< Value of stonith-timeout property
enum pe_quorum_policy no_quorum_policy; //!< Response to loss of quorum
+
+ // Can Pacemaker Remote nodes be fenced even from a node that doesn't
+ // have quorum?
+ bool fence_remote_without_quorum;
+
GHashTable *config_hash; //!< Cluster properties
//!< Ticket constraints unpacked from ticket state
diff --git a/lib/common/options.c b/lib/common/options.c
index 13d58e3..96f059c 100644
--- a/lib/common/options.c
+++ b/lib/common/options.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2022 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -232,6 +232,17 @@ static pcmk__cluster_option_t cluster_options[] = {
},
// Fencing-related options
+ { PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM, NULL, "boolean", NULL,
+ XML_BOOLEAN_TRUE, pcmk__valid_boolean,
+ pcmk__opt_context_schedulerd,
+ N_("*** Advanced Use Only *** "
+ "Whether remote nodes can be fenced without quorum"),
+ N_("By default, inquorate nodes can fence Pacemaker Remote nodes that "
+ "are part of its partition regardless of whether the resource "
+ "was successfully restarted elsewhere. If false, an additional "
+ "check will be added to only fence remote nodes if the cluster "
+ "thinks they were unable to be restarted.")
+ },
{
"stonith-enabled", NULL, "boolean", NULL,
XML_BOOLEAN_TRUE, pcmk__valid_boolean,
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index d484e93..e96b978 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2025 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -435,6 +435,16 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
* 1000));
}
+ value = pcmk__cluster_option(config_hash,
+ PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM);
+ if ((value != NULL) && !crm_is_true(value)) {
+ crm_warn(PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM " disabled - remote "
+ "nodes may not be fenced in inquorate partition");
+ scheduler->fence_remote_without_quorum = false;
+ } else {
+ scheduler->fence_remote_without_quorum = true;
+ }
+
return TRUE;
}
diff --git a/lib/pengine/utils.c b/lib/pengine/utils.c
index 402cecd..f25717d 100644
--- a/lib/pengine/utils.c
+++ b/lib/pengine/utils.c
@@ -70,8 +70,12 @@ pe_can_fence(const pcmk_scheduler_t *scheduler, const pcmk_node_t *node)
/* Remote nodes are marked online when we assign their resource to a
* node, not when they are actually started (see remote_connection_assigned)
* so the above test by itself isn't good enough.
+ *
+ * This is experimental behavior, so the user has to opt into it by
+ * adding fence-remote-without-quorum="false" to their CIB.
*/
- if (pe__is_remote_node(node)) {
+ if (pe__is_remote_node(node)
+ && !scheduler->fence_remote_without_quorum) {
/* If we're on a system without quorum, it's entirely possible that
* the remote resource was automatically moved to a node on the
* partition with quorum. We can't tell that from this node - the
--
2.43.0
From 9ad7c0157cb0ac271ddf9b401e072a7d00de05de Mon Sep 17 00:00:00 2001
From: "Gao,Yan" <ygao@suse.com>
Date: Thu, 10 Apr 2025 12:51:57 +0200
Subject: [PATCH 4/5] Refactor: libcrmcommon: move the new struct member to the
end for backward compatibility
Commit f342b77561 broke backward compatibility by inserting the new
member `fence_remote_without_quorum` into the middle of the
`pe_working_set_s` struct.
---
include/crm/common/scheduler.h | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/include/crm/common/scheduler.h b/include/crm/common/scheduler.h
index 259fa5b..4baee09 100644
--- a/include/crm/common/scheduler.h
+++ b/include/crm/common/scheduler.h
@@ -185,10 +185,6 @@ struct pe_working_set_s {
int stonith_timeout; //!< Value of stonith-timeout property
enum pe_quorum_policy no_quorum_policy; //!< Response to loss of quorum
- // Can Pacemaker Remote nodes be fenced even from a node that doesn't
- // have quorum?
- bool fence_remote_without_quorum;
-
GHashTable *config_hash; //!< Cluster properties
//!< Ticket constraints unpacked from ticket state
@@ -234,6 +230,10 @@ struct pe_working_set_s {
void *priv; //!< For Pacemaker use only
guint node_pending_timeout; //!< Pending join times out after this (ms)
+
+ // Can Pacemaker Remote nodes be fenced even from a node that doesn't
+ // have quorum?
+ bool fence_remote_without_quorum;
};
#ifdef __cplusplus
--
2.43.0
From 8159779e13da5ddd2a6ce77542e945abb4c2663d Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 29 Apr 2025 12:49:45 -0400
Subject: [PATCH 5/5] Refactor: scheduler: Lower fencing log message to debug
level.
Most other things in unpack_config are logged at debug or trace level.
Having the fencing message at the warn level makes it come up quite
often.
---
lib/pengine/unpack.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/pengine/unpack.c b/lib/pengine/unpack.c
index e96b978..5d124a3 100644
--- a/lib/pengine/unpack.c
+++ b/lib/pengine/unpack.c
@@ -438,8 +438,8 @@ unpack_config(xmlNode *config, pcmk_scheduler_t *scheduler)
value = pcmk__cluster_option(config_hash,
PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM);
if ((value != NULL) && !crm_is_true(value)) {
- crm_warn(PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM " disabled - remote "
- "nodes may not be fenced in inquorate partition");
+ crm_debug(PCMK__OPT_FENCE_REMOTE_WITHOUT_QUORUM " disabled - remote "
+ "nodes may not be fenced in inquorate partition");
scheduler->fence_remote_without_quorum = false;
} else {
scheduler->fence_remote_without_quorum = true;
--
2.43.0

View File

@ -0,0 +1,3 @@
#Type Name ID GECOS Home directory Shell
g haclient 189
u hacluster 189:haclient "cluster user" /var/lib/pacemaker /sbin/nologin

File diff suppressed because it is too large Load Diff