Compare commits

..

No commits in common. "c8" and "a9" have entirely different histories.
c8 ... a9

24 changed files with 6770 additions and 14042 deletions

4
.gitignore vendored
View File

@ -1,2 +1,2 @@
SOURCES/nagios-agents-metadata-105ab8a.tar.gz SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
SOURCES/pacemaker-0f7f88312.tar.gz SOURCES/pacemaker-a3f44794f.tar.gz

View File

@ -1,2 +1,2 @@
ea6c0a27fd0ae8ce02f84a11f08a0d79377041c3 SOURCES/nagios-agents-metadata-105ab8a.tar.gz 2cbec94ad67dfbeba75e38d2c3c5c44961b3cd16 SOURCES/nagios-agents-metadata-105ab8a7b2c16b9a29cf1c1596b80136eeef332b.tar.gz
88946a460e3be18852861269f8837aaaf339328c SOURCES/pacemaker-0f7f88312.tar.gz b16198db5f86857ba8bc0ebd04fd386da360478a SOURCES/pacemaker-a3f44794f.tar.gz

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,98 @@
From d8e08729ad5e3dc62f774172f992210902fc0ed4 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 23 Jan 2023 14:25:56 -0600
Subject: [PATCH] High: executor: fix regression in remote node shutdown
This reverts the essential part of d61494347, which was based on misdiagnosing
a remote node shutdown issue. Initially, it was thought that a "TLS server
session ended" log just after a remote node requested shutdown indicated that
the proxy connection coincidentally dropped at that moment. It actually is the
routine stopping of accepting new proxy connections, and existing when that
happens makes the remote node exit immediately without waiting for the
all-clear from the cluster.
Fixes T361
---
daemons/execd/pacemaker-execd.c | 19 +------------------
daemons/execd/pacemaker-execd.h | 3 +--
daemons/execd/remoted_tls.c | 6 +-----
3 files changed, 3 insertions(+), 25 deletions(-)
diff --git a/daemons/execd/pacemaker-execd.c b/daemons/execd/pacemaker-execd.c
index db12674f13..491808974a 100644
--- a/daemons/execd/pacemaker-execd.c
+++ b/daemons/execd/pacemaker-execd.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -305,23 +305,6 @@ lrmd_exit(gpointer data)
return FALSE;
}
-/*!
- * \internal
- * \brief Clean up and exit if shutdown has started
- *
- * \return Doesn't return
- */
-void
-execd_exit_if_shutting_down(void)
-{
-#ifdef PCMK__COMPILE_REMOTE
- if (shutting_down) {
- crm_warn("exit because TLS connection was closed and 'shutting_down' set");
- lrmd_exit(NULL);
- }
-#endif
-}
-
/*!
* \internal
* \brief Request cluster shutdown if appropriate, otherwise exit immediately
diff --git a/daemons/execd/pacemaker-execd.h b/daemons/execd/pacemaker-execd.h
index 6646ae29e3..f78e8dcdde 100644
--- a/daemons/execd/pacemaker-execd.h
+++ b/daemons/execd/pacemaker-execd.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -105,6 +105,5 @@ void remoted_spawn_pidone(int argc, char **argv, char **envp);
int process_lrmd_alert_exec(pcmk__client_t *client, uint32_t id,
xmlNode *request);
void lrmd_drain_alerts(GMainLoop *mloop);
-void execd_exit_if_shutting_down(void);
#endif // PACEMAKER_EXECD__H
diff --git a/daemons/execd/remoted_tls.c b/daemons/execd/remoted_tls.c
index 6f4b2d0062..c65e3f394d 100644
--- a/daemons/execd/remoted_tls.c
+++ b/daemons/execd/remoted_tls.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2012-2022 the Pacemaker project contributors
+ * Copyright 2012-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -250,10 +250,6 @@ static void
tls_server_dropped(gpointer user_data)
{
crm_notice("TLS server session ended");
- /* If we are in the process of shutting down, then we should actually exit.
- * bz#1804259
- */
- execd_exit_if_shutting_down();
return;
}
--
2.31.1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +0,0 @@
From a3bffc7c66bf6f796f977cffd44f223635b008c5 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 20 Dec 2023 13:33:47 -0800
Subject: [PATCH] Doc: Pacemaker Explained: Add replace for
PCMK__REMOTE_SCHEMA_DIR
So that the existing use in local-options.rst expands correctly.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
doc/sphinx/Makefile.am | 1 +
doc/sphinx/conf.py.in | 1 +
3 files changed, 2 insertions(+)
create mode 100644 doc/sphinx/conf.py.in.rej
diff --git a/doc/sphinx/Makefile.am b/doc/sphinx/Makefile.am
index e48e19a..d0309ff 100644
--- a/doc/sphinx/Makefile.am
+++ b/doc/sphinx/Makefile.am
@@ -134,6 +134,7 @@ $(BOOKS:%=%/conf.py): conf.py.in
-e 's#%CRM_SCHEMA_DIRECTORY%#@CRM_SCHEMA_DIRECTORY@#g' \
-e 's#%PACEMAKER_CONFIG_DIR%#@PACEMAKER_CONFIG_DIR@#g' \
-e 's#%PCMK_GNUTLS_PRIORITIES%#@PCMK_GNUTLS_PRIORITIES@#g' \
+ -e 's#%PCMK__REMOTE_SCHEMA_DIR%#@PCMK__REMOTE_SCHEMA_DIR@#g' \
$(<) > "$@"
$(BOOK)/_build: $(STATIC_FILES) $(BOOK)/conf.py $(DEPS_$(BOOK)) $(wildcard $(srcdir)/$(BOOK)/*.rst)
diff --git a/doc/sphinx/conf.py.in b/doc/sphinx/conf.py.in
index 556eb72..511f029 100644
--- a/doc/sphinx/conf.py.in
+++ b/doc/sphinx/conf.py.in
@@ -40,6 +40,7 @@ rst_prolog="""
.. |PCMK_INIT_ENV_FILE| replace:: ``%PACEMAKER_CONFIG_DIR%/pcmk-init.env``
.. |PCMK_LOG_FILE| replace:: %CRM_LOG_DIR%/pacemaker.log
.. |PCMK_GNUTLS_PRIORITIES| replace:: %PCMK_GNUTLS_PRIORITIES%
+.. |PCMK__REMOTE_SCHEMA_DIR| replace:: %PCMK__REMOTE_SCHEMA_DIR%
.. |REMOTE_DISTRO| replace:: AlmaLinux
.. |REMOTE_DISTRO_VER| replace:: 9
"""
--
2.31.1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,107 @@
From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Fri, 3 Feb 2023 12:08:57 -0800
Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one
QE observed a rarely reproducible core dump in the fencer during
Pacemaker shutdown, in which we try to g_source_remove() an op timer
that's already been removed.
free_stonith_remote_op_list()
-> g_hash_table_destroy()
-> g_hash_table_remove_all_nodes()
-> clear_remote_op_timers()
-> g_source_remove()
-> crm_glib_handler()
-> "Source ID 190 was not found when attempting to remove it"
The likely cause is that request_peer_fencing() doesn't set
op->op_timer_one to 0 after calling g_source_remove() on it, so if that
op is still in the stonith_remote_op_list at shutdown with the same
timer, clear_remote_op_timers() tries to remove the source for
op_timer_one again.
There are only five locations that call g_source_remove() on a
remote_fencing_op_t timer.
* Three of them are in clear_remote_op_timers(), which first 0-checks
the timer and then sets it to 0 after g_source_remove().
* One is in remote_op_query_timeout(), which does the same.
* The last is the one we fix here in request_peer_fencing().
I don't know all the conditions of QE's test scenario at this point.
What I do know:
* have-watchdog=true
* stonith-watchdog-timeout=10
* no explicit topology
* fence agent script is missing for the configured fence device
* requested fencing of one node
* cluster shutdown
Fixes RHBZ2166967
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/fenced/fenced_remote.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index d61b5bd..b7426ff 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1825,6 +1825,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
op->state = st_exec;
if (op->op_timer_one) {
g_source_remove(op->op_timer_one);
+ op->op_timer_one = 0;
}
if (!((stonith_watchdog_timeout_ms > 0)
--
2.31.1
From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Wed, 15 Feb 2023 22:30:27 -0800
Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total
remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove
the source from the main loop after returning. Currently this function
is used as the callback only when creating op->op_timer_total.
If we don't set op->op_timer_total to 0 before returning from
remote_op_timeout(), then we can get an assertion and core dump from
GLib when the op's timers are being cleared (either during op
finalization or during fencer shutdown). This is because
clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to
remove the source, but the source has already been removed.
Note that we're already (correctly) zeroing op->op_timer_one and
op->query_timeout as appropriate in their respective callback functions.
Fortunately, GLib doesn't care whether the source has already been
removed before we return G_SOURCE_REMOVE from a callback. So it's safe
to call finalize_op() (which removes all the op's timer sources) from
within a callback.
Fixes RHBZ#2166967
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
daemons/fenced/fenced_remote.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index b7426ff88..adea3d7d8 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata)
{
remote_fencing_op_t *op = userdata;
+ op->op_timer_total = 0;
+
if (op->state == st_done) {
crm_debug("Action '%s' targeting %s for client %s already completed "
CRM_XS " id=%.8s",
--
2.39.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,151 @@
From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Tue, 14 Feb 2023 14:00:37 -0500
Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in
pcmk__attrd_api_query.
According to the header file, if node is NULL, pcmk__attrd_api_query
should query the value of the given attribute on all cluster nodes.
This is also what the server expects and how attrd_updater is supposed
to work.
However, pcmk__attrd_api_query has no way of letting callers decide
whether they want to query all nodes or whether they want to use the
local node. We were passing NULL for the node name, which it took to
mean it should look up the local node name. This calls
pcmk__node_attr_target, which probes the local cluster name and returns
that to pcmk__attrd_api_query. If it returns non-NULL, that value will
then be put into the XML IPC call which means the server will only
return the value for that node.
In testing this was usually fine. However, in pratice, the methods
pcmk__node_attr_target uses to figure out the local cluster node name
involves checking the OCF_RESKEY_CRM_meta_on_node environment variable
among others.
This variable was never set in testing, but can be set in the real
world. This leads to circumstances where the user did "attrd_updater -QA"
expecting to get the values on all nodes, but instead only got the value
on the local cluster node.
In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never
called if the node was NULL but was called otherwise.
The fix is to modify pcmk__attrd_api_query to take an option for
querying all nodes. If that's present, we'll query all nodes. If it's
not present, we'll look at the given node name - NULL means look it up,
anything else means just that node.
Regression in 2.1.5 introduced by eb20a65577
---
include/crm/common/attrd_internal.h | 6 +++++-
include/crm/common/ipc_attrd_internal.h | 7 +++++--
lib/common/ipc_attrd.c | 12 ++++++++----
tools/attrd_updater.c | 5 +++--
4 files changed, 21 insertions(+), 9 deletions(-)
diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h
index 389be48..7337c38 100644
--- a/include/crm/common/attrd_internal.h
+++ b/include/crm/common/attrd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2022 the Pacemaker project contributors
+ * Copyright 2004-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts {
pcmk__node_attr_perm = (1 << 5),
pcmk__node_attr_sync_local = (1 << 6),
pcmk__node_attr_sync_cluster = (1 << 7),
+ // pcmk__node_attr_utilization is 8, but that has not been backported.
+ // I'm leaving the gap here in case we backport that in the future and
+ // also to avoid problems on mixed-version clusters.
+ pcmk__node_attr_query_all = (1 << 9),
};
#define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \
diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h
index 2c6713f..b1b7584 100644
--- a/include/crm/common/ipc_attrd_internal.h
+++ b/include/crm/common/ipc_attrd_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2022 the Pacemaker project contributors
+ * Copyright 2022-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node);
*
* \param[in,out] api Connection to pacemaker-attrd
* \param[in] node Look up the attribute for this node
- * (or NULL for all nodes)
+ * (or NULL for the local node)
* \param[in] name Attribute name
* \param[in] options Bitmask of pcmk__node_attr_opts
*
+ * \note Passing pcmk__node_attr_query_all will cause the function to query
+ * the value of \p name on all nodes, regardless of the value of \p node.
+ *
* \return Standard Pacemaker return code
*/
int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c
index 4606509..dece49b 100644
--- a/lib/common/ipc_attrd.c
+++ b/lib/common/ipc_attrd.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2011-2022 the Pacemaker project contributors
+ * Copyright 2011-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name,
return EINVAL;
}
- target = pcmk__node_attr_target(node);
+ if (pcmk_is_set(options, pcmk__node_attr_query_all)) {
+ node = NULL;
+ } else {
+ target = pcmk__node_attr_target(node);
- if (target != NULL) {
- node = target;
+ if (target != NULL) {
+ node = target;
+ }
}
request = create_attrd_op(NULL);
diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c
index 3cd766d..cbd341d 100644
--- a/tools/attrd_updater.c
+++ b/tools/attrd_updater.c
@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type,
static int
send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all)
{
+ uint32_t options = pcmk__node_attr_none;
pcmk_ipc_api_t *attrd_api = NULL;
int rc = pcmk_rc_ok;
@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no
/* Decide which node(s) to query */
if (query_all == TRUE) {
- attr_node = NULL;
+ options |= pcmk__node_attr_query_all;
}
- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0);
+ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options);
if (rc != pcmk_rc_ok) {
g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)",
--
2.31.1

View File

@ -1,276 +0,0 @@
From d50bbafc32428e873c0052a9defcf93d2e52667e Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 11:35:11 -0500
Subject: [PATCH 1/3] Refactor: libcrmcommon: Split feature set check into its
own function.
---
include/crm/common/cib_internal.h | 4 +++-
lib/cib/cib_utils.c | 12 ++++++------
lib/common/cib.c | 18 +++++++++++++++++-
3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/include/crm/common/cib_internal.h b/include/crm/common/cib_internal.h
index c41c12e..fa65e58 100644
--- a/include/crm/common/cib_internal.h
+++ b/include/crm/common/cib_internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2023 the Pacemaker project contributors
+ * Copyright 2023-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -16,6 +16,8 @@ extern "C" {
const char *pcmk__cib_abs_xpath_for(const char *element);
+int pcmk__check_feature_set(const char *cib_version);
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
index 0082eef..bf2982c 100644
--- a/lib/cib/cib_utils.c
+++ b/lib/cib/cib_utils.c
@@ -353,7 +353,6 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
xmlNode *patchset_cib = NULL;
xmlNode *local_diff = NULL;
- const char *new_version = NULL;
const char *user = crm_element_value(req, F_CIB_USER);
bool with_digest = false;
@@ -470,12 +469,13 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
}
if (scratch) {
- new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
+ const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
- if (new_version && compare_version(new_version, CRM_FEATURE_SET) > 0) {
- crm_err("Discarding update with feature set '%s' greater than our own '%s'",
- new_version, CRM_FEATURE_SET);
- rc = -EPROTONOSUPPORT;
+ rc = pcmk__check_feature_set(new_version);
+ if (rc != pcmk_rc_ok) {
+ pcmk__config_err("Discarding update with feature set '%s' greater than our own '%s'",
+ new_version, CRM_FEATURE_SET);
+ rc = pcmk_rc2legacy(rc);
goto done;
}
}
diff --git a/lib/common/cib.c b/lib/common/cib.c
index fee7881..cbebc2e 100644
--- a/lib/common/cib.c
+++ b/lib/common/cib.c
@@ -1,6 +1,6 @@
/*
* Original copyright 2004 International Business Machines
- * Later changes copyright 2008-2023 the Pacemaker project contributors
+ * Later changes copyright 2008-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -173,3 +173,19 @@ pcmk_find_cib_element(xmlNode *cib, const char *element_name)
{
return get_xpath_object(pcmk_cib_xpath_for(element_name), cib, LOG_TRACE);
}
+
+/*!
+ * \internal
+ * \brief Check that the feature set in the CIB is supported on this node
+ *
+ * \param[in] new_version XML_ATTR_CRM_VERSION attribute from the CIB
+ */
+int
+pcmk__check_feature_set(const char *cib_version)
+{
+ if (cib_version && compare_version(cib_version, CRM_FEATURE_SET) > 0) {
+ return EPROTONOSUPPORT;
+ }
+
+ return pcmk_rc_ok;
+}
--
2.31.1
From d89fd8336ae47d892201513c99773705d57f15f0 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 13:46:42 -0500
Subject: [PATCH 2/3] Feature: scheduler: Check the CIB feature set in
cluster_status.
This adds the check that was previously only in cib_perform_op to the
scheduler code, ensuring that any daemon or tool that calls the
scheduler will check that the feature set in the CIB is supported.
---
lib/pengine/status.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/lib/pengine/status.c b/lib/pengine/status.c
index e6ec237..1294803 100644
--- a/lib/pengine/status.c
+++ b/lib/pengine/status.c
@@ -14,6 +14,7 @@
#include <crm/crm.h>
#include <crm/msg_xml.h>
#include <crm/common/xml.h>
+#include <crm/common/cib_internal.h>
#include <glib.h>
@@ -70,12 +71,21 @@ pe_free_working_set(pcmk_scheduler_t *scheduler)
gboolean
cluster_status(pcmk_scheduler_t * scheduler)
{
+ const char *new_version = NULL;
xmlNode *section = NULL;
if ((scheduler == NULL) || (scheduler->input == NULL)) {
return FALSE;
}
+ new_version = crm_element_value(scheduler->input, XML_ATTR_CRM_VERSION);
+
+ if (pcmk__check_feature_set(new_version) != pcmk_rc_ok) {
+ pcmk__config_err("Can't process CIB with feature set '%s' greater than our own '%s'",
+ new_version, CRM_FEATURE_SET);
+ return FALSE;
+ }
+
crm_trace("Beginning unpack");
if (scheduler->failed != NULL) {
--
2.31.1
From a3428926d37af506014a6b462d1308d8541c5932 Mon Sep 17 00:00:00 2001
From: Chris Lumens <clumens@redhat.com>
Date: Wed, 10 Jan 2024 14:56:36 -0500
Subject: [PATCH 3/3] Low: libcib: Do not check CIB feature set for files in
cib_perform_op.
This is related to the previous feature for transferring schema files to
older remote nodes. In that case, the newer schema files may also have
a newer feature set than the node supports, so the transferred files are
still not usable.
However, the feature set only matters for the scheduler, not for most
command line tools (obviously, crm_simulate would still care). So in
those cases, we can just disable the feature set check if the CIB was
read in from a file. For the scheduler, the check is still performed as
part of cluster_status.
---
cts/cli/regression.tools.exp | 2 +-
daemons/based/based_callbacks.c | 4 ++--
include/crm/cib/internal.h | 4 ++--
lib/cib/cib_file.c | 2 +-
lib/cib/cib_utils.c | 15 +++++++++------
5 files changed, 15 insertions(+), 12 deletions(-)
diff --git a/cts/cli/regression.tools.exp b/cts/cli/regression.tools.exp
index 417b5cd..c81c420 100644
--- a/cts/cli/regression.tools.exp
+++ b/cts/cli/regression.tools.exp
@@ -7939,7 +7939,7 @@ unpack_config warning: Blind faith: not fencing unseen nodes
=#=#=#= End test: Verbosely verify a file-specified invalid configuration, outputting as xml - Invalid configuration (78) =#=#=#=
* Passed: crm_verify - Verbosely verify a file-specified invalid configuration, outputting as xml
=#=#=#= Begin test: Verbosely verify another file-specified invalid configuration, outputting as xml =#=#=#=
-(cluster_status@status.c:113) warning: Fencing and resource management disabled due to lack of quorum
+(cluster_status@status.c:123) warning: Fencing and resource management disabled due to lack of quorum
<pacemaker-result api-version="X" request="crm_verify_invalid_no_stonith.xml --output-as=xml --verbose">
<status code="78" message="Invalid configuration">
<errors>
diff --git a/daemons/based/based_callbacks.c b/daemons/based/based_callbacks.c
index 5f3dc62..f16e4d9 100644
--- a/daemons/based/based_callbacks.c
+++ b/daemons/based/based_callbacks.c
@@ -1362,7 +1362,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation,
input = prepare_input(request, operation->type, &section);
if (!pcmk_is_set(operation->flags, cib__op_attr_modifies)) {
- rc = cib_perform_op(op, call_options, op_function, true, section,
+ rc = cib_perform_op(NULL, op, call_options, op_function, true, section,
request, input, false, &config_changed, &the_cib,
&result_cib, NULL, &output);
@@ -1395,7 +1395,7 @@ cib_process_command(xmlNode *request, const cib__operation_t *operation,
}
// result_cib must not be modified after cib_perform_op() returns
- rc = cib_perform_op(op, call_options, op_function, false, section,
+ rc = cib_perform_op(NULL, op, call_options, op_function, false, section,
request, input, manage_counters, &config_changed,
&the_cib, &result_cib, cib_diff, &output);
diff --git a/include/crm/cib/internal.h b/include/crm/cib/internal.h
index 9d54d52..b6d6871 100644
--- a/include/crm/cib/internal.h
+++ b/include/crm/cib/internal.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2004-2023 the Pacemaker project contributors
+ * Copyright 2004-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -206,7 +206,7 @@ int cib__get_notify_patchset(const xmlNode *msg, const xmlNode **patchset);
bool cib__element_in_patchset(const xmlNode *patchset, const char *element);
-int cib_perform_op(const char *op, int call_options, cib__op_fn_t fn,
+int cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn,
bool is_query, const char *section, xmlNode *req,
xmlNode *input, bool manage_counters, bool *config_changed,
xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff,
diff --git a/lib/cib/cib_file.c b/lib/cib/cib_file.c
index a279823..9dd952c 100644
--- a/lib/cib/cib_file.c
+++ b/lib/cib/cib_file.c
@@ -245,7 +245,7 @@ cib_file_process_request(cib_t *cib, xmlNode *request, xmlNode **output)
data = pcmk_find_cib_element(data, section);
}
- rc = cib_perform_op(op, call_options, op_function, read_only, section,
+ rc = cib_perform_op(cib, op, call_options, op_function, read_only, section,
request, data, true, &changed, &private->cib_xml,
&result_cib, &cib_diff, output);
diff --git a/lib/cib/cib_utils.c b/lib/cib/cib_utils.c
index bf2982c..9c3f9f1 100644
--- a/lib/cib/cib_utils.c
+++ b/lib/cib/cib_utils.c
@@ -339,11 +339,10 @@ should_copy_cib(const char *op, const char *section, int call_options)
}
int
-cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
- const char *section, xmlNode *req, xmlNode *input,
- bool manage_counters, bool *config_changed,
- xmlNode **current_cib, xmlNode **result_cib, xmlNode **diff,
- xmlNode **output)
+cib_perform_op(cib_t *cib, const char *op, int call_options, cib__op_fn_t fn,
+ bool is_query, const char *section, xmlNode *req, xmlNode *input,
+ bool manage_counters, bool *config_changed, xmlNode **current_cib,
+ xmlNode **result_cib, xmlNode **diff, xmlNode **output)
{
int rc = pcmk_ok;
bool check_schema = true;
@@ -468,7 +467,11 @@ cib_perform_op(const char *op, int call_options, cib__op_fn_t fn, bool is_query,
goto done;
}
- if (scratch) {
+ /* If the CIB is from a file, we don't need to check that the feature set is
+ * supported. All we care about in that case is the schema version, which
+ * is checked elsewhere.
+ */
+ if (scratch && (cib == NULL || cib->variant != cib_file)) {
const char *new_version = crm_element_value(scratch, XML_ATTR_CRM_VERSION);
rc = pcmk__check_feature_set(new_version);
--
2.31.1

View File

@ -0,0 +1,142 @@
From 17cc49e1564b0ae55cc8212d14c5c055f88040da Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 14 Feb 2023 15:35:37 +0100
Subject: [PATCH] Fix: watchdog-fencing: terminate dangling timer before
watchdog-waiting
---
daemons/fenced/fenced_remote.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index 5c3fe25e3..aab185adb 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2009-2022 the Pacemaker project contributors
+ * Copyright 2009-2023 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -1702,6 +1702,10 @@ check_watchdog_fencing_and_wait(remote_fencing_op_t * op)
"client %s " CRM_XS " id=%.8s",
(stonith_watchdog_timeout_ms / 1000),
op->target, op->action, op->client_name, op->id);
+
+ if (op->op_timer_one) {
+ g_source_remove(op->op_timer_one);
+ }
op->op_timer_one = g_timeout_add(stonith_watchdog_timeout_ms,
remote_op_watchdog_done, op);
return TRUE;
--
2.39.0
From f2cc2a4277124230903a18713e50604a8f1842cd Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Wed, 1 Mar 2023 15:00:15 +0100
Subject: [PATCH] Refactor: watchdog-fencing: convenience function
pcmk__is_fencing_action
for consistency and add comment making clear why this block exits
with new timer set in any case
---
daemons/fenced/fenced_remote.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index aab185adb..e0f8de057 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -1834,7 +1834,7 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
if (!((stonith_watchdog_timeout_ms > 0)
&& (pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
|| (pcmk__str_eq(peer->host, op->target, pcmk__str_casei)
- && !pcmk__str_eq(op->action, "on", pcmk__str_none)))
+ && pcmk__is_fencing_action(op->action)))
&& check_watchdog_fencing_and_wait(op))) {
/* Some thoughts about self-fencing cases reaching this point:
@@ -1854,6 +1854,9 @@ request_peer_fencing(remote_fencing_op_t *op, peer_device_info_t *peer)
Otherwise the selection of stonith-watchdog-timeout at
least is questionable.
*/
+
+ /* coming here we're not waiting for watchdog timeout -
+ thus engage timer with timout evaluated before */
op->op_timer_one = g_timeout_add((1000 * timeout_one), remote_op_timeout_one, op);
}
--
2.39.0
From c4eb45a986f8865fc5e69350fd5b9f4b056d9d69 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 14 Feb 2023 11:57:17 +0100
Subject: [PATCH] Fix: watchdog-fencing: correctly derive timeout with topology
up to now the timeout for watchdog-fencing was just added to
the overall timeout if the node to be fenced was visible and
reported back to the query.
---
daemons/fenced/fenced_remote.c | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c
index e0f8de057..3b7ab05e9 100644
--- a/daemons/fenced/fenced_remote.c
+++ b/daemons/fenced/fenced_remote.c
@@ -969,8 +969,9 @@ advance_topology_level(remote_fencing_op_t *op, bool empty_ok)
return pcmk_rc_ok;
}
- crm_info("All fencing options targeting %s for client %s@%s failed "
+ crm_info("All %sfencing options targeting %s for client %s@%s failed "
CRM_XS " id=%.8s",
+ (stonith_watchdog_timeout_ms > 0)?"non-watchdog ":"",
op->target, op->client_name, op->originator, op->id);
return ENODEV;
}
@@ -1434,8 +1435,17 @@ stonith_choose_peer(remote_fencing_op_t * op)
&& pcmk_is_set(op->call_options, st_opt_topology)
&& (advance_topology_level(op, false) == pcmk_rc_ok));
- crm_notice("Couldn't find anyone to fence (%s) %s using %s",
- op->action, op->target, (device? device : "any device"));
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__is_fencing_action(op->action)
+ && pcmk__str_eq(device, STONITH_WATCHDOG_ID, pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ crm_info("Couldn't contact watchdog-fencing target-node (%s)",
+ op->target);
+ /* check_watchdog_fencing_and_wait will log additional info */
+ } else {
+ crm_notice("Couldn't find anyone to fence (%s) %s using %s",
+ op->action, op->target, (device? device : "any device"));
+ }
return NULL;
}
@@ -1531,6 +1541,18 @@ get_op_total_timeout(const remote_fencing_op_t *op,
continue;
}
for (device_list = tp->levels[i]; device_list; device_list = device_list->next) {
+ /* in case of watchdog-device we add the timeout to the budget
+ regardless of if we got a reply or not
+ */
+ if ((stonith_watchdog_timeout_ms > 0)
+ && pcmk__is_fencing_action(op->action)
+ && pcmk__str_eq(device_list->data, STONITH_WATCHDOG_ID,
+ pcmk__str_none)
+ && node_does_watchdog_fencing(op->target)) {
+ total_timeout += stonith_watchdog_timeout_ms / 1000;
+ continue;
+ }
+
for (iter = op->query_results; iter != NULL; iter = iter->next) {
const peer_device_info_t *peer = iter->data;
--
2.39.0

View File

@ -0,0 +1,26 @@
From ebac530c815a62f7c3a1c24f64e9a530d9753dbe Mon Sep 17 00:00:00 2001
From: Hideo Yamauchi <renayama19661014@ybb.ne.jp>
Date: Wed, 19 Jul 2023 18:21:07 +0900
Subject: [PATCH] High: tools: The dampen parameter is disabled when setting
values with attrd_updater.
---
tools/attrd_updater.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c
index b615a3575..4688b9ff6 100644
--- a/tools/attrd_updater.c
+++ b/tools/attrd_updater.c
@@ -445,7 +445,7 @@ send_attrd_update(char command, const char *attr_node, const char *attr_name,
case 'U':
rc = pcmk__attrd_api_update(NULL, attr_node, attr_name, attr_value,
- NULL, attr_set, NULL,
+ attr_dampen, attr_set, NULL,
attr_options | pcmk__node_attr_value);
break;
--
2.41.0

File diff suppressed because it is too large Load Diff

View File

@ -1,373 +0,0 @@
From 4823643bef8801b33688167b159bb531bcdf8911 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 4 Jan 2024 17:10:08 -0600
Subject: [PATCH 1/5] Refactor: pacemaker-attrd: drop redundant argument from
update_attr_on_host()
It can check for a force-write via its xml argument, to simplify the caller
---
daemons/attrd/attrd_corosync.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 158d82f..1b56923 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -266,7 +266,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
- bool filter, int is_force_write)
+ bool filter)
{
attribute_value_t *v = NULL;
@@ -309,6 +309,10 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
}
} else {
+ int is_force_write = 0;
+
+ crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
+
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
@@ -338,15 +342,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
const char *attr = crm_element_value(xml, PCMK__XA_ATTR_NAME);
const char *value = crm_element_value(xml, PCMK__XA_ATTR_VALUE);
const char *host = crm_element_value(xml, PCMK__XA_ATTR_NODE_NAME);
- int is_force_write = 0;
if (attr == NULL) {
crm_warn("Could not update attribute: peer did not specify name");
return;
}
- crm_element_value_int(xml, PCMK__XA_ATTR_FORCE, &is_force_write);
-
a = attrd_populate_attribute(xml, attr);
if (a == NULL) {
return;
@@ -361,12 +362,12 @@ attrd_peer_update_one(const crm_node_t *peer, xmlNode *xml, bool filter)
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, (gpointer *) & host, NULL)) {
- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
} else {
// Update attribute value for the given host
- update_attr_on_host(a, peer, xml, attr, value, host, filter, is_force_write);
+ update_attr_on_host(a, peer, xml, attr, value, host, filter);
}
/* If this is a message from some attrd instance broadcasting its protocol
--
2.31.1
From c7a1ab819b25e3225c185c1630a7139a96fb5c71 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 9 Jan 2024 16:48:37 -0600
Subject: [PATCH 2/5] Refactor: pacemaker-attrd: drop unused argument from
attrd_peer_sync()
---
daemons/attrd/attrd_corosync.c | 10 ++++++++--
daemons/attrd/attrd_elections.c | 2 +-
daemons/attrd/attrd_messages.c | 2 +-
daemons/attrd/pacemaker-attrd.h | 2 +-
4 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 1b56923..088f00c 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -233,7 +233,7 @@ attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *da
*/
if (attrd_election_won()
&& !pcmk_is_set(peer->flags, crm_remote_node)) {
- attrd_peer_sync(peer, NULL);
+ attrd_peer_sync(peer);
}
} else {
// Remove all attribute values associated with lost nodes
@@ -535,8 +535,14 @@ attrd_peer_remove(const char *host, bool uncache, const char *source)
}
}
+/*!
+ * \internal
+ * \brief Send all known attributes and values to a peer
+ *
+ * \param[in] peer Peer to send sync to (if NULL, broadcast to all peers)
+ */
void
-attrd_peer_sync(crm_node_t *peer, xmlNode *xml)
+attrd_peer_sync(crm_node_t *peer)
{
GHashTableIter aIter;
GHashTableIter vIter;
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 82fbe8a..9dbf133 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -23,7 +23,7 @@ attrd_election_cb(gpointer user_data)
attrd_declare_winner();
/* Update the peers after an election */
- attrd_peer_sync(NULL, NULL);
+ attrd_peer_sync(NULL);
/* After winning an election, update the CIB with the values of all
* attributes as the winner knows them.
diff --git a/daemons/attrd/attrd_messages.c b/daemons/attrd/attrd_messages.c
index 5525d4b..13ac01f 100644
--- a/daemons/attrd/attrd_messages.c
+++ b/daemons/attrd/attrd_messages.c
@@ -180,7 +180,7 @@ handle_sync_request(pcmk__request_t *request)
crm_node_t *peer = pcmk__get_node(0, request->peer, NULL,
pcmk__node_search_cluster);
- attrd_peer_sync(peer, request->xml);
+ attrd_peer_sync(peer);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
} else {
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 7384188..bacaad6 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -175,7 +175,7 @@ extern GHashTable *peer_protocol_vers;
int attrd_cluster_connect(void);
void attrd_peer_update(const crm_node_t *peer, xmlNode *xml, const char *host,
bool filter);
-void attrd_peer_sync(crm_node_t *peer, xmlNode *xml);
+void attrd_peer_sync(crm_node_t *peer);
void attrd_peer_remove(const char *host, bool uncache, const char *source);
void attrd_peer_clear_failure(pcmk__request_t *request);
void attrd_peer_sync_response(const crm_node_t *peer, bool peer_won,
--
2.31.1
From abafae0068e10abb135b0496086947728365299a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 17:31:17 -0600
Subject: [PATCH 3/5] Refactor: pacemaker-attrd: de-functionize
attrd_lookup_or_create_value()
... to make planned changes easier
---
daemons/attrd/attrd_corosync.c | 62 +++++++++++++---------------------
1 file changed, 24 insertions(+), 38 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 088f00c..59e6a26 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -168,40 +168,6 @@ broadcast_local_value(const attribute_t *a)
#define state_text(state) pcmk__s((state), "in unknown state")
-/*!
- * \internal
- * \brief Return a node's value from hash table (creating one if needed)
- *
- * \param[in,out] values Hash table of values
- * \param[in] node_name Name of node to look up
- * \param[in] xml XML describing the attribute
- *
- * \return Pointer to new or existing hash table entry
- */
-static attribute_value_t *
-attrd_lookup_or_create_value(GHashTable *values, const char *node_name,
- const xmlNode *xml)
-{
- attribute_value_t *v = g_hash_table_lookup(values, node_name);
- int is_remote = 0;
-
- if (v == NULL) {
- v = calloc(1, sizeof(attribute_value_t));
- CRM_ASSERT(v != NULL);
-
- pcmk__str_update(&v->nodename, node_name);
- g_hash_table_replace(values, v->nodename, v);
- }
-
- crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
- if (is_remote) {
- attrd_set_value_flags(v, attrd_value_remote);
- CRM_ASSERT(crm_remote_peer_get(node_name) != NULL);
- }
-
- return(v);
-}
-
static void
attrd_peer_change_cb(enum crm_status_type kind, crm_node_t *peer, const void *data)
{
@@ -268,18 +234,38 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
bool filter)
{
+ int is_remote = 0;
+ bool changed = false;
attribute_value_t *v = NULL;
- v = attrd_lookup_or_create_value(a->values, host, xml);
+ // Create entry for value if not already existing
+ v = g_hash_table_lookup(a->values, host);
+ if (v == NULL) {
+ v = calloc(1, sizeof(attribute_value_t));
+ CRM_ASSERT(v != NULL);
+
+ pcmk__str_update(&v->nodename, host);
+ g_hash_table_replace(a->values, v->nodename, v);
+ }
+
+ // If value is for a Pacemaker Remote node, remember that
+ crm_element_value_int(xml, PCMK__XA_ATTR_IS_REMOTE, &is_remote);
+ if (is_remote) {
+ attrd_set_value_flags(v, attrd_value_remote);
+ CRM_ASSERT(crm_remote_peer_get(host) != NULL);
+ }
+
+ // Check whether the value changed
+ changed = !pcmk__str_eq(v->current, value, pcmk__str_casei);
- if (filter && !pcmk__str_eq(v->current, value, pcmk__str_casei)
- && pcmk__str_eq(host, attrd_cluster->uname, pcmk__str_casei)) {
+ if (changed && filter && pcmk__str_eq(host, attrd_cluster->uname,
+ pcmk__str_casei)) {
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
attr, host, v->current, value, peer->uname);
v = broadcast_local_value(a);
- } else if (!pcmk__str_eq(v->current, value, pcmk__str_casei)) {
+ } else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
--
2.31.1
From 72529ec512fb4938bd8dbbd2caf44bbb1a616826 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 18:04:33 -0600
Subject: [PATCH 4/5] Refactor: pacemaker-attrd: minor shuffling to make
planned changes easier
---
daemons/attrd/attrd_cib.c | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index bdc0a10..481fea7 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -51,6 +51,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
{
const xmlNode *patchset = NULL;
const char *client_name = NULL;
+ bool status_changed = false;
if (attrd_shutting_down(true)) {
return;
@@ -64,20 +65,22 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
mainloop_set_trigger(attrd_config_read);
}
- if (!attrd_election_won()) {
- // Don't write attributes if we're not the writer
- return;
- }
+ status_changed = cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS);
client_name = crm_element_value(msg, F_CIB_CLIENTNAME);
if (!cib__client_triggers_refresh(client_name)) {
- // The CIB is still accurate
+ /* This change came from a source that ensured the CIB is consistent
+ * with our attributes table, so we don't need to write anything out.
+ */
return;
}
- if (cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)
- || cib__element_in_patchset(patchset, XML_CIB_TAG_STATUS)) {
-
+ if (!attrd_election_won()) {
+ // Don't write attributes if we're not the writer
+ return;
+ }
+
+ if (status_changed || cib__element_in_patchset(patchset, XML_CIB_TAG_NODES)) {
/* An unsafe client modified the nodes or status section. Write
* transient attributes to ensure they're up-to-date in the CIB.
*/
--
2.31.1
From b83c2567fb450eec5b18882ded16403831d2c3c0 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 11 Jan 2024 17:53:55 -0600
Subject: [PATCH 5/5] Log: pacemaker-attrd: make sure we don't try to log NULL
---
daemons/attrd/attrd_corosync.c | 15 +++++++++++----
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 59e6a26..b348d52 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -229,6 +229,11 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
}
}
+#define readable_value(rv_v) pcmk__s((rv_v)->current, "(unset)")
+
+#define readable_peer(p) \
+ (((p) == NULL)? "all peers" : pcmk__s((p)->uname, "unknown peer"))
+
static void
update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
const char *attr, const char *value, const char *host,
@@ -262,14 +267,14 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
pcmk__str_casei)) {
crm_notice("%s[%s]: local value '%s' takes priority over '%s' from %s",
- attr, host, v->current, value, peer->uname);
+ attr, host, readable_value(v), value, peer->uname);
v = broadcast_local_value(a);
} else if (changed) {
crm_notice("Setting %s[%s]%s%s: %s -> %s "
CRM_XS " from %s with %s write delay",
attr, host, a->set_type ? " in " : "",
- pcmk__s(a->set_type, ""), pcmk__s(v->current, "(unset)"),
+ pcmk__s(a->set_type, ""), readable_value(v),
pcmk__s(value, "(unset)"), peer->uname,
(a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
pcmk__str_update(&v->current, value);
@@ -543,12 +548,14 @@ attrd_peer_sync(crm_node_t *peer)
while (g_hash_table_iter_next(&aIter, NULL, (gpointer *) & a)) {
g_hash_table_iter_init(&vIter, a->values);
while (g_hash_table_iter_next(&vIter, NULL, (gpointer *) & v)) {
- crm_debug("Syncing %s[%s] = %s to %s", a->id, v->nodename, v->current, peer?peer->uname:"everyone");
+ crm_debug("Syncing %s[%s]='%s' to %s",
+ a->id, v->nodename, readable_value(v),
+ readable_peer(peer));
attrd_add_value_xml(sync, a, v, false);
}
}
- crm_debug("Syncing values to %s", peer?peer->uname:"everyone");
+ crm_debug("Syncing values to %s", readable_peer(peer));
attrd_send_message(peer, sync, false);
free_xml(sync);
}
--
2.31.1

View File

@ -0,0 +1,109 @@
From 3e31da0016795397bfeacb2f3d76ecfe35cc1f67 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 17 Jul 2023 14:52:42 -0500
Subject: [PATCH] Fix: libcrmcommon: wait for reply from appropriate controller
commands
ipc_controld.c:reply_expected() wrongly omitted PCMK__CONTROLD_CMD_NODES (which
hasn't been a problem because crm_node uses a mainloop instead of sync dispatch
for that) and CRM_OP_RM_NODE_CACHE (which can be sent via
ipc_client.c:pcmk_ipc_purge_node()).
Because CRM_OP_RM_NODE_CACHE gets only an ack and no further replies, we now
have to be careful not to return true from the controller's dispatch()
function, otherwise crm_node -R would wait forever for more data. That means
we have to check for whether any replies are expected, which means we have to
increment expected replies *before* sending a request (in case it's sync).
Regression introduced in 2.0.5 by ae14fa4a
Fixes T681
---
lib/common/ipc_controld.c | 49 ++++++++++++++-------------------------
1 file changed, 17 insertions(+), 32 deletions(-)
diff --git a/lib/common/ipc_controld.c b/lib/common/ipc_controld.c
index 3c3a98964..405fd0518 100644
--- a/lib/common/ipc_controld.c
+++ b/lib/common/ipc_controld.c
@@ -143,18 +143,16 @@ set_nodes_data(pcmk_controld_api_reply_t *data, xmlNode *msg_data)
static bool
reply_expected(pcmk_ipc_api_t *api, xmlNode *request)
{
- const char *command = crm_element_value(request, F_CRM_TASK);
-
- if (command == NULL) {
- return false;
- }
-
- // We only need to handle commands that functions in this file can send
- return !strcmp(command, CRM_OP_REPROBE)
- || !strcmp(command, CRM_OP_NODE_INFO)
- || !strcmp(command, CRM_OP_PING)
- || !strcmp(command, CRM_OP_LRM_FAIL)
- || !strcmp(command, CRM_OP_LRM_DELETE);
+ // We only need to handle commands that API functions can send
+ return pcmk__str_any_of(crm_element_value(request, F_CRM_TASK),
+ PCMK__CONTROLD_CMD_NODES,
+ CRM_OP_LRM_DELETE,
+ CRM_OP_LRM_FAIL,
+ CRM_OP_NODE_INFO,
+ CRM_OP_PING,
+ CRM_OP_REPROBE,
+ CRM_OP_RM_NODE_CACHE,
+ NULL);
}
static bool
@@ -168,22 +166,12 @@ dispatch(pcmk_ipc_api_t *api, xmlNode *reply)
pcmk_controld_reply_unknown, NULL, NULL,
};
- /* If we got an ACK, return true so the caller knows to expect more responses
- * from the IPC server. We do this before decrementing replies_expected because
- * ACKs are not going to be included in that value.
- *
- * Note that we cannot do the same kind of status checking here that we do in
- * ipc_pacemakerd.c. The ACK message we receive does not necessarily contain
- * a status attribute. That is, we may receive this:
- *
- * <ack function="crmd_remote_proxy_cb" line="556"/>
- *
- * Instead of this:
- *
- * <ack function="dispatch_controller_ipc" line="391" status="112"/>
- */
if (pcmk__str_eq(crm_element_name(reply), "ack", pcmk__str_none)) {
- return true; // More replies needed
+ /* ACKs are trivial responses that do not count toward expected replies,
+ * and do not have all the fields that validation requires, so skip that
+ * processing.
+ */
+ return private->replies_expected > 0;
}
if (private->replies_expected > 0) {
@@ -310,18 +298,15 @@ static int
send_controller_request(pcmk_ipc_api_t *api, xmlNode *request,
bool reply_is_expected)
{
- int rc;
-
if (crm_element_value(request, XML_ATTR_REFERENCE) == NULL) {
return EINVAL;
}
- rc = pcmk__send_ipc_request(api, request);
- if ((rc == pcmk_rc_ok) && reply_is_expected) {
+ if (reply_is_expected) {
struct controld_api_private_s *private = api->api_data;
private->replies_expected++;
}
- return rc;
+ return pcmk__send_ipc_request(api, request);
}
static xmlNode *
--
2.41.0

View File

@ -1,385 +0,0 @@
From 84d4a0d5f562df91baa0fece45d06ad3732f941c Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 11:20:53 -0600
Subject: [PATCH 1/5] Low: pacemaker-attrd: properly validate attribute set
type
The sense of the test was accidentally reversed in 26471a52689
---
daemons/attrd/attrd_attributes.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 8f32988..f059059 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -40,9 +40,9 @@ attrd_create_attribute(xmlNode *xml)
* attributes are not written.
*/
crm_element_value_int(xml, PCMK__XA_ATTR_IS_PRIVATE, &is_private);
- if ((is_private != 0)
- && !pcmk__str_any_of(set_type, XML_TAG_ATTR_SETS, XML_TAG_UTILIZATION,
- NULL)) {
+ if (!is_private && !pcmk__str_any_of(set_type,
+ XML_TAG_ATTR_SETS,
+ XML_TAG_UTILIZATION, NULL)) {
crm_warn("Ignoring attribute %s with invalid set type %s",
pcmk__s(name, "(unidentified)"), set_type);
return NULL;
--
2.31.1
From d0d0511e71fe983a2d89589c39810b79fb48a8ca Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:13:42 -0600
Subject: [PATCH 2/5] Fix: pacemaker-attrd: sync utilization attributes to
peers correctly
Include the set type with attribute syncs.
Previously, utilization attributes would have the correct set_type on the node
where they were set, but peers would store it as a regular node attribute. If
one of those peers became writer, the attribute would get written to the wrong
set.
---
daemons/attrd/attrd_attributes.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index f059059..0ad9630 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -139,6 +139,7 @@ attrd_add_value_xml(xmlNode *parent, const attribute_t *a,
xmlNode *xml = create_xml_node(parent, __func__);
crm_xml_add(xml, PCMK__XA_ATTR_NAME, a->id);
+ crm_xml_add(xml, PCMK__XA_ATTR_SET_TYPE, a->set_type);
crm_xml_add(xml, PCMK__XA_ATTR_SET, a->set_id);
crm_xml_add(xml, PCMK__XA_ATTR_UUID, a->uuid);
crm_xml_add(xml, PCMK__XA_ATTR_USER, a->user);
--
2.31.1
From 4479ff8507dd69f5946d31cf83c7e47fe15d3bdb Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:18:40 -0600
Subject: [PATCH 3/5] Refactor: pacemaker-attrd: functionize getting attribute
set ID
... for future reuse
---
daemons/attrd/attrd_attributes.c | 38 ++++++++++++++++++++++++++++++++
daemons/attrd/attrd_cib.c | 9 +-------
daemons/attrd/pacemaker-attrd.h | 3 ++-
3 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 0ad9630..5727ab8 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -210,3 +210,41 @@ attrd_populate_attribute(xmlNode *xml, const char *attr)
return a;
}
+
+/*!
+ * \internal
+ * \brief Get the XML ID used to write out an attribute set
+ *
+ * \param[in] attr Attribute to get set ID for
+ * \param[in] node_state_id XML ID of node state that attribute value is for
+ *
+ * \return Newly allocated string with XML ID to use for \p attr set
+ */
+char *
+attrd_set_id(const attribute_t *attr, const char *node_state_id)
+{
+ char *set_id = NULL;
+
+ CRM_ASSERT((attr != NULL) && (node_state_id != NULL));
+
+ if (attr->set_id == NULL) {
+ /* @COMPAT This should really take the set type into account. Currently
+ * we use the same XML ID for transient attributes and utilization
+ * attributes. It doesn't cause problems because the status section is
+ * not limited by the schema in any way, but it's still unfortunate.
+ * For backward compatibility reasons, we can't change this.
+ */
+ set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_state_id);
+ } else {
+ /* @COMPAT When the user specifies a set ID for an attribute, it is the
+ * same for every node. That is less than ideal, but again, the schema
+ * doesn't enforce anything for the status section. We couldn't change
+ * it without allowing the set ID to vary per value rather than per
+ * attribute, which would break backward compatibility, pose design
+ * challenges, and potentially cause problems in rolling upgrades.
+ */
+ pcmk__str_update(&set_id, attr->set_id);
+ }
+ crm_xml_sanitize_id(set_id);
+ return set_id;
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 481fea7..08d3425 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -423,17 +423,10 @@ add_unset_attr_update(const attribute_t *attr, const char *attr_id,
static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
- char *set_id = NULL;
+ char *set_id = attrd_set_id(attr, node_id);
char *attr_id = NULL;
int rc = pcmk_rc_ok;
- if (attr->set_id != NULL) {
- pcmk__str_update(&set_id, attr->set_id);
- } else {
- set_id = crm_strdup_printf("%s-%s", XML_CIB_TAG_STATUS, node_id);
- }
- crm_xml_sanitize_id(set_id);
-
if (attr->uuid != NULL) {
pcmk__str_update(&attr_id, attr->uuid);
} else {
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index bacaad6..3da7f8d 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -1,5 +1,5 @@
/*
- * Copyright 2013-2023 the Pacemaker project contributors
+ * Copyright 2013-2024 the Pacemaker project contributors
*
* The version control history for this file may have further details.
*
@@ -195,6 +195,7 @@ void attrd_clear_value_seen(void);
void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
+char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
enum attrd_write_options {
attrd_write_changed = 0,
--
2.31.1
From eee2169ac348b8ed26ac0b78cb11ddc5cef9384e Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Tue, 16 Jan 2024 12:25:59 -0600
Subject: [PATCH 4/5] Refactor: pacemaker-attrd: functionize getting attribute
nvpair ID
... for future reuse
---
daemons/attrd/attrd_attributes.c | 28 ++++++++++++++++++++++++++++
daemons/attrd/attrd_cib.c | 17 +++++------------
daemons/attrd/pacemaker-attrd.h | 1 +
3 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 5727ab8..23de2e2 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -248,3 +248,31 @@ attrd_set_id(const attribute_t *attr, const char *node_state_id)
crm_xml_sanitize_id(set_id);
return set_id;
}
+
+/*!
+ * \internal
+ * \brief Get the XML ID used to write out an attribute value
+ *
+ * \param[in] attr Attribute to get value XML ID for
+ * \param[in] node_state_id UUID of node that attribute value is for
+ *
+ * \return Newly allocated string with XML ID of \p attr value
+ */
+char *
+attrd_nvpair_id(const attribute_t *attr, const char *node_state_id)
+{
+ char *nvpair_id = NULL;
+
+ if (attr->uuid != NULL) {
+ pcmk__str_update(&nvpair_id, attr->uuid);
+
+ } else if (attr->set_id != NULL) {
+ nvpair_id = crm_strdup_printf("%s-%s", attr->set_id, attr->id);
+
+ } else {
+ nvpair_id = crm_strdup_printf(XML_CIB_TAG_STATUS "-%s-%s",
+ node_state_id, attr->id);
+ }
+ crm_xml_sanitize_id(nvpair_id);
+ return nvpair_id;
+}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 08d3425..d42345f 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -424,23 +424,16 @@ static int
add_attr_update(const attribute_t *attr, const char *value, const char *node_id)
{
char *set_id = attrd_set_id(attr, node_id);
- char *attr_id = NULL;
+ char *nvpair_id = attrd_nvpair_id(attr, node_id);
int rc = pcmk_rc_ok;
- if (attr->uuid != NULL) {
- pcmk__str_update(&attr_id, attr->uuid);
+ if (value == NULL) {
+ rc = add_unset_attr_update(attr, nvpair_id, node_id, set_id);
} else {
- attr_id = crm_strdup_printf("%s-%s", set_id, attr->id);
- }
- crm_xml_sanitize_id(attr_id);
-
- if (value != NULL) {
- rc = add_set_attr_update(attr, attr_id, node_id, set_id, value);
- } else {
- rc = add_unset_attr_update(attr, attr_id, node_id, set_id);
+ rc = add_set_attr_update(attr, nvpair_id, node_id, set_id, value);
}
free(set_id);
- free(attr_id);
+ free(nvpair_id);
return rc;
}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 3da7f8d..deec790 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -196,6 +196,7 @@ void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
char *attrd_set_id(const attribute_t *attr, const char *node_state_id);
+char *attrd_nvpair_id(const attribute_t *attr, const char *node_state_id);
enum attrd_write_options {
attrd_write_changed = 0,
--
2.31.1
From 2abde6cb87d2e3d31a370c74656f6f7c0818c185 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 18 Jan 2024 10:01:56 -0600
Subject: [PATCH 5/5] Log: pacemaker-attrd: improve some messages for debugging
---
daemons/attrd/attrd_attributes.c | 8 +++++---
daemons/attrd/attrd_cib.c | 13 +++++++++----
daemons/attrd/attrd_corosync.c | 10 ++++++----
3 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/daemons/attrd/attrd_attributes.c b/daemons/attrd/attrd_attributes.c
index 23de2e2..68b9585 100644
--- a/daemons/attrd/attrd_attributes.c
+++ b/daemons/attrd/attrd_attributes.c
@@ -60,13 +60,10 @@ attrd_create_attribute(xmlNode *xml)
a->values = pcmk__strikey_table(NULL, attrd_free_attribute_value);
a->user = crm_element_value_copy(xml, PCMK__XA_ATTR_USER);
- crm_trace("Performing all %s operations as user '%s'", a->id, a->user);
if (dampen_s != NULL) {
dampen = crm_get_msec(dampen_s);
}
- crm_trace("Created attribute %s with %s write delay", a->id,
- (a->timeout_ms == 0)? "no" : pcmk__readable_interval(a->timeout_ms));
if(dampen > 0) {
a->timeout_ms = dampen;
@@ -75,6 +72,11 @@ attrd_create_attribute(xmlNode *xml)
crm_warn("Ignoring invalid delay %s for attribute %s", dampen_s, a->id);
}
+ crm_trace("Created attribute %s with %s write delay and %s CIB user",
+ a->id,
+ ((dampen > 0)? pcmk__readable_interval(a->timeout_ms) : "no"),
+ pcmk__s(a->user, "default"));
+
g_hash_table_replace(attributes, a->id, a);
return a;
}
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index d42345f..cae6846 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -54,6 +54,7 @@ attrd_cib_updated_cb(const char *event, xmlNode *msg)
bool status_changed = false;
if (attrd_shutting_down(true)) {
+ crm_debug("Ignoring CIB change during shutdown");
return;
}
@@ -278,11 +279,13 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use
g_hash_table_iter_init(&iter, a->values);
while (g_hash_table_iter_next(&iter, (gpointer *) & peer, (gpointer *) & v)) {
- do_crm_log(level, "* %s[%s]=%s",
- a->id, peer, pcmk__s(v->requested, "(null)"));
if (rc == pcmk_ok) {
+ crm_info("* Wrote %s[%s]=%s",
+ a->id, peer, pcmk__s(v->requested, "(unset)"));
pcmk__str_update(&(v->requested), NULL);
} else {
+ do_crm_log(level, "* Could not write %s[%s]=%s",
+ a->id, peer, pcmk__s(v->requested, "(unset)"));
a->changed = true; // Reattempt write below if we are still writer
}
}
@@ -292,6 +295,7 @@ attrd_cib_callback(xmlNode *msg, int call_id, int rc, xmlNode *output, void *use
/* We deferred a write of a new update because this update was in
* progress. Write out the new value without additional delay.
*/
+ crm_debug("Pending update for %s can be written now", a->id);
write_attribute(a, false);
/* We're re-attempting a write because the original failed; delay
@@ -593,8 +597,9 @@ write_attribute(attribute_t *a, bool ignore_delay)
continue;
}
- crm_debug("Updating %s[%s]=%s (node uuid=%s id=%" PRIu32 ")",
- a->id, v->nodename, v->current, uuid, v->nodeid);
+ crm_debug("Writing %s[%s]=%s (node-state-id=%s node-id=%" PRIu32 ")",
+ a->id, v->nodename, pcmk__s(v->current, "(unset)"),
+ uuid, v->nodeid);
cib_updates++;
/* Preservation of the attribute to transmit alert */
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index b348d52..6fb847b 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -293,7 +293,8 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
// Write out new value or start dampening timer
if (a->timeout_ms && a->timer) {
- crm_trace("Delayed write out (%dms) for %s", a->timeout_ms, attr);
+ crm_trace("Delaying write of %s %s for dampening",
+ attr, pcmk__readable_interval(a->timeout_ms));
mainloop_timer_start(a->timer);
} else {
attrd_write_or_elect_attribute(a);
@@ -307,11 +308,12 @@ update_attr_on_host(attribute_t *a, const crm_node_t *peer, const xmlNode *xml,
if (is_force_write == 1 && a->timeout_ms && a->timer) {
/* Save forced writing and set change flag. */
/* The actual attribute is written by Writer after election. */
- crm_trace("Unchanged %s[%s] from %s is %s(Set the forced write flag)",
- attr, host, peer->uname, value);
+ crm_trace("%s[%s] from %s is unchanged (%s), forcing write",
+ attr, host, peer->uname, pcmk__s(value, "unset"));
a->force_write = TRUE;
} else {
- crm_trace("Unchanged %s[%s] from %s is %s", attr, host, peer->uname, value);
+ crm_trace("%s[%s] from %s is unchanged (%s)",
+ attr, host, peer->uname, pcmk__s(value, "unset"));
}
}
--
2.31.1

View File

@ -0,0 +1,127 @@
From 2587f9fabea3a7ef01eb7752d4e2ef082823934e Mon Sep 17 00:00:00 2001
From: eabdullin <ed.abdullin.1@gmail.com>
Date: Wed, 13 Sep 2023 14:15:46 +0300
Subject: [PATCH] - Fix: controller: don't try to execute agent action at
shutdown Normally, agent execution is not possible at shutdown. However, when
metadata is needed for some action, the agent can be called asynchronously,
and when the metadata action returns, the original action is performed. If
the metadata is initiated before shutdown, but completes after shutdown has
begun, do not try to attempt the original action, so we avoid unnecessary
error logs. - Refactor: controller: de-functionize lrm_state_destroy() It was
a one-liner called once - Log: controller: improve messages for resource
history updates - Low: controller: guard lrm_state_table usage with NULLcheck
It is NULL while draining the mainloop during the shutdown sequence.
---
daemons/controld/controld_execd.c | 15 ++++++++++++---
daemons/controld/controld_execd_state.c | 15 +++++++--------
daemons/controld/controld_lrm.h | 5 -----
3 files changed, 19 insertions(+), 16 deletions(-)
diff --git a/daemons/controld/controld_execd.c b/daemons/controld/controld_execd.c
index afead92..e7a91ab 100644
--- a/daemons/controld/controld_execd.c
+++ b/daemons/controld/controld_execd.c
@@ -1728,7 +1728,9 @@ metadata_complete(int pid, const pcmk__action_result_t *result, void *user_data)
md = controld_cache_metadata(lrm_state->metadata_cache, data->rsc,
result->action_stdout);
}
- do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
+ if (!pcmk_is_set(fsa_input_register, R_HA_DISCONNECTED)) {
+ do_lrm_rsc_op(lrm_state, data->rsc, data->input_xml, md);
+ }
free_metadata_cb_data(data);
}
@@ -2406,10 +2408,17 @@ cib_rsc_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *use
case pcmk_ok:
case -pcmk_err_diff_failed:
case -pcmk_err_diff_resync:
- crm_trace("Resource update %d complete: rc=%d", call_id, rc);
+ crm_trace("Resource history update completed (call=%d rc=%d)",
+ call_id, rc);
break;
default:
- crm_warn("Resource update %d failed: (rc=%d) %s", call_id, rc, pcmk_strerror(rc));
+ if (call_id > 0) {
+ crm_warn("Resource history update %d failed: %s "
+ CRM_XS " rc=%d", call_id, pcmk_strerror(rc), rc);
+ } else {
+ crm_warn("Resource history update failed: %s " CRM_XS " rc=%d",
+ pcmk_strerror(rc), rc);
+ }
}
if (call_id == last_resource_update) {
diff --git a/daemons/controld/controld_execd_state.c b/daemons/controld/controld_execd_state.c
index adba2e5..3994b6d 100644
--- a/daemons/controld/controld_execd_state.c
+++ b/daemons/controld/controld_execd_state.c
@@ -131,12 +131,6 @@ lrm_state_create(const char *node_name)
return state;
}
-void
-lrm_state_destroy(const char *node_name)
-{
- g_hash_table_remove(lrm_state_table, node_name);
-}
-
static gboolean
remote_proxy_remove_by_node(gpointer key, gpointer value, gpointer user_data)
{
@@ -252,7 +246,7 @@ lrm_state_destroy_all(void)
lrm_state_t *
lrm_state_find(const char *node_name)
{
- if (!node_name) {
+ if ((node_name == NULL) || (lrm_state_table == NULL)) {
return NULL;
}
return g_hash_table_lookup(lrm_state_table, node_name);
@@ -263,6 +257,8 @@ lrm_state_find_or_create(const char *node_name)
{
lrm_state_t *lrm_state;
+ CRM_CHECK(lrm_state_table != NULL, return NULL);
+
lrm_state = g_hash_table_lookup(lrm_state_table, node_name);
if (!lrm_state) {
lrm_state = lrm_state_create(node_name);
@@ -274,6 +270,9 @@ lrm_state_find_or_create(const char *node_name)
GList *
lrm_state_get_list(void)
{
+ if (lrm_state_table == NULL) {
+ return NULL;
+ }
return g_hash_table_get_values(lrm_state_table);
}
@@ -764,7 +763,7 @@ lrm_state_unregister_rsc(lrm_state_t * lrm_state,
}
if (is_remote_lrmd_ra(NULL, NULL, rsc_id)) {
- lrm_state_destroy(rsc_id);
+ g_hash_table_remove(lrm_state_table, rsc_id);
return pcmk_ok;
}
diff --git a/daemons/controld/controld_lrm.h b/daemons/controld/controld_lrm.h
index 983c288..11ff1bc 100644
--- a/daemons/controld/controld_lrm.h
+++ b/daemons/controld/controld_lrm.h
@@ -113,11 +113,6 @@ void lrm_state_destroy_all(void);
*/
lrm_state_t *lrm_state_create(const char *node_name);
-/*!
- * \brief Destroy executor connection by node name
- */
-void lrm_state_destroy(const char *node_name);
-
/*!
* \brief Find lrm_state data by node name
*/
--

View File

@ -0,0 +1,45 @@
From f5263c9401c9c38d4e039149deddcc0da0c184ba Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 3 Aug 2023 12:17:08 -0500
Subject: [PATCH] Fix: attrd: avoid race condition when shutting down
This addresses a race condition that can occur when the DC and the attribute
writer are different nodes, and shutting down at the same time. When the DC
controller leaves its Corosync process group, the remaining nodes erase its
transient node attributes (including "shutdown") from the CIB. However if the
(former) DC's attrd is still up, it can win the attribute writer election
called after the original writer leaves. As the election winner, it writes out
all its attributes to the CIB, including "shutdown". The next time it rejoins
the cluster, it will be immediately shut down.
Fixes T138
---
daemons/attrd/attrd_elections.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 3b6b55a0f59..6f4916888a9 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -22,12 +22,20 @@ attrd_election_cb(gpointer user_data)
{
attrd_declare_winner();
+ if (attrd_requesting_shutdown() || attrd_shutting_down()) {
+ /* This node is shutting down or about to, meaning its attributes will
+ * be removed (and may have already been removed from the CIB by a
+ * controller). Don't sync or write its attributes in this case.
+ */
+ return G_SOURCE_REMOVE;
+ }
+
/* Update the peers after an election */
attrd_peer_sync(NULL, NULL);
/* Update the CIB after an election */
attrd_write_attributes(true, false);
- return FALSE;
+ return G_SOURCE_REMOVE;
}
void

View File

@ -1,53 +0,0 @@
From 9c13ce6fe95812308443c188ace8f897e6bce942 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Mon, 29 Jan 2024 11:14:25 -0800
Subject: [PATCH] Fix: tools: crm_attribute emits garbage for --node localhost
or auto
This happens because pcmk__node_attr_target() returns its argument if
its argument is NULL, "auto", or "localhost" and no relevant environment
variables are found. Then crm_attribute frees the return value, makes a
copy of it, and assigns it back to options.dest_uname.
The fix is to check whether the return value is equal to the argument.
Fixes RHEL-23065
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
tools/crm_attribute.c | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/tools/crm_attribute.c b/tools/crm_attribute.c
index d221ab85d..636d03dbd 100644
--- a/tools/crm_attribute.c
+++ b/tools/crm_attribute.c
@@ -766,8 +766,23 @@ main(int argc, char **argv)
const char *target = pcmk__node_attr_target(options.dest_uname);
if (target != NULL) {
- g_free(options.dest_uname);
- options.dest_uname = g_strdup(target);
+ /* If options.dest_uname is "auto" or "localhost", then
+ * pcmk__node_attr_target() may return it, depending on environment
+ * variables. In that case, attribute lookups will fail for "auto"
+ * (unless there's a node named "auto"). attrd maps "localhost" to
+ * the true local node name for queries.
+ *
+ * @TODO
+ * * Investigate whether "localhost" is mapped to a real node name
+ * for non-query commands. If not, possibly modify it so that it
+ * is.
+ * * Map "auto" to "localhost" (probably).
+ */
+ if (target != (const char *) options.dest_uname) {
+ g_free(options.dest_uname);
+ options.dest_uname = g_strdup(target);
+ }
+
} else if (getenv("CIB_file") != NULL && options.dest_uname == NULL) {
get_node_name_from_local();
}
--
2.41.0

View File

@ -0,0 +1,210 @@
From 83e547cc64f2586031a007ab58e91fc22cd1a68a Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Aug 2023 12:18:23 -0500
Subject: [PATCH] Refactor: attrd: use enum instead of bools for
attrd_write_attributes()
---
daemons/attrd/attrd_cib.c | 24 ++++++++++++++++++------
daemons/attrd/attrd_corosync.c | 2 +-
daemons/attrd/attrd_elections.c | 2 +-
daemons/attrd/attrd_ipc.c | 2 +-
daemons/attrd/attrd_utils.c | 2 +-
daemons/attrd/pacemaker-attrd.h | 8 +++++++-
6 files changed, 29 insertions(+), 11 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 928c0133745..9c787fe1024 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -343,16 +343,23 @@ attrd_write_attribute(attribute_t *a, bool ignore_delay)
free_xml(xml_top);
}
+/*!
+ * \internal
+ * \brief Write out attributes
+ *
+ * \param[in] options Group of enum attrd_write_options
+ */
void
-attrd_write_attributes(bool all, bool ignore_delay)
+attrd_write_attributes(uint32_t options)
{
GHashTableIter iter;
attribute_t *a = NULL;
- crm_debug("Writing out %s attributes", all? "all" : "changed");
+ crm_debug("Writing out %s attributes",
+ pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
- if (!all && a->unknown_peer_uuids) {
+ if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) {
// Try writing this attribute again, in case peer ID was learned
a->changed = true;
} else if (a->force_write) {
@@ -360,9 +367,14 @@ attrd_write_attributes(bool all, bool ignore_delay)
a->changed = true;
}
- if(all || a->changed) {
- /* When forced write flag is set, ignore delay. */
- attrd_write_attribute(a, (a->force_write ? true : ignore_delay));
+ if (pcmk_is_set(options, attrd_write_all) || a->changed) {
+ bool ignore_delay = pcmk_is_set(options, attrd_write_no_delay);
+
+ if (a->force_write) {
+ // Always ignore delay when forced write flag is set
+ ignore_delay = true;
+ }
+ attrd_write_attribute(a, ignore_delay);
} else {
crm_trace("Skipping unchanged attribute %s", a->id);
}
diff --git a/daemons/attrd/attrd_corosync.c b/daemons/attrd/attrd_corosync.c
index 1aec35a054e..49631df6e44 100644
--- a/daemons/attrd/attrd_corosync.c
+++ b/daemons/attrd/attrd_corosync.c
@@ -285,7 +285,7 @@ record_peer_nodeid(attribute_value_t *v, const char *host)
crm_trace("Learned %s has node id %s", known_peer->uname, known_peer->uuid);
if (attrd_election_won()) {
- attrd_write_attributes(false, false);
+ attrd_write_attributes(attrd_write_changed);
}
}
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index c25a41a4492..01341db18e4 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -34,7 +34,7 @@ attrd_election_cb(gpointer user_data)
attrd_peer_sync(NULL, NULL);
/* Update the CIB after an election */
- attrd_write_attributes(true, false);
+ attrd_write_attributes(attrd_write_all);
return G_SOURCE_REMOVE;
}
diff --git a/daemons/attrd/attrd_ipc.c b/daemons/attrd/attrd_ipc.c
index 4be789de7f9..05c4a696a19 100644
--- a/daemons/attrd/attrd_ipc.c
+++ b/daemons/attrd/attrd_ipc.c
@@ -232,7 +232,7 @@ attrd_client_refresh(pcmk__request_t *request)
crm_info("Updating all attributes");
attrd_send_ack(request->ipc_client, request->ipc_id, request->ipc_flags);
- attrd_write_attributes(true, true);
+ attrd_write_attributes(attrd_write_all|attrd_write_no_delay);
pcmk__set_result(&request->result, CRM_EX_OK, PCMK_EXEC_DONE, NULL);
return NULL;
diff --git a/daemons/attrd/attrd_utils.c b/daemons/attrd/attrd_utils.c
index c43eac1695a..bfd51368890 100644
--- a/daemons/attrd/attrd_utils.c
+++ b/daemons/attrd/attrd_utils.c
@@ -156,7 +156,7 @@ attrd_cib_replaced_cb(const char *event, xmlNode * msg)
if (attrd_election_won()) {
if (change_section & (cib_change_section_nodes | cib_change_section_status)) {
crm_notice("Updating all attributes after %s event", event);
- attrd_write_attributes(true, false);
+ attrd_write_attributes(attrd_write_all);
}
}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 41f31d97b3b..2d781d11394 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -176,8 +176,14 @@ void attrd_free_attribute(gpointer data);
void attrd_free_attribute_value(gpointer data);
attribute_t *attrd_populate_attribute(xmlNode *xml, const char *attr);
+enum attrd_write_options {
+ attrd_write_changed = 0,
+ attrd_write_all = (1 << 0),
+ attrd_write_no_delay = (1 << 1),
+};
+
void attrd_write_attribute(attribute_t *a, bool ignore_delay);
-void attrd_write_attributes(bool all, bool ignore_delay);
+void attrd_write_attributes(uint32_t options);
void attrd_write_or_elect_attribute(attribute_t *a);
extern int minimum_protocol_version;
From 58400e272cfc51f02eec69cdd0ed0d27a30e78a3 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Thu, 24 Aug 2023 12:27:53 -0500
Subject: [PATCH] Fix: attrd: avoid race condition at writer election
f5263c94 was not a complete fix. The issue may also occur if a remaining node
(not the original DC or writer) wins the attribute writer election after the
original DC's controller has exited but before its attribute manger has exited.
The long-term solution will be to have the attribute manager (instead of the
controller) be in control of erasing transient attributes from the CIB when a
node leaves. This short-term workaround simply has new attribute writers skip
shutdown attributes when writing out all attributes.
Fixes T138
---
daemons/attrd/attrd_cib.c | 5 +++++
daemons/attrd/attrd_elections.c | 14 ++++++++++++--
daemons/attrd/pacemaker-attrd.h | 1 +
3 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/daemons/attrd/attrd_cib.c b/daemons/attrd/attrd_cib.c
index 9c787fe102..2c910b4c64 100644
--- a/daemons/attrd/attrd_cib.c
+++ b/daemons/attrd/attrd_cib.c
@@ -359,6 +359,11 @@ attrd_write_attributes(uint32_t options)
pcmk_is_set(options, attrd_write_all)? "all" : "changed");
g_hash_table_iter_init(&iter, attributes);
while (g_hash_table_iter_next(&iter, NULL, (gpointer *) & a)) {
+ if (pcmk_is_set(options, attrd_write_skip_shutdown)
+ && pcmk__str_eq(a->id, XML_CIB_ATTR_SHUTDOWN, pcmk__str_none)) {
+ continue;
+ }
+
if (!pcmk_is_set(options, attrd_write_all) && a->unknown_peer_uuids) {
// Try writing this attribute again, in case peer ID was learned
a->changed = true;
diff --git a/daemons/attrd/attrd_elections.c b/daemons/attrd/attrd_elections.c
index 01341db18e..a95cd44cbd 100644
--- a/daemons/attrd/attrd_elections.c
+++ b/daemons/attrd/attrd_elections.c
@@ -33,8 +33,18 @@ attrd_election_cb(gpointer user_data)
/* Update the peers after an election */
attrd_peer_sync(NULL, NULL);
- /* Update the CIB after an election */
- attrd_write_attributes(attrd_write_all);
+ /* After winning an election, update the CIB with the values of all
+ * attributes as the winner knows them.
+ *
+ * However, do not write out any "shutdown" attributes. A node that is
+ * shutting down will have all its transient attributes removed from the CIB
+ * when its controller exits, and from the attribute manager's memory (on
+ * remaining nodes) when its attribute manager exits; if an election is won
+ * between when those two things happen, we don't want to write the shutdown
+ * attribute back out, which would cause the node to immediately shut down
+ * the next time it rejoins.
+ */
+ attrd_write_attributes(attrd_write_all|attrd_write_skip_shutdown);
return G_SOURCE_REMOVE;
}
diff --git a/daemons/attrd/pacemaker-attrd.h b/daemons/attrd/pacemaker-attrd.h
index 2d781d1139..2e35bd7ec5 100644
--- a/daemons/attrd/pacemaker-attrd.h
+++ b/daemons/attrd/pacemaker-attrd.h
@@ -180,6 +180,7 @@ enum attrd_write_options {
attrd_write_changed = 0,
attrd_write_all = (1 << 0),
attrd_write_no_delay = (1 << 1),
+ attrd_write_skip_shutdown = (1 << 2),
};
void attrd_write_attribute(attribute_t *a, bool ignore_delay);

File diff suppressed because it is too large Load Diff