From ac8081c5a469c4533907024e814722fc1578a5d5 Mon Sep 17 00:00:00 2001 From: Chris Lumens Date: Tue, 21 Feb 2023 09:32:58 -0500 Subject: [PATCH] Backport regression fixes. - Additional fixes for SIGABRT during pacemaker-fenced shutdown - Backport fix for attrd_updater -QA not displaying all nodes - Related: rhbz2166967 - Resolves: rhbz2169829 --- 004-g_source_remove.patch | 48 +++++++++++- 005-query-null.patch | 151 ++++++++++++++++++++++++++++++++++++++ pacemaker.spec | 9 ++- 3 files changed, 206 insertions(+), 2 deletions(-) create mode 100644 005-query-null.patch diff --git a/004-g_source_remove.patch b/004-g_source_remove.patch index e85c4f3..2af0f47 100644 --- a/004-g_source_remove.patch +++ b/004-g_source_remove.patch @@ -1,7 +1,7 @@ From 45617b727e280cac384a28ae3d96145e066e6197 Mon Sep 17 00:00:00 2001 From: Reid Wahl Date: Fri, 3 Feb 2023 12:08:57 -0800 -Subject: [PATCH] Fix: fencer: Prevent double g_source_remove of op_timer_one +Subject: [PATCH 01/02] Fix: fencer: Prevent double g_source_remove of op_timer_one QE observed a rarely reproducible core dump in the fencer during Pacemaker shutdown, in which we try to g_source_remove() an op timer @@ -59,3 +59,49 @@ index d61b5bd..b7426ff 100644 -- 2.31.1 +From 0291db4750322ec7f01ae6a4a2a30abca9d8e19e Mon Sep 17 00:00:00 2001 +From: Reid Wahl +Date: Wed, 15 Feb 2023 22:30:27 -0800 +Subject: [PATCH 02/02] Fix: fencer: Avoid double source remove of op_timer_total + +remote_op_timeout() returns G_SOURCE_REMOVE, which tells GLib to remove +the source from the main loop after returning. Currently this function +is used as the callback only when creating op->op_timer_total. + +If we don't set op->op_timer_total to 0 before returning from +remote_op_timeout(), then we can get an assertion and core dump from +GLib when the op's timers are being cleared (either during op +finalization or during fencer shutdown). This is because +clear_remote_op_timers() sees that op->op_timer_total != 0 and tries to +remove the source, but the source has already been removed. + +Note that we're already (correctly) zeroing op->op_timer_one and +op->query_timeout as appropriate in their respective callback functions. + +Fortunately, GLib doesn't care whether the source has already been +removed before we return G_SOURCE_REMOVE from a callback. So it's safe +to call finalize_op() (which removes all the op's timer sources) from +within a callback. + +Fixes RHBZ#2166967 + +Signed-off-by: Reid Wahl +--- + daemons/fenced/fenced_remote.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/daemons/fenced/fenced_remote.c b/daemons/fenced/fenced_remote.c +index b7426ff88..adea3d7d8 100644 +--- a/daemons/fenced/fenced_remote.c ++++ b/daemons/fenced/fenced_remote.c +@@ -718,6 +718,8 @@ remote_op_timeout(gpointer userdata) + { + remote_fencing_op_t *op = userdata; + ++ op->op_timer_total = 0; ++ + if (op->state == st_done) { + crm_debug("Action '%s' targeting %s for client %s already completed " + CRM_XS " id=%.8s", +-- +2.39.0 diff --git a/005-query-null.patch b/005-query-null.patch new file mode 100644 index 0000000..194cd33 --- /dev/null +++ b/005-query-null.patch @@ -0,0 +1,151 @@ +From 0d15568a538349ac41028db6b506d13dd23e8732 Mon Sep 17 00:00:00 2001 +From: Chris Lumens +Date: Tue, 14 Feb 2023 14:00:37 -0500 +Subject: [PATCH] High: libcrmcommon: Fix handling node=NULL in + pcmk__attrd_api_query. + +According to the header file, if node is NULL, pcmk__attrd_api_query +should query the value of the given attribute on all cluster nodes. +This is also what the server expects and how attrd_updater is supposed +to work. + +However, pcmk__attrd_api_query has no way of letting callers decide +whether they want to query all nodes or whether they want to use the +local node. We were passing NULL for the node name, which it took to +mean it should look up the local node name. This calls +pcmk__node_attr_target, which probes the local cluster name and returns +that to pcmk__attrd_api_query. If it returns non-NULL, that value will +then be put into the XML IPC call which means the server will only +return the value for that node. + +In testing this was usually fine. However, in pratice, the methods +pcmk__node_attr_target uses to figure out the local cluster node name +involves checking the OCF_RESKEY_CRM_meta_on_node environment variable +among others. + +This variable was never set in testing, but can be set in the real +world. This leads to circumstances where the user did "attrd_updater -QA" +expecting to get the values on all nodes, but instead only got the value +on the local cluster node. + +In pacemaker-2.1.4 and prior, pcmk__node_attr_target was simply never +called if the node was NULL but was called otherwise. + +The fix is to modify pcmk__attrd_api_query to take an option for +querying all nodes. If that's present, we'll query all nodes. If it's +not present, we'll look at the given node name - NULL means look it up, +anything else means just that node. + +Regression in 2.1.5 introduced by eb20a65577 +--- + include/crm/common/attrd_internal.h | 6 +++++- + include/crm/common/ipc_attrd_internal.h | 7 +++++-- + lib/common/ipc_attrd.c | 12 ++++++++---- + tools/attrd_updater.c | 5 +++-- + 4 files changed, 21 insertions(+), 9 deletions(-) + +diff --git a/include/crm/common/attrd_internal.h b/include/crm/common/attrd_internal.h +index 389be48..7337c38 100644 +--- a/include/crm/common/attrd_internal.h ++++ b/include/crm/common/attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2004-2022 the Pacemaker project contributors ++ * Copyright 2004-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -25,6 +25,10 @@ enum pcmk__node_attr_opts { + pcmk__node_attr_perm = (1 << 5), + pcmk__node_attr_sync_local = (1 << 6), + pcmk__node_attr_sync_cluster = (1 << 7), ++ // pcmk__node_attr_utilization is 8, but that has not been backported. ++ // I'm leaving the gap here in case we backport that in the future and ++ // also to avoid problems on mixed-version clusters. ++ pcmk__node_attr_query_all = (1 << 9), + }; + + #define pcmk__set_node_attr_flags(node_attr_flags, flags_to_set) do { \ +diff --git a/include/crm/common/ipc_attrd_internal.h b/include/crm/common/ipc_attrd_internal.h +index 2c6713f..b1b7584 100644 +--- a/include/crm/common/ipc_attrd_internal.h ++++ b/include/crm/common/ipc_attrd_internal.h +@@ -1,5 +1,5 @@ + /* +- * Copyright 2022 the Pacemaker project contributors ++ * Copyright 2022-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -110,10 +110,13 @@ int pcmk__attrd_api_purge(pcmk_ipc_api_t *api, const char *node); + * + * \param[in,out] api Connection to pacemaker-attrd + * \param[in] node Look up the attribute for this node +- * (or NULL for all nodes) ++ * (or NULL for the local node) + * \param[in] name Attribute name + * \param[in] options Bitmask of pcmk__node_attr_opts + * ++ * \note Passing pcmk__node_attr_query_all will cause the function to query ++ * the value of \p name on all nodes, regardless of the value of \p node. ++ * + * \return Standard Pacemaker return code + */ + int pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, +diff --git a/lib/common/ipc_attrd.c b/lib/common/ipc_attrd.c +index 4606509..dece49b 100644 +--- a/lib/common/ipc_attrd.c ++++ b/lib/common/ipc_attrd.c +@@ -1,5 +1,5 @@ + /* +- * Copyright 2011-2022 the Pacemaker project contributors ++ * Copyright 2011-2023 the Pacemaker project contributors + * + * The version control history for this file may have further details. + * +@@ -332,10 +332,14 @@ pcmk__attrd_api_query(pcmk_ipc_api_t *api, const char *node, const char *name, + return EINVAL; + } + +- target = pcmk__node_attr_target(node); ++ if (pcmk_is_set(options, pcmk__node_attr_query_all)) { ++ node = NULL; ++ } else { ++ target = pcmk__node_attr_target(node); + +- if (target != NULL) { +- node = target; ++ if (target != NULL) { ++ node = target; ++ } + } + + request = create_attrd_op(NULL); +diff --git a/tools/attrd_updater.c b/tools/attrd_updater.c +index 3cd766d..cbd341d 100644 +--- a/tools/attrd_updater.c ++++ b/tools/attrd_updater.c +@@ -376,6 +376,7 @@ attrd_event_cb(pcmk_ipc_api_t *attrd_api, enum pcmk_ipc_event event_type, + static int + send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_node, gboolean query_all) + { ++ uint32_t options = pcmk__node_attr_none; + pcmk_ipc_api_t *attrd_api = NULL; + int rc = pcmk_rc_ok; + +@@ -400,10 +401,10 @@ send_attrd_query(pcmk__output_t *out, const char *attr_name, const char *attr_no + + /* Decide which node(s) to query */ + if (query_all == TRUE) { +- attr_node = NULL; ++ options |= pcmk__node_attr_query_all; + } + +- rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, 0); ++ rc = pcmk__attrd_api_query(attrd_api, attr_node, attr_name, options); + + if (rc != pcmk_rc_ok) { + g_set_error(&error, PCMK__RC_ERROR, rc, "Could not query value of %s: %s (%d)", +-- +2.31.1 + diff --git a/pacemaker.spec b/pacemaker.spec index 0312397..2be7c1f 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -36,7 +36,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.5 -%global specversion 6 +%global specversion 7 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit a3f44794f94e1571c6ba0042915ade369b4ce4b1 @@ -252,6 +252,7 @@ Patch001: 001-sync-points.patch Patch002: 002-remote-regression.patch Patch003: 003-history-cleanup.patch Patch004: 004-g_source_remove.patch +Patch005: 005-query-null.patch Requires: resource-agents Requires: %{pkgname_pcmk_libs}%{?_isa} = %{version}-%{release} @@ -854,6 +855,12 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Feb 22 2023 Chris Lumens - 2.1.5-7 +- Additional fixes for SIGABRT during pacemaker-fenced shutdown +- Backport fix for attrd_updater -QA not displaying all nodes +- Related: rhbz2166967 +- Resolves: rhbz2169829 + * Thu Feb 9 2023 Chris Lumens - 2.1.5-6 - Backport fix for migration history cleanup causing resource recovery - Backport fix for SIGABRT during pacemaker-fenced shutdown