* Wed Aug 11 2021 Klaus Wenninger <kwenning@redhat.com> - 2.1.1-0.3.rc2

- package fence_watchdog in base-package instead of cli-subpackage
- fix version output of fence_watchdog as needed for help2man
- ensure transient attributes of lost nodes are cleared reliably
This commit is contained in:
Klaus Wenninger 2021-08-11 11:48:10 +02:00
parent e8f53e7d76
commit 7889c6669e
4 changed files with 188 additions and 2 deletions

View File

@ -0,0 +1,24 @@
From 88e75d5b98df197fa731e7642434951a24a67095 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 10 Aug 2021 09:10:23 +0200
Subject: [PATCH] Fix: fence_watchdog: fix version output needed for help2man
---
daemons/fenced/fence_watchdog.in | 1 +
1 file changed, 1 insertion(+)
diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in
index c83304f..700065e 100755
--- a/daemons/fenced/fence_watchdog.in
+++ b/daemons/fenced/fence_watchdog.in
@@ -12,6 +12,7 @@ import sys
import atexit
import getopt
+AGENT_VERSION = "1.0.0"
SHORT_DESC = "Dummy watchdog fence agent"
LONG_DESC = """fence_watchdog just provides
meta-data - actual fencing is done by the pacemaker internal watchdog agent."""
--
1.8.3.1

View File

@ -0,0 +1,32 @@
From 61eb9c240004d1dbd0b5973e2fecda3686bb4c53 Mon Sep 17 00:00:00 2001
From: Klaus Wenninger <klaus.wenninger@aon.at>
Date: Tue, 10 Aug 2021 09:06:55 +0200
Subject: [PATCH] Build: rpm: package fence_watchdog in base-package
---
rpm/pacemaker.spec.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in
index f58357a..0c569b9 100644
--- a/rpm/pacemaker.spec.in
+++ b/rpm/pacemaker.spec.in
@@ -734,6 +734,7 @@ exit 0
%{_sbindir}/crm_attribute
%{_sbindir}/crm_master
%{_sbindir}/fence_legacy
+%{_sbindir}/fence_watchdog
%doc %{_mandir}/man7/pacemaker-controld.*
%doc %{_mandir}/man7/pacemaker-schedulerd.*
@@ -797,7 +798,6 @@ exit 0
%{_sbindir}/crm_simulate
%{_sbindir}/crm_report
%{_sbindir}/crm_ticket
-%{_sbindir}/fence_watchdog
%{_sbindir}/stonith_admin
# "dirname" is owned by -schemas, which is a prerequisite
%{_datadir}/pacemaker/report.collector
--
1.8.3.1

View File

@ -0,0 +1,122 @@
From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001
From: Ken Gaillot <kgaillot@redhat.com>
Date: Mon, 9 Aug 2021 14:48:57 -0500
Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are
cleared without DC
Previously, peer_update_callback() cleared a lost node's transient attributes
if either the local node is DC, or there is no DC.
However, that left the possibility of the DC being lost at the same time as
another node -- the local node would still have fsa_our_dc set while processing
the leave notifications, so no node would clear the attributes for the non-DC
node.
Now, the controller has its own CPG configuration change callback, which sets a
global boolean before calling the usual one, so that peer_update_callback() can
know when the DC has been lost.
---
daemons/controld/controld_callbacks.c | 4 ++-
daemons/controld/controld_corosync.c | 57 ++++++++++++++++++++++++++++++++++-
2 files changed, 59 insertions(+), 2 deletions(-)
diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c
index af24856..e564b3d 100644
--- a/daemons/controld/controld_callbacks.c
+++ b/daemons/controld/controld_callbacks.c
@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node)
#define state_text(state) ((state)? (const char *)(state) : "in unknown state")
+bool controld_dc_left = false;
+
void
peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data)
{
@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d
cib_scope_local);
}
- } else if (AM_I_DC || (fsa_our_dc == NULL)) {
+ } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) {
/* This only needs to be done once, so normally the DC should do
* it. However if there is no DC, every node must do it, since
* there is no other way to ensure some one node does it.
diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c
index db99630..c5ab658 100644
--- a/daemons/controld/controld_corosync.c
+++ b/daemons/controld/controld_corosync.c
@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data)
}
}
+extern bool controld_dc_left;
+
+/*!
+ * \brief Handle a Corosync notification of a CPG configuration change
+ *
+ * \param[in] handle CPG connection
+ * \param[in] cpg_name CPG group name
+ * \param[in] member_list List of current CPG members
+ * \param[in] member_list_entries Number of entries in \p member_list
+ * \param[in] left_list List of CPG members that left
+ * \param[in] left_list_entries Number of entries in \p left_list
+ * \param[in] joined_list List of CPG members that joined
+ * \param[in] joined_list_entries Number of entries in \p joined_list
+ */
+static void
+cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name,
+ const struct cpg_address *member_list,
+ size_t member_list_entries,
+ const struct cpg_address *left_list,
+ size_t left_list_entries,
+ const struct cpg_address *joined_list,
+ size_t joined_list_entries)
+{
+ /* When nodes leave CPG, the DC clears their transient node attributes.
+ *
+ * However if there is no DC, or the DC is among the nodes that left, each
+ * remaining node needs to do the clearing, to ensure it gets done.
+ * Otherwise, the attributes would persist when the nodes rejoin, which
+ * could have serious consequences for unfencing, agents that use attributes
+ * for internal logic, etc.
+ *
+ * Here, we set a global boolean if the DC is among the nodes that left, for
+ * use by the peer callback.
+ */
+ if (fsa_our_dc != NULL) {
+ crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc);
+
+ if (peer != NULL) {
+ for (int i = 0; i < left_list_entries; ++i) {
+ if (left_list[i].nodeid == peer->id) {
+ controld_dc_left = true;
+ break;
+ }
+ }
+ }
+ }
+
+ // Process the change normally, which will call the peer callback as needed
+ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries,
+ left_list, left_list_entries,
+ joined_list, joined_list_entries);
+
+ controld_dc_left = false;
+}
+
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
gboolean
@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster)
if (is_corosync_cluster()) {
crm_set_status_callback(&peer_update_callback);
cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch;
- cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership;
+ cluster->cpg.cpg_confchg_fn = cpg_membership_callback;
cluster->destroy = crmd_cs_destroy;
if (crm_cluster_connect(cluster)) {
--
1.8.3.1

View File

@ -32,7 +32,7 @@
## can be incremented to build packages reliably considered "newer"
## than previously built packages with the same pcmkversion)
%global pcmkversion 2.1.1
%global specversion 0.2.rc2
%global specversion 0.3.rc2
## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build
%global commit a64ad221abe9cdd968ceacd35c23832ce0fcd189
@ -217,6 +217,9 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch
Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url}
Patch0: 0001-feature-watchdog-fencing-allow-restriction-to-certai.patch
Patch1: 0002-Fix-watchdog-fencing-Silence-warning-without-node-re.patch
Patch2: 0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch
Patch3: 0004-Build-rpm-package-fence_watchdog-in-base-package.patch
Patch4: 0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch
# upstream commits
@ -660,6 +663,7 @@ exit 0
%{_sbindir}/crm_attribute
%{_sbindir}/crm_master
%{_sbindir}/fence_watchdog
%doc %{_mandir}/man7/pacemaker-controld.*
%doc %{_mandir}/man7/pacemaker-schedulerd.*
@ -709,7 +713,6 @@ exit 0
%{_sbindir}/crm_simulate
%{_sbindir}/crm_report
%{_sbindir}/crm_ticket
%{_sbindir}/fence_watchdog
%{_sbindir}/stonith_admin
# "dirname" is owned by -schemas, which is a prerequisite
%{_datadir}/pacemaker/report.collector
@ -825,6 +828,11 @@ exit 0
%license %{nagios_name}-%{nagios_hash}/COPYING
%changelog
* Wed Aug 11 2021 Klaus Wenninger <kwenning@redhat.com> - 2.1.1-0.3.rc2
- package fence_watchdog in base-package instead of cli-subpackage
- fix version output of fence_watchdog as needed for help2man
- ensure transient attributes of lost nodes are cleared reliably
* Mon Aug 9 2021 Klaus Wenninger <kwenning@redhat.com> - 2.1.1-0.2.rc2
- Update for new upstream tarball for release candidate: Pacemaker-2.1.1-rc2,
for full details, see included ChangeLog file or