From 7889c6669e78faaef3ce77ff149430c179bb6792 Mon Sep 17 00:00:00 2001 From: Klaus Wenninger Date: Wed, 11 Aug 2021 11:48:10 +0200 Subject: [PATCH] * Wed Aug 11 2021 Klaus Wenninger - 2.1.1-0.3.rc2 - package fence_watchdog in base-package instead of cli-subpackage - fix version output of fence_watchdog as needed for help2man - ensure transient attributes of lost nodes are cleared reliably --- ...og-fix-version-output-needed-for-hel.patch | 24 ++++ ...ckage-fence_watchdog-in-base-package.patch | 32 +++++ ...nsure-lost-node-s-transient-attribut.patch | 122 ++++++++++++++++++ pacemaker.spec | 12 +- 4 files changed, 188 insertions(+), 2 deletions(-) create mode 100644 0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch create mode 100644 0004-Build-rpm-package-fence_watchdog-in-base-package.patch create mode 100644 0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch diff --git a/0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch b/0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch new file mode 100644 index 0000000..8245e4f --- /dev/null +++ b/0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch @@ -0,0 +1,24 @@ +From 88e75d5b98df197fa731e7642434951a24a67095 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Aug 2021 09:10:23 +0200 +Subject: [PATCH] Fix: fence_watchdog: fix version output needed for help2man + +--- + daemons/fenced/fence_watchdog.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in +index c83304f..700065e 100755 +--- a/daemons/fenced/fence_watchdog.in ++++ b/daemons/fenced/fence_watchdog.in +@@ -12,6 +12,7 @@ import sys + import atexit + import getopt + ++AGENT_VERSION = "1.0.0" + SHORT_DESC = "Dummy watchdog fence agent" + LONG_DESC = """fence_watchdog just provides + meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" +-- +1.8.3.1 + diff --git 
a/0004-Build-rpm-package-fence_watchdog-in-base-package.patch b/0004-Build-rpm-package-fence_watchdog-in-base-package.patch new file mode 100644 index 0000000..d112c40 --- /dev/null +++ b/0004-Build-rpm-package-fence_watchdog-in-base-package.patch @@ -0,0 +1,32 @@ +From 61eb9c240004d1dbd0b5973e2fecda3686bb4c53 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Aug 2021 09:06:55 +0200 +Subject: [PATCH] Build: rpm: package fence_watchdog in base-package + +--- + rpm/pacemaker.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in +index f58357a..0c569b9 100644 +--- a/rpm/pacemaker.spec.in ++++ b/rpm/pacemaker.spec.in +@@ -734,6 +734,7 @@ exit 0 + %{_sbindir}/crm_attribute + %{_sbindir}/crm_master + %{_sbindir}/fence_legacy ++%{_sbindir}/fence_watchdog + + %doc %{_mandir}/man7/pacemaker-controld.* + %doc %{_mandir}/man7/pacemaker-schedulerd.* +@@ -797,7 +798,6 @@ exit 0 + %{_sbindir}/crm_simulate + %{_sbindir}/crm_report + %{_sbindir}/crm_ticket +-%{_sbindir}/fence_watchdog + %{_sbindir}/stonith_admin + # "dirname" is owned by -schemas, which is a prerequisite + %{_datadir}/pacemaker/report.collector +-- +1.8.3.1 + diff --git a/0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch b/0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch new file mode 100644 index 0000000..3e64b25 --- /dev/null +++ b/0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch @@ -0,0 +1,122 @@ +From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 9 Aug 2021 14:48:57 -0500 +Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are + cleared without DC + +Previously, peer_update_callback() cleared a lost node's transient attributes +if either the local node is DC, or there is no DC. 
+ +However, that left the possibility of the DC being lost at the same time as +another node -- the local node would still have fsa_our_dc set while processing +the leave notifications, so no node would clear the attributes for the non-DC +node. + +Now, the controller has its own CPG configuration change callback, which sets a +global boolean before calling the usual one, so that peer_update_callback() can +know when the DC has been lost. +--- + daemons/controld/controld_callbacks.c | 4 ++- + daemons/controld/controld_corosync.c | 57 ++++++++++++++++++++++++++++++++++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c +index af24856..e564b3d 100644 +--- a/daemons/controld/controld_callbacks.c ++++ b/daemons/controld/controld_callbacks.c +@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node) + + #define state_text(state) ((state)? (const char *)(state) : "in unknown state") + ++bool controld_dc_left = false; ++ + void + peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) + { +@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d + cib_scope_local); + } + +- } else if (AM_I_DC || (fsa_our_dc == NULL)) { ++ } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) { + /* This only needs to be done once, so normally the DC should do + * it. However if there is no DC, every node must do it, since + * there is no other way to ensure some one node does it. +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index db99630..c5ab658 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data) + } + } + ++extern bool controld_dc_left; ++ ++/*! 
++ * \brief Handle a Corosync notification of a CPG configuration change ++ * ++ * \param[in] handle CPG connection ++ * \param[in] cpg_name CPG group name ++ * \param[in] member_list List of current CPG members ++ * \param[in] member_list_entries Number of entries in \p member_list ++ * \param[in] left_list List of CPG members that left ++ * \param[in] left_list_entries Number of entries in \p left_list ++ * \param[in] joined_list List of CPG members that joined ++ * \param[in] joined_list_entries Number of entries in \p joined_list ++ */ ++static void ++cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, ++ const struct cpg_address *member_list, ++ size_t member_list_entries, ++ const struct cpg_address *left_list, ++ size_t left_list_entries, ++ const struct cpg_address *joined_list, ++ size_t joined_list_entries) ++{ ++ /* When nodes leave CPG, the DC clears their transient node attributes. ++ * ++ * However if there is no DC, or the DC is among the nodes that left, each ++ * remaining node needs to do the clearing, to ensure it gets done. ++ * Otherwise, the attributes would persist when the nodes rejoin, which ++ * could have serious consequences for unfencing, agents that use attributes ++ * for internal logic, etc. ++ * ++ * Here, we set a global boolean if the DC is among the nodes that left, for ++ * use by the peer callback. 
++ */ ++ if (fsa_our_dc != NULL) { ++ crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc); ++ ++ if (peer != NULL) { ++ for (int i = 0; i < left_list_entries; ++i) { ++ if (left_list[i].nodeid == peer->id) { ++ controld_dc_left = true; ++ break; ++ } ++ } ++ } ++ } ++ ++ // Process the change normally, which will call the peer callback as needed ++ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, ++ left_list, left_list_entries, ++ joined_list, joined_list_entries); ++ ++ controld_dc_left = false; ++} ++ + extern gboolean crm_connect_corosync(crm_cluster_t * cluster); + + gboolean +@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster) + if (is_corosync_cluster()) { + crm_set_status_callback(&peer_update_callback); + cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; +- cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; ++ cluster->cpg.cpg_confchg_fn = cpg_membership_callback; + cluster->destroy = crmd_cs_destroy; + + if (crm_cluster_connect(cluster)) { +-- +1.8.3.1 + diff --git a/pacemaker.spec b/pacemaker.spec index a8f1d51..a138e31 100644 --- a/pacemaker.spec +++ b/pacemaker.spec @@ -32,7 +32,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.1 -%global specversion 0.2.rc2 +%global specversion 0.3.rc2 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit a64ad221abe9cdd968ceacd35c23832ce0fcd189 @@ -217,6 +217,9 @@ Source0: https://codeload.github.com/%{github_owner}/%{name}/tar.gz/%{arch Source1: https://codeload.github.com/%{github_owner}/%{nagios_name}/tar.gz/%{nagios_archive_github_url} Patch0: 0001-feature-watchdog-fencing-allow-restriction-to-certai.patch Patch1: 0002-Fix-watchdog-fencing-Silence-warning-without-node-re.patch +Patch2: 0003-Fix-fence_watchdog-fix-version-output-needed-for-hel.patch +Patch3: 0004-Build-rpm-package-fence_watchdog-in-base-package.patch 
+Patch4: 0005-Fix-controller-ensure-lost-node-s-transient-attribut.patch # upstream commits @@ -660,6 +663,7 @@ exit 0 %{_sbindir}/crm_attribute %{_sbindir}/crm_master +%{_sbindir}/fence_watchdog %doc %{_mandir}/man7/pacemaker-controld.* %doc %{_mandir}/man7/pacemaker-schedulerd.* @@ -709,7 +713,6 @@ exit 0 %{_sbindir}/crm_simulate %{_sbindir}/crm_report %{_sbindir}/crm_ticket -%{_sbindir}/fence_watchdog %{_sbindir}/stonith_admin # "dirname" is owned by -schemas, which is a prerequisite %{_datadir}/pacemaker/report.collector @@ -825,6 +828,11 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Wed Aug 11 2021 Klaus Wenninger - 2.1.1-0.3.rc2 +- package fence_watchdog in base-package instead of cli-subpackage +- fix version output of fence_watchdog as needed for help2man +- ensure transient attributes of lost nodes are cleared reliably + * Mon Aug 9 2021 Klaus Wenninger - 2.1.1-0.2.rc2 - Update for new upstream tarball for release candidate: Pacemaker-2.1.1-rc2, for full details, see included ChangeLog file or