diff --git a/SOURCES/017-watchdog-fixes.patch b/SOURCES/017-watchdog-fixes.patch new file mode 100644 index 0000000..d3df876 --- /dev/null +++ b/SOURCES/017-watchdog-fixes.patch @@ -0,0 +1,58 @@ +From 61eb9c240004d1dbd0b5973e2fecda3686bb4c53 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Aug 2021 09:06:55 +0200 +Subject: [PATCH 1/2] Build: rpm: package fence_watchdog in base-package + +--- + rpm/pacemaker.spec.in | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/rpm/pacemaker.spec.in b/rpm/pacemaker.spec.in +index f58357a77..0c569b9ca 100644 +--- a/rpm/pacemaker.spec.in ++++ b/rpm/pacemaker.spec.in +@@ -734,6 +734,7 @@ exit 0 + %{_sbindir}/crm_attribute + %{_sbindir}/crm_master + %{_sbindir}/fence_legacy ++%{_sbindir}/fence_watchdog + + %doc %{_mandir}/man7/pacemaker-controld.* + %doc %{_mandir}/man7/pacemaker-schedulerd.* +@@ -797,7 +798,6 @@ exit 0 + %{_sbindir}/crm_simulate + %{_sbindir}/crm_report + %{_sbindir}/crm_ticket +-%{_sbindir}/fence_watchdog + %{_sbindir}/stonith_admin + # "dirname" is owned by -schemas, which is a prerequisite + %{_datadir}/pacemaker/report.collector +-- +2.27.0 + + +From 88e75d5b98df197fa731e7642434951a24a67095 Mon Sep 17 00:00:00 2001 +From: Klaus Wenninger +Date: Tue, 10 Aug 2021 09:10:23 +0200 +Subject: [PATCH 2/2] Fix: fence_watchdog: fix version output needed for + help2man + +--- + daemons/fenced/fence_watchdog.in | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/daemons/fenced/fence_watchdog.in b/daemons/fenced/fence_watchdog.in +index c83304f1d..700065e0e 100755 +--- a/daemons/fenced/fence_watchdog.in ++++ b/daemons/fenced/fence_watchdog.in +@@ -12,6 +12,7 @@ import sys + import atexit + import getopt + ++AGENT_VERSION = "1.0.0" + SHORT_DESC = "Dummy watchdog fence agent" + LONG_DESC = """fence_watchdog just provides + meta-data - actual fencing is done by the pacemaker internal watchdog agent.""" +-- +2.27.0 + diff --git a/SOURCES/018-controller.patch b/SOURCES/018-controller.patch new file mode 100644 index 0000000..a2094e3 --- /dev/null +++ b/SOURCES/018-controller.patch @@ -0,0 +1,122 @@ +From ee7eba6a7a05bdf0a12d60ebabb334d8ee021101 Mon Sep 17 00:00:00 2001 +From: Ken Gaillot +Date: Mon, 9 Aug 2021 14:48:57 -0500 +Subject: [PATCH] Fix: controller: ensure lost node's transient attributes are + cleared without DC + +Previously, peer_update_callback() cleared a lost node's transient attributes +if either the local node is DC, or there is no DC. + +However, that left the possibility of the DC being lost at the same time as +another node -- the local node would still have fsa_our_dc set while processing +the leave notifications, so no node would clear the attributes for the non-DC +node. + +Now, the controller has its own CPG configuration change callback, which sets a +global boolean before calling the usual one, so that peer_update_callback() can +know when the DC has been lost. +--- + daemons/controld/controld_callbacks.c | 4 +- + daemons/controld/controld_corosync.c | 57 ++++++++++++++++++++++++++- + 2 files changed, 59 insertions(+), 2 deletions(-) + +diff --git a/daemons/controld/controld_callbacks.c b/daemons/controld/controld_callbacks.c +index af24856ae..e564b3dcd 100644 +--- a/daemons/controld/controld_callbacks.c ++++ b/daemons/controld/controld_callbacks.c +@@ -99,6 +99,8 @@ node_alive(const crm_node_t *node) + + #define state_text(state) ((state)? (const char *)(state) : "in unknown state") + ++bool controld_dc_left = false; ++ + void + peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *data) + { +@@ -217,7 +219,7 @@ peer_update_callback(enum crm_status_type type, crm_node_t * node, const void *d + cib_scope_local); + } + +- } else if (AM_I_DC || (fsa_our_dc == NULL)) { ++ } else if (AM_I_DC || controld_dc_left || (fsa_our_dc == NULL)) { + /* This only needs to be done once, so normally the DC should do + * it. However if there is no DC, every node must do it, since + * there is no other way to ensure some one node does it. +diff --git a/daemons/controld/controld_corosync.c b/daemons/controld/controld_corosync.c +index db99630fb..c5ab6580a 100644 +--- a/daemons/controld/controld_corosync.c ++++ b/daemons/controld/controld_corosync.c +@@ -87,6 +87,61 @@ crmd_cs_destroy(gpointer user_data) + } + } + ++extern bool controld_dc_left; ++ ++/*! ++ * \brief Handle a Corosync notification of a CPG configuration change ++ * ++ * \param[in] handle CPG connection ++ * \param[in] cpg_name CPG group name ++ * \param[in] member_list List of current CPG members ++ * \param[in] member_list_entries Number of entries in \p member_list ++ * \param[in] left_list List of CPG members that left ++ * \param[in] left_list_entries Number of entries in \p left_list ++ * \param[in] joined_list List of CPG members that joined ++ * \param[in] joined_list_entries Number of entries in \p joined_list ++ */ ++static void ++cpg_membership_callback(cpg_handle_t handle, const struct cpg_name *cpg_name, ++ const struct cpg_address *member_list, ++ size_t member_list_entries, ++ const struct cpg_address *left_list, ++ size_t left_list_entries, ++ const struct cpg_address *joined_list, ++ size_t joined_list_entries) ++{ ++ /* When nodes leave CPG, the DC clears their transient node attributes. ++ * ++ * However if there is no DC, or the DC is among the nodes that left, each ++ * remaining node needs to do the clearing, to ensure it gets done. ++ * Otherwise, the attributes would persist when the nodes rejoin, which ++ * could have serious consequences for unfencing, agents that use attributes ++ * for internal logic, etc. ++ * ++ * Here, we set a global boolean if the DC is among the nodes that left, for ++ * use by the peer callback. ++ */ ++ if (fsa_our_dc != NULL) { ++ crm_node_t *peer = pcmk__search_cluster_node_cache(0, fsa_our_dc); ++ ++ if (peer != NULL) { ++ for (int i = 0; i < left_list_entries; ++i) { ++ if (left_list[i].nodeid == peer->id) { ++ controld_dc_left = true; ++ break; ++ } ++ } ++ } ++ } ++ ++ // Process the change normally, which will call the peer callback as needed ++ pcmk_cpg_membership(handle, cpg_name, member_list, member_list_entries, ++ left_list, left_list_entries, ++ joined_list, joined_list_entries); ++ ++ controld_dc_left = false; ++} ++ + extern gboolean crm_connect_corosync(crm_cluster_t * cluster); + + gboolean +@@ -95,7 +150,7 @@ crm_connect_corosync(crm_cluster_t * cluster) + if (is_corosync_cluster()) { + crm_set_status_callback(&peer_update_callback); + cluster->cpg.cpg_deliver_fn = crmd_cs_dispatch; +- cluster->cpg.cpg_confchg_fn = pcmk_cpg_membership; ++ cluster->cpg.cpg_confchg_fn = cpg_membership_callback; + cluster->destroy = crmd_cs_destroy; + + if (crm_cluster_connect(cluster)) { +-- +2.27.0 + diff --git a/SPECS/pacemaker.spec b/SPECS/pacemaker.spec index b0d0723..035733e 100644 --- a/SPECS/pacemaker.spec +++ b/SPECS/pacemaker.spec @@ -36,7 +36,7 @@ ## can be incremented to build packages reliably considered "newer" ## than previously built packages with the same pcmkversion) %global pcmkversion 2.1.0 -%global specversion 5 +%global specversion 6 ## Upstream commit (full commit ID, abbreviated commit ID, or tag) to build %global commit 7c3f660707a495a1331716ad32cd3ac9d9f8ff58 @@ -279,6 +279,8 @@ Patch13: 013-leaks.patch Patch14: 014-str-list.patch Patch15: 015-sbd.patch Patch16: 016-cts.patch +Patch17: 017-watchdog-fixes.patch +Patch18: 018-controller.patch # downstream-only commits #Patch1xx: 1xx-xxxx.patch @@ -861,7 +863,6 @@ exit 0 %{_sbindir}/crm_simulate %{_sbindir}/crm_report %{_sbindir}/crm_ticket -%{_sbindir}/fence_watchdog %{_sbindir}/stonith_admin # "dirname" is owned by -schemas, which is a prerequisite %{_datadir}/pacemaker/report.collector @@ -977,6 +978,12 @@ exit 0 %license %{nagios_name}-%{nagios_hash}/COPYING %changelog +* Tue Aug 10 2021 Ken Gaillot - 2.1.0-6 +- Fix watchdog agent version information +- Ensure transient attributes are cleared when multiple nodes are lost +- Resolves: rhbz1443666 +- Resolves: rhbz1986998 + * Fri Aug 06 2021 Ken Gaillot - 2.1.0-5 - Allow configuring specific nodes to use watchdog-only sbd for fencing - Resolves: rhbz1443666