diff --git a/SOURCES/bz2142518-IPaddr2-IPsrcaddr-1-support-policy-based-routing.patch b/SOURCES/bz2142518-IPaddr2-IPsrcaddr-1-support-policy-based-routing.patch new file mode 100644 index 0000000..0607ddb --- /dev/null +++ b/SOURCES/bz2142518-IPaddr2-IPsrcaddr-1-support-policy-based-routing.patch @@ -0,0 +1,84 @@ +From 4d87bcfe5df8a1e40ee945e095ac9e7cca147ec4 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 29 Jun 2022 10:26:25 +0200 +Subject: [PATCH] IPaddr2/IPsrcaddr: add/modify table parameter to be able to + find interface while using policy based routing + +--- + heartbeat/IPaddr2 | 12 ++++++++++++ + heartbeat/IPsrcaddr | 5 ++++- + heartbeat/findif.sh | 2 +- + 3 files changed, 17 insertions(+), 2 deletions(-) + +diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 +index 97a7431a2..e8384c586 100755 +--- a/heartbeat/IPaddr2 ++++ b/heartbeat/IPaddr2 +@@ -73,6 +73,7 @@ OCF_RESKEY_ip_default="" + OCF_RESKEY_cidr_netmask_default="" + OCF_RESKEY_broadcast_default="" + OCF_RESKEY_iflabel_default="" ++OCF_RESKEY_table_default="" + OCF_RESKEY_cidr_netmask_default="" + OCF_RESKEY_lvs_support_default=false + OCF_RESKEY_lvs_ipv6_addrlabel_default=false +@@ -97,6 +98,7 @@ OCF_RESKEY_network_namespace_default="" + : ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}} + : ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}} + : ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}} ++: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}} + : ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}} + : ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}} + : ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}} +@@ -239,6 +241,16 @@ If a label is specified in nic name, this parameter has no effect. + + + ++ ++ ++Table to use to lookup which interface to use for the IP. ++ ++This can be used for policy based routing. See man ip-rule(8). 
++ ++Table ++ ++ ++ + + + Enable support for LVS Direct Routing configurations. In case a IP +diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr +index 1bd41a930..cf106cc34 100755 +--- a/heartbeat/IPsrcaddr ++++ b/heartbeat/IPsrcaddr +@@ -155,13 +155,16 @@ Metric. Only needed if incorrect metric value is used. + + + +-Table to modify. E.g. "local". ++Table to modify and use for interface lookup. E.g. "local". + + The table has to have a route matching the "destination" parameter. ++ ++This can be used for policy based routing. See man ip-rule(8). + + Table + + ++ + + + +diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh +index 66bc6d56a..1a40cc9a4 100644 +--- a/heartbeat/findif.sh ++++ b/heartbeat/findif.sh +@@ -32,7 +32,7 @@ prefixcheck() { + getnetworkinfo() + { + local line netinfo +- ip -o -f inet route list match $OCF_RESKEY_ip table local scope host | (while read line; ++ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | (while read line; + do + netinfo=`echo $line | awk '{print $2}'` + case $netinfo in diff --git a/SOURCES/bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch b/SOURCES/bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch new file mode 100644 index 0000000..9cfc808 --- /dev/null +++ b/SOURCES/bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch @@ -0,0 +1,35 @@ +From da9e8e691f39494e14f8f11173b6ab6433384396 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 20 Jun 2023 14:19:23 +0200 +Subject: [PATCH] findif.sh: fix table parameter so it uses main table by + default + +--- + heartbeat/findif.sh | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh +index 1a40cc9a4b..6c04c98c19 100644 +--- a/heartbeat/findif.sh ++++ b/heartbeat/findif.sh +@@ -32,7 +32,7 @@ prefixcheck() { + getnetworkinfo() + { + local line netinfo +- ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | 
(while read line; ++ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table:=main}" scope host | (while read line; + do + netinfo=`echo $line | awk '{print $2}'` + case $netinfo in +@@ -215,9 +215,9 @@ findif() + fi + if [ -n "$nic" ] ; then + # NIC supports more than two. +- set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}') ++ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}') + else +- set -- $(ip -o -f $family route list match $match $scope | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}') ++ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}') + fi + if [ $# = 0 ] ; then + case $OCF_RESKEY_ip in diff --git a/SOURCES/bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch b/SOURCES/bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch new file mode 100644 index 0000000..ef5e34e --- /dev/null +++ b/SOURCES/bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch @@ -0,0 +1,42 @@ +From 2695888c983df331b0fee407a5c69c493a360313 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 30 Nov 2022 12:07:05 +0100 +Subject: [PATCH] lvmlockd: add "use_lvmlockd = 1" if it's commented out or + missing + +--- + heartbeat/lvmlockd | 17 +++++++++++++---- + 1 file changed, 13 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd +index dc7bd2d7e..f4b299f28 100755 +--- a/heartbeat/lvmlockd ++++ b/heartbeat/lvmlockd +@@ -180,14 
+180,23 @@ setup_lvm_config() + lock_type=$(echo "$out" | cut -d'=' -f2) + + if [ -z "$use_lvmlockd" ]; then +- ocf_exit_reason "\"use_lvmlockd\" not set in /etc/lvm/lvm.conf ..." +- exit $OCF_ERR_CONFIGURED +- fi ++ ocf_log info "adding \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..." ++ cat >> /etc/lvm/lvm.conf << EOF ++ ++global { ++ use_lvmlockd = 1 ++} ++EOF + +- if [ -n "$use_lvmlockd" ] && [ "$use_lvmlockd" != 1 ] ; then ++ if [ $? -ne 0 ]; then ++ ocf_exit_reason "unable to add \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..." ++ exit $OCF_ERR_CONFIGURED ++ fi ++ elif [ "$use_lvmlockd" != 1 ] ; then + ocf_log info "setting \"use_lvmlockd=1\" in /etc/lvm/lvm.conf ..." + sed -i 's,^[[:blank:]]*use_lvmlockd[[:blank:]]*=.*,\ \ \ \ use_lvmlockd = 1,g' /etc/lvm/lvm.conf + fi ++ + if [ -n "$lock_type" ] ; then + # locking_type was removed from config in v2.03 + ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03" diff --git a/SOURCES/bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch b/SOURCES/bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch new file mode 100644 index 0000000..6b771b6 --- /dev/null +++ b/SOURCES/bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch @@ -0,0 +1,24 @@ +From e7a748d35fe56f2be727ecae1885a2f1366f41bf Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 15 Mar 2023 13:03:07 +0100 +Subject: [PATCH] ethmonitor: dont log "Interface does not exist" for + monitor-action + +--- + heartbeat/ethmonitor | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor +index 451738a0b5..f9c9ef4bdd 100755 +--- a/heartbeat/ethmonitor ++++ b/heartbeat/ethmonitor +@@ -271,6 +271,9 @@ if_init() { + validate-all) + ocf_exit_reason "Interface $NIC does not exist" + exit $OCF_ERR_CONFIGURED;; ++ monitor) ++ ocf_log debug "Interface $NIC does not exist" ++ ;; + *) + ## It might be a bond interface which is temporarily not available, 
therefore we want to continue here + ocf_log warn "Interface $NIC does not exist" diff --git a/SOURCES/bz2174911-LVM-activate-failover-with-missing-pvs.patch b/SOURCES/bz2174911-LVM-activate-failover-with-missing-pvs.patch new file mode 100644 index 0000000..9767c9a --- /dev/null +++ b/SOURCES/bz2174911-LVM-activate-failover-with-missing-pvs.patch @@ -0,0 +1,156 @@ +From 51dd5d5d051aa3b3f0c104f8e80f212cd5780fc3 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 14 Mar 2023 09:14:28 +0100 +Subject: [PATCH] LVM-activate: failover with missing PVs + +There are two changes included: + +- Allow the system ID to be changed on a VG when the VG is + missing PVs, as long as a majority of PVs are still present. + This requires a recent version of lvm that supports the + --majoritypvs option for vgchange. + +- Use --activationmode degraded when activating LVs so that + raid LVs can be activated when legs are missing, as long as + sufficient devices are available for raid to provide all the + data in the LV. + +By David Teigland. 
+--- + heartbeat/LVM-activate | 82 ++++++++++++++++++++++++++++++++---------- + 1 file changed, 64 insertions(+), 18 deletions(-) + +diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate +index e951a08e9c..f6f24a3b52 100755 +--- a/heartbeat/LVM-activate ++++ b/heartbeat/LVM-activate +@@ -50,6 +50,8 @@ OCF_RESKEY_vg_access_mode_default="" + OCF_RESKEY_activation_mode_default="exclusive" + OCF_RESKEY_tag_default="pacemaker" + OCF_RESKEY_partial_activation_default="false" ++OCF_RESKEY_degraded_activation_default="false" ++OCF_RESKEY_majority_pvs_default="false" + + : ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}} + : ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}} +@@ -57,6 +59,8 @@ OCF_RESKEY_partial_activation_default="false" + : ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}} + : ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}} + : ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}} ++: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}} ++: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}} + + # If LV is given, only activate this named LV; otherwise, activate all + # LVs in the named VG. +@@ -191,6 +195,29 @@ logical volumes. + + + ++ ++ ++Activate RAID LVs using the "degraded" activation mode. This allows RAID ++LVs to be activated with missing PVs if all data can be provided with ++RAID redundancy. The RAID level determines the number of PVs that are ++required for degraded activation to succeed. If fewer PVs are available, ++then degraded activation will fail. Also enable majority_pvs. ++ ++Activate RAID LVs in degraded mode when missing PVs ++ ++ ++ ++ ++ ++If set, the VG system ID can be reassigned to a new host if a majority ++of PVs in the VG are present. Otherwise, VG failover with system ID ++will fail when the VG is missing PVs. Also enable degraded_activation ++when RAID LVs are used. 
++ ++Allow changing the system ID of a VG with a majority of PVs ++ ++ ++ + + + +@@ -524,24 +551,27 @@ lvm_validate() { + exit $OCF_ERR_GENERIC + fi + +- # Inconsistency might be due to missing physical volumes, which doesn't +- # automatically mean we should fail. If partial_activation=true then +- # we should let start try to handle it, or if no PVs are listed as +- # "unknown device" then another node may have marked a device missing +- # where we have access to all of them and can start without issue. +- case $(vgs -o attr --noheadings $VG | tr -d ' ') in +- ???p??*) +- if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then +- # We are missing devices and cannot activate partially +- ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially" +- exit $OCF_ERR_GENERIC ++ vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null) ++ ++ if [ $vg_missing_pv_count -gt 0 ]; then ++ ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs." ++ ++ # Setting new system ID will succeed if over half of PVs remain. ++ # Don't try to calculate here if a majority is present, ++ # but leave this up to the vgchange command to determine. ++ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then ++ ocf_log warn "Attempting fail over with missing PVs (majority.)" ++ ++ # Setting new system ID will fail, and behavior is undefined for ++ # other access modes. ++ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then ++ ocf_log warn "Attempting fail over with missing PVs (partial.)" ++ + else +- # We are missing devices but are allowed to activate partially. +- # Assume that caused the vgck failure and carry on +- ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action." ++ ocf_exit_reason "Volume group [$VG] has devices missing. 
Consider majority_pvs=true" ++ exit $OCF_ERR_GENERIC + fi +- ;; +- esac ++ fi + + # Get the access mode from VG metadata and check if it matches the input + # value. Skip to check "tagging" mode because there's no reliable way to +@@ -601,7 +631,18 @@ lvm_validate() { + do_activate() { + do_activate_opt=$1 + +- if ocf_is_true "$OCF_RESKEY_partial_activation" ; then ++ if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then ++ # This will allow a RAID LV to be activated if sufficient ++ # devices are available to allow the LV to be usable ++ do_activate_opt="${do_activate_opt} --activationmode degraded" ++ ++ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then ++ # This will allow a mirror LV to be activated if any ++ # devices are missing, but the activated LV may not be ++ # usable, so it is not recommended. Also, other LV ++ # types without data redundancy will be activated ++ # when partial is set. ++ # RAID LVs and degraded_activation should be used instead. + do_activate_opt="${do_activate_opt} --partial" + fi + +@@ -661,11 +702,16 @@ clvmd_activate() { + } + + systemid_activate() { ++ majority_opt="" + set_autoactivation=0 + cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]') + ++ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then ++ vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs" ++ fi ++ + # Put our system ID on the VG +- vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \ ++ vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \ + --systemid ${SYSTEM_ID} ${VG} + vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1 + diff --git a/SOURCES/bz2179003-mysql-1-replication-fixes.patch b/SOURCES/bz2179003-mysql-1-replication-fixes.patch new file mode 100644 index 0000000..e086e07 --- /dev/null +++ b/SOURCES/bz2179003-mysql-1-replication-fixes.patch @@ -0,0 +1,70 @@ +From 706b48fd93a75a582c538013aea1418b6ed69dd0 Mon 
Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 9 Mar 2023 15:57:59 +0100 +Subject: [PATCH] mysql: promotable fixes to avoid nodes getting bounced around + by setting -v 1/-v 2, and added OCF_CHECK_LEVEL=10 for promotable resources + to be able to distinguish between promoted and not + +--- + heartbeat/mysql | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +diff --git a/heartbeat/mysql b/heartbeat/mysql +index 9ab49ab20e..29ed427319 100755 +--- a/heartbeat/mysql ++++ b/heartbeat/mysql +@@ -757,6 +757,10 @@ mysql_monitor() { + status_loglevel="info" + fi + ++ if ocf_is_ms; then ++ OCF_CHECK_LEVEL=10 ++ fi ++ + mysql_common_status $status_loglevel + rc=$? + +@@ -777,7 +781,13 @@ mysql_monitor() { + return $rc + fi + +- if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then ++ if [ $OCF_CHECK_LEVEL -eq 10 ]; then ++ if [ -z "$OCF_RESKEY_test_table" ]; then ++ ocf_exit_reason "test_table not set" ++ return $OCF_ERR_CONFIGURED ++ ++ fi ++ + # Check if this instance is configured as a slave, and if so + # check slave status + if is_slave; then +@@ -795,18 +805,16 @@ mysql_monitor() { + ocf_exit_reason "Failed to select from $test_table"; + return $OCF_ERR_GENERIC; + fi +- else +- # In case no exnteded tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point +- ocf_is_ms && $CRM_MASTER -v 1 + fi + + if ocf_is_ms && ! get_read_only; then + ocf_log debug "MySQL monitor succeeded (master)"; + # Always set master score for the master +- $CRM_MASTER -v 2 ++ $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1)) + return $OCF_RUNNING_MASTER + else + ocf_log debug "MySQL monitor succeeded"; ++ ocf_is_ms && $CRM_MASTER -v 1 + return $OCF_SUCCESS + fi + } +@@ -873,7 +881,6 @@ mysql_start() { + # preference set by the administrator. We choose a low + # greater-than-zero preference. 
+ $CRM_MASTER -v 1 +- + fi + + # Initial monitor action diff --git a/SOURCES/bz2179003-mysql-2-fix-demoted-score-bounce.patch b/SOURCES/bz2179003-mysql-2-fix-demoted-score-bounce.patch new file mode 100644 index 0000000..30815b4 --- /dev/null +++ b/SOURCES/bz2179003-mysql-2-fix-demoted-score-bounce.patch @@ -0,0 +1,32 @@ +From 34483f8029ea9ab25220cfee71d53adaf5aacaa0 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 14 Jun 2023 14:37:01 +0200 +Subject: [PATCH] mysql: fix promotion_score bouncing between ~3600 and 1 on + demoted nodes + +--- + heartbeat/mysql | 11 ----------- + 1 file changed, 11 deletions(-) + +diff --git a/heartbeat/mysql b/heartbeat/mysql +index 29ed42731..1df2fc0f2 100755 +--- a/heartbeat/mysql ++++ b/heartbeat/mysql +@@ -517,17 +517,6 @@ check_slave() { + + exit $OCF_ERR_INSTALLED + fi +- elif ocf_is_ms; then +- # Even if we're not set to evict lagging slaves, we can +- # still use the seconds behind master value to set our +- # master preference. +- local master_pref +- master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind})) +- if [ $master_pref -lt 0 ]; then +- # Sanitize a below-zero preference to just zero +- master_pref=0 +- fi +- $CRM_MASTER -v $master_pref + fi + + # is the slave ok to have a VIP on it diff --git a/SOURCES/bz2182415-azure-events-1-fix-no-transition-summary.patch b/SOURCES/bz2182415-azure-events-1-fix-no-transition-summary.patch new file mode 100644 index 0000000..ed2958e --- /dev/null +++ b/SOURCES/bz2182415-azure-events-1-fix-no-transition-summary.patch @@ -0,0 +1,54 @@ +From 81bb58b05d2ddabd17fe31af39f0e857e61db3c9 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 28 Mar 2023 16:53:45 +0200 +Subject: [PATCH] azure-events*: fix for no "Transition Summary" for Pacemaker + 2.1+ + +--- + heartbeat/azure-events-az.in | 8 ++++---- + heartbeat/azure-events.in | 6 +++--- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in 
+index 59d0953061..67c02c6422 100644 +--- a/heartbeat/azure-events-az.in ++++ b/heartbeat/azure-events-az.in +@@ -311,10 +311,10 @@ class clusterHelper: + summary = clusterHelper._exec("crm_simulate", "-Ls") + if not summary: + ocf.logger.warning("transitionSummary: could not load transition summary") +- return False ++ return "" + if summary.find("Transition Summary:") < 0: +- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary) +- return False ++ ocf.logger.debug("transitionSummary: no transactions: %s" % summary) ++ return "" + summary = summary.split("Transition Summary:")[1] + ret = summary.split("\n").pop(0) + +@@ -768,4 +768,4 @@ def main(): + agent.run() + + if __name__ == '__main__': +- main() +\ No newline at end of file ++ main() +diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in +index 66e129060a..5ad658df93 100644 +--- a/heartbeat/azure-events.in ++++ b/heartbeat/azure-events.in +@@ -310,10 +310,10 @@ class clusterHelper: + summary = clusterHelper._exec("crm_simulate", "-Ls") + if not summary: + ocf.logger.warning("transitionSummary: could not load transition summary") +- return False ++ return "" + if summary.find("Transition Summary:") < 0: +- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary) +- return False ++ ocf.logger.debug("transitionSummary: no transactions: %s" % summary) ++ return "" + summary = summary.split("Transition Summary:")[1] + ret = summary.split("\n").pop(0) + diff --git a/SOURCES/bz2182415-azure-events-2-improve-logic.patch b/SOURCES/bz2182415-azure-events-2-improve-logic.patch new file mode 100644 index 0000000..1b5aa9d --- /dev/null +++ b/SOURCES/bz2182415-azure-events-2-improve-logic.patch @@ -0,0 +1,77 @@ +From ff53e5c8d6867e580506d132fba6fcf6aa46b804 Mon Sep 17 00:00:00 2001 +From: Peter Varkoly +Date: Sat, 29 Apr 2023 08:09:11 +0200 +Subject: [PATCH] Use -LS instead of -Ls as parameter to get the Transition + Summary + +--- 
+ heartbeat/azure-events-az.in | 9 +++++---- + heartbeat/azure-events.in | 9 +++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in +index 67c02c642..46d4d1f3d 100644 +--- a/heartbeat/azure-events-az.in ++++ b/heartbeat/azure-events-az.in +@@ -298,7 +298,7 @@ class clusterHelper: + Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby) + """ + # Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node? +- # # crm_simulate -Ls ++ # # crm_simulate -LS + # Transition Summary: + # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1) + # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0) +@@ -308,15 +308,16 @@ class clusterHelper: + # Transition Summary: + ocf.logger.debug("transitionSummary: begin") + +- summary = clusterHelper._exec("crm_simulate", "-Ls") ++ summary = clusterHelper._exec("crm_simulate", "-LS") + if not summary: + ocf.logger.warning("transitionSummary: could not load transition summary") + return "" + if summary.find("Transition Summary:") < 0: + ocf.logger.debug("transitionSummary: no transactions: %s" % summary) + return "" +- summary = summary.split("Transition Summary:")[1] +- ret = summary.split("\n").pop(0) ++ j=summary.find('Transition Summary:') + len('Transition Summary:') ++ l=summary.lower().find('executing cluster transition:') ++ ret = list(filter(str.strip, summary[j:l].split("\n"))) + + ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret)) + return ret +diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in +index 5ad658df9..90acaba62 100644 +--- a/heartbeat/azure-events.in ++++ b/heartbeat/azure-events.in +@@ -297,7 +297,7 @@ class clusterHelper: + Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby) + """ + # Is a global crm_simulate 
"too much"? Or would it be sufficient it there are no planned transitions for a particular node? +- # # crm_simulate -Ls ++ # # crm_simulate -LS + # Transition Summary: + # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1) + # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0) +@@ -307,15 +307,16 @@ class clusterHelper: + # Transition Summary: + ocf.logger.debug("transitionSummary: begin") + +- summary = clusterHelper._exec("crm_simulate", "-Ls") ++ summary = clusterHelper._exec("crm_simulate", "-LS") + if not summary: + ocf.logger.warning("transitionSummary: could not load transition summary") + return "" + if summary.find("Transition Summary:") < 0: + ocf.logger.debug("transitionSummary: no transactions: %s" % summary) + return "" +- summary = summary.split("Transition Summary:")[1] +- ret = summary.split("\n").pop(0) ++ j=summary.find('Transition Summary:') + len('Transition Summary:') ++ l=summary.lower().find('executing cluster transition:') ++ ret = list(filter(str.strip, summary[j:l].split("\n"))) + + ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret)) + return ret diff --git a/SOURCES/bz2183133-Filesystem-fail-efs-utils-not-installed.patch b/SOURCES/bz2183133-Filesystem-fail-efs-utils-not-installed.patch new file mode 100644 index 0000000..72da518 --- /dev/null +++ b/SOURCES/bz2183133-Filesystem-fail-efs-utils-not-installed.patch @@ -0,0 +1,23 @@ +From b02b06c437b1d8cb1dcfe8ace47c2efc4a0e476c Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 30 Mar 2023 14:44:41 +0200 +Subject: [PATCH] Filesystem: fail if AWS efs-utils not installed when + fstype=efs + +--- + heartbeat/Filesystem | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 65088029ec..50c68f115b 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -456,7 +456,7 @@ fstype_supported() + # System (EFS) + case "$FSTYPE" in + fuse.*|glusterfs|rozofs) support="fuse";; +- efs) 
support="nfs4";; ++ efs) check_binary "mount.efs"; support="nfs4";; + esac + + if [ "$support" != "$FSTYPE" ]; then diff --git a/SOURCES/bz2184779-Filesystem-systemd-drop-in-net-fs.patch b/SOURCES/bz2184779-Filesystem-systemd-drop-in-net-fs.patch new file mode 100644 index 0000000..abb285c --- /dev/null +++ b/SOURCES/bz2184779-Filesystem-systemd-drop-in-net-fs.patch @@ -0,0 +1,29 @@ +From 78622f1d3e46d58b78efe33643d05bea4d6948a2 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Wed, 17 May 2023 12:29:38 +0200 +Subject: [PATCH] Filesystem: create systemd drop-in for network filesystems + +--- + heartbeat/Filesystem | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 50c68f115..65a9dffb5 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -1021,6 +1021,7 @@ is_option "ro" && + case "$FSTYPE" in + nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre) + CLUSTERSAFE=1 # this is kind of safe too ++ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" + ;; + # add here CLUSTERSAFE=0 for all filesystems which are not + # cluster aware and which, even if when mounted read-only, +@@ -1028,6 +1029,7 @@ nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|c + ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs) + if ocf_is_true "$OCF_RESKEY_force_clones"; then + CLUSTERSAFE=2 ++ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target" + else + CLUSTERSAFE=0 # these are not allowed + fi diff --git a/SOURCES/bz2207567-Filesystem-1-improve-stop-action.patch b/SOURCES/bz2207567-Filesystem-1-improve-stop-action.patch new file mode 100644 index 0000000..351600b --- /dev/null +++ b/SOURCES/bz2207567-Filesystem-1-improve-stop-action.patch @@ -0,0 +1,125 @@ +From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 4 Jul 2023 14:40:19 +0200 +Subject: [PATCH] Filesystem: improve 
stop-action and allow setting term/kill + signals and signal_delay for large filesystems + +--- + heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 70 insertions(+), 10 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 65a9dffb5..fe608ebfd 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto" + OCF_RESKEY_fast_stop_default="no" + OCF_RESKEY_force_clones_default="false" + OCF_RESKEY_force_unmount_default="true" ++OCF_RESKEY_term_signals_default="TERM" ++OCF_RESKEY_kill_signals_default="KILL" ++OCF_RESKEY_signal_delay_default="1" + + # RHEL specific defaults + if is_redhat_based; then +@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then + fi + : ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} + : ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}} ++: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}} ++: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}} ++: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}} + + # Variables used by multiple methods + HOSTOS=$(uname) +@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system. + + + ++ ++ ++Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action. ++ ++Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action ++ ++ ++ ++ ++ ++Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action. ++ ++Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action ++ ++ ++ ++ ++ ++How many seconds to wait after sending term/kill signals to processes in stop-action. 
++ ++How many seconds to wait after sending term/kill signals to processes in stop-action ++ ++ ++ + + + +@@ -663,19 +693,49 @@ try_umount() { + } + return $OCF_ERR_GENERIC + } +-fs_stop() { +- local SUB="$1" timeout=$2 sig cnt +- for sig in TERM KILL; do +- cnt=$((timeout/2)) # try half time with TERM +- while [ $cnt -gt 0 ]; do +- try_umount "$SUB" && +- return $OCF_SUCCESS +- ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig" ++timeout_child() { ++ local pid="$1" timeout="$2" killer ret ++ ++ # start job in the background that will KILL the given process after timeout expires ++ sleep $timeout && kill -s KILL $pid & ++ killer=$! ++ ++ # block until the child process either exits on its own or gets killed by the above killer pipeline ++ wait $pid ++ ret=$? ++ ++ # ret would be 127 + child exit code if the timeout expired ++ [ $ret -lt 128 ] && kill -s KILL $killer ++ return $ret ++} ++fs_stop_loop() { ++ local SUB="$1" signals="$2" sig ++ while true; do ++ for sig in $signals; do + signal_processes "$SUB" $sig +- cnt=$((cnt-1)) +- sleep 1 + done ++ sleep $OCF_RESKEY_signal_delay ++ try_umount "$SUB" && return $OCF_SUCCESS + done ++} ++fs_stop() { ++ local SUB="$1" timeout=$2 grace_time ret ++ grace_time=$((timeout/2)) ++ ++ # try gracefully terminating processes for up to half of the configured timeout ++ fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" & ++ timeout_child $! $grace_time ++ ret=$? ++ [ $ret -eq $OCF_SUCCESS ] && return $ret ++ ++ # try killing them for the rest of the timeout ++ fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" & ++ timeout_child $! $grace_time ++ ret=$? 
++ [ $ret -eq $OCF_SUCCESS ] && return $ret ++ ++ # timeout expired ++ ocf_exit_reason "Couldn't unmount $SUB within given timeout" + return $OCF_ERR_GENERIC + } + diff --git a/SOURCES/bz2207567-Filesystem-2-fix-incorrect-parameter-types.patch b/SOURCES/bz2207567-Filesystem-2-fix-incorrect-parameter-types.patch new file mode 100644 index 0000000..9f61043 --- /dev/null +++ b/SOURCES/bz2207567-Filesystem-2-fix-incorrect-parameter-types.patch @@ -0,0 +1,49 @@ +From 7056635f3f94c1bcaaa5ed5563dc3b0e9f6749e0 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 18 Jul 2023 14:12:27 +0200 +Subject: [PATCH] Filesystem: dont use boolean type for non-boolean parameters + +--- + heartbeat/Filesystem | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index ee55a4843..b9aae8d50 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -269,7 +269,7 @@ fuser cli tool. fuser is known to perform operations that can potentially + block if unresponsive nfs mounts are in use on the system. + + Kill processes before unmount +- ++ + + + +@@ -277,7 +277,7 @@ block if unresponsive nfs mounts are in use on the system. + Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action. + + Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action +- ++ + + + +@@ -285,7 +285,7 @@ Signals (names or numbers, whitespace separated) to send processes during gracef + Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action. + + Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action +- ++ + + + +@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef + How many seconds to wait after sending term/kill signals to processes in stop-action. 
+ + How many seconds to wait after sending term/kill signals to processes in stop-action +- ++ + + + diff --git a/SOURCES/bz2207567-Filesystem-3-fix-signal_delay-default-value.patch b/SOURCES/bz2207567-Filesystem-3-fix-signal_delay-default-value.patch new file mode 100644 index 0000000..5079b76 --- /dev/null +++ b/SOURCES/bz2207567-Filesystem-3-fix-signal_delay-default-value.patch @@ -0,0 +1,23 @@ +From f779fad52e5f515ca81218da6098398bdecac286 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Thu, 20 Jul 2023 10:18:12 +0200 +Subject: [PATCH] Filesystem: fix incorrect variable name for signal_delay + default in metadata + +--- + heartbeat/Filesystem | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index b9aae8d50..066562891 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef + How many seconds to wait after sending term/kill signals to processes in stop-action. + + How many seconds to wait after sending term/kill signals to processes in stop-action +- ++ + + + diff --git a/SOURCES/bz2209433-Delay-1-increase-default-timeouts.patch b/SOURCES/bz2209433-Delay-1-increase-default-timeouts.patch new file mode 100644 index 0000000..7c1941b --- /dev/null +++ b/SOURCES/bz2209433-Delay-1-increase-default-timeouts.patch @@ -0,0 +1,27 @@ +From a913eb6a9a8732db7c56d2e0be937dbd0db9dc38 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Fri, 26 May 2023 12:45:13 +0200 +Subject: [PATCH] Delay: increase stop, status and monitor timeouts to 40s to + avoid failing with default values + +--- + heartbeat/Delay | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/heartbeat/Delay b/heartbeat/Delay +index 7ba6623f24..bc6c13559b 100755 +--- a/heartbeat/Delay ++++ b/heartbeat/Delay +@@ -89,9 +89,9 @@ Defaults to "startdelay" if unspecified. 
+ + + +- +- +- ++ ++ ++ + + + diff --git a/SOURCES/bz2209433-Delay-2-remove-incorrect-statement.patch b/SOURCES/bz2209433-Delay-2-remove-incorrect-statement.patch new file mode 100644 index 0000000..611f4a7 --- /dev/null +++ b/SOURCES/bz2209433-Delay-2-remove-incorrect-statement.patch @@ -0,0 +1,30 @@ +From fe8a807dae0398b811d1ee63ebd7202280b2b678 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 18 Jul 2023 14:51:00 +0200 +Subject: [PATCH] Delay: remove statement about defaulting to "startdelay" + value if not specified + +--- + heartbeat/Delay | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/heartbeat/Delay b/heartbeat/Delay +index bc6c13559..5aa8f4608 100755 +--- a/heartbeat/Delay ++++ b/heartbeat/Delay +@@ -71,7 +71,6 @@ How long in seconds to delay on start operation. + + + How long in seconds to delay on stop operation. +-Defaults to "startdelay" if unspecified. + + Stop delay + +@@ -80,7 +79,6 @@ Defaults to "startdelay" if unspecified. + + + How long in seconds to delay on monitor operation. +-Defaults to "startdelay" if unspecified. 
+ + Monitor delay + diff --git a/SPECS/resource-agents.spec b/SPECS/resource-agents.spec index 2b72748..4fab320 100644 --- a/SPECS/resource-agents.spec +++ b/SPECS/resource-agents.spec @@ -45,7 +45,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.10.0 -Release: 34%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 43%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents Source0: %{upstream_prefix}-%{upstream_version}.tar.gz @@ -93,6 +93,22 @@ Patch40: bz2157872-2-Filesystem-CTDB-validate-all-improvements.patch Patch41: bz2157872-3-pgsqlms-validate-all-OCF_CHECK_LEVEL-10.patch Patch42: bz2157872-4-exportfs-pgsql-validate-all-fixes.patch Patch43: bz2157872-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch +Patch44: bz2142518-IPaddr2-IPsrcaddr-1-support-policy-based-routing.patch +Patch45: bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch +Patch46: bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch +Patch47: bz2179003-mysql-1-replication-fixes.patch +Patch48: bz2174911-LVM-activate-failover-with-missing-pvs.patch +Patch49: bz2182415-azure-events-1-fix-no-transition-summary.patch +Patch50: bz2182415-azure-events-2-improve-logic.patch +Patch51: bz2183133-Filesystem-fail-efs-utils-not-installed.patch +Patch52: bz2184779-Filesystem-systemd-drop-in-net-fs.patch +Patch53: bz2179003-mysql-2-fix-demoted-score-bounce.patch +Patch54: bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch +Patch55: bz2207567-Filesystem-1-improve-stop-action.patch +Patch56: bz2207567-Filesystem-2-fix-incorrect-parameter-types.patch +Patch57: bz2209433-Delay-1-increase-default-timeouts.patch +Patch58: bz2209433-Delay-2-remove-incorrect-statement.patch +Patch59: bz2207567-Filesystem-3-fix-signal_delay-default-value.patch # bundled ha-cloud-support libs Patch500: 
ha-cloud-support-aws.patch @@ -204,7 +220,7 @@ in a cluster environment. License: PostgreSQL Summary: PostgreSQL Automatic Failover (PAF) resource agent Requires: %{name} = %{version}-%{release} -Requires: perl-interpreter perl-English perl-FindBin +Requires: perl-interpreter perl-lib perl-English perl-FindBin %description paf PostgreSQL Automatic Failover (PAF) resource agents allows PostgreSQL @@ -260,6 +276,22 @@ exit 1 %patch41 -p1 %patch42 -p1 %patch43 -p1 +%patch44 -p1 +%patch45 -p1 +%patch46 -p1 +%patch47 -p1 +%patch48 -p1 +%patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 # bundled ha-cloud-support libs %patch500 -p1 @@ -581,6 +613,54 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Thu Jul 20 2023 Oyvind Albrigtsen - 4.10.0-43 +- Filesystem: improve stop-action and allow setting term/kill signals + and signal_delay for large filesystems + + Resolves: rhbz#2207567 + +* Tue Jul 18 2023 Oyvind Albrigtsen - 4.10.0-42 +- Delay: increase stop, status and monitor timeouts to 40s to avoid + failing with default values + + Resolves: rhbz#2209433 + +* Wed Jun 21 2023 Oyvind Albrigtsen - 4.10.0-40 +- IPaddr2/IPsrcaddr: support policy-based routing + + Resolves: rhbz#2142518 + +* Wed Jun 14 2023 Oyvind Albrigtsen - 4.10.0-39 +- mysql: fix replication issues + + Resolves: rhbz#2179003 + +* Mon May 22 2023 Oyvind Albrigtsen - 4.10.0-38 +- resource-agents-paf: add perl-lib dependency +- Filesystem: create systemd drop-in for network filesystems + + Resolves: rhbz#2203813 + Resolves: rhbz#2184779 + +* Mon May 1 2023 Oyvind Albrigtsen - 4.10.0-37 +- azure-events*: fix for no "Transition Summary" for Pacemaker 2.1+ +- Filesystem: fail if AWS efs-utils not installed when fstype=efs + + Resolves: rhbz#2182415 + Resolves: rhbz#2183133 + +* Tue Mar 21 2023 Oyvind Albrigtsen - 4.10.0-36 +- LVM-activate: failover with 
missing PVs + + Resolves: rhbz#2174911 + +* Tue Mar 21 2023 Oyvind Albrigtsen - 4.10.0-35 +- lvmlockd: add "use_lvmlockd = 1" if it's commented out or missing +- ethmonitor: dont log "Interface does not exist" for monitor-action + + Resolves: rhbz#2149968 + Resolves: rhbz#2174896 + * Wed Jan 25 2023 Oyvind Albrigtsen - 4.10.0-34 - all agents: dont check notify/promotable settings during validate-action