import CS resource-agents-4.10.0-43.el9_3

This commit is contained in:
eabdullin 2023-11-07 09:23:44 +00:00
parent 91898e8949
commit 2ba792472d
17 changed files with 962 additions and 2 deletions

View File

@ -0,0 +1,84 @@
From 4d87bcfe5df8a1e40ee945e095ac9e7cca147ec4 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 29 Jun 2022 10:26:25 +0200
Subject: [PATCH] IPaddr2/IPsrcaddr: add/modify table parameter to be able to
find interface while using policy based routing
---
heartbeat/IPaddr2 | 12 ++++++++++++
heartbeat/IPsrcaddr | 5 ++++-
heartbeat/findif.sh | 2 +-
3 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 97a7431a2..e8384c586 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -73,6 +73,7 @@ OCF_RESKEY_ip_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_broadcast_default=""
OCF_RESKEY_iflabel_default=""
+OCF_RESKEY_table_default=""
OCF_RESKEY_cidr_netmask_default=""
OCF_RESKEY_lvs_support_default=false
OCF_RESKEY_lvs_ipv6_addrlabel_default=false
@@ -97,6 +98,7 @@ OCF_RESKEY_network_namespace_default=""
: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}
: ${OCF_RESKEY_broadcast=${OCF_RESKEY_broadcast_default}}
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
+: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
: ${OCF_RESKEY_lvs_support=${OCF_RESKEY_lvs_support_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel=${OCF_RESKEY_lvs_ipv6_addrlabel_default}}
: ${OCF_RESKEY_lvs_ipv6_addrlabel_value=${OCF_RESKEY_lvs_ipv6_addrlabel_value_default}}
@@ -239,6 +241,16 @@ If a label is specified in nic name, this parameter has no effect.
<content type="string" default="${OCF_RESKEY_iflabel_default}"/>
</parameter>
+<parameter name="table">
+<longdesc lang="en">
+Table to use to lookup which interface to use for the IP.
+
+This can be used for policy based routing. See man ip-rule(8).
+</longdesc>
+<shortdesc lang="en">Table</shortdesc>
+<content type="string" default="${OCF_RESKEY_table_default}" />
+</parameter>
+
<parameter name="lvs_support">
<longdesc lang="en">
Enable support for LVS Direct Routing configurations. In case a IP
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 1bd41a930..cf106cc34 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -155,13 +155,16 @@ Metric. Only needed if incorrect metric value is used.
<parameter name="table">
<longdesc lang="en">
-Table to modify. E.g. "local".
+Table to modify and use for interface lookup. E.g. "local".
The table has to have a route matching the "destination" parameter.
+
+This can be used for policy based routing. See man ip-rule(8).
</longdesc>
<shortdesc lang="en">Table</shortdesc>
<content type="string" default="${OCF_RESKEY_table_default}" />
</parameter>
+
</parameters>
<actions>
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 66bc6d56a..1a40cc9a4 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -32,7 +32,7 @@ prefixcheck() {
getnetworkinfo()
{
local line netinfo
- ip -o -f inet route list match $OCF_RESKEY_ip table local scope host | (while read line;
+ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | (while read line;
do
netinfo=`echo $line | awk '{print $2}'`
case $netinfo in

View File

@ -0,0 +1,35 @@
From da9e8e691f39494e14f8f11173b6ab6433384396 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 20 Jun 2023 14:19:23 +0200
Subject: [PATCH] findif.sh: fix table parameter so it uses main table by
default
---
heartbeat/findif.sh | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/findif.sh b/heartbeat/findif.sh
index 1a40cc9a4b..6c04c98c19 100644
--- a/heartbeat/findif.sh
+++ b/heartbeat/findif.sh
@@ -32,7 +32,7 @@ prefixcheck() {
getnetworkinfo()
{
local line netinfo
- ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table=local}" scope host | (while read line;
+ ip -o -f inet route list match $OCF_RESKEY_ip table "${OCF_RESKEY_table:=main}" scope host | (while read line;
do
netinfo=`echo $line | awk '{print $2}'`
case $netinfo in
@@ -215,9 +215,9 @@ findif()
fi
if [ -n "$nic" ] ; then
# NIC supports more than two.
- set -- $(ip -o -f $family route list match $match $scope | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | grep "dev $nic " | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
else
- set -- $(ip -o -f $family route list match $match $scope | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
+ set -- $(ip -o -f $family route list match $match $scope table "${OCF_RESKEY_table:=main}" | awk 'BEGIN{best=0} /\// { mask=$1; sub(".*/", "", mask); if( int(mask)>=best ) { best=int(mask); best_ln=$0; } } END{print best_ln}')
fi
if [ $# = 0 ] ; then
case $OCF_RESKEY_ip in

View File

@ -0,0 +1,42 @@
From 2695888c983df331b0fee407a5c69c493a360313 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 30 Nov 2022 12:07:05 +0100
Subject: [PATCH] lvmlockd: add "use_lvmlockd = 1" if it's commented out or
missing
---
heartbeat/lvmlockd | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/heartbeat/lvmlockd b/heartbeat/lvmlockd
index dc7bd2d7e..f4b299f28 100755
--- a/heartbeat/lvmlockd
+++ b/heartbeat/lvmlockd
@@ -180,14 +180,23 @@ setup_lvm_config()
lock_type=$(echo "$out" | cut -d'=' -f2)
if [ -z "$use_lvmlockd" ]; then
- ocf_exit_reason "\"use_lvmlockd\" not set in /etc/lvm/lvm.conf ..."
- exit $OCF_ERR_CONFIGURED
- fi
+ ocf_log info "adding \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..."
+ cat >> /etc/lvm/lvm.conf << EOF
+
+global {
+ use_lvmlockd = 1
+}
+EOF
- if [ -n "$use_lvmlockd" ] && [ "$use_lvmlockd" != 1 ] ; then
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "unable to add \"use_lvmlockd=1\" to /etc/lvm/lvm.conf ..."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ elif [ "$use_lvmlockd" != 1 ] ; then
ocf_log info "setting \"use_lvmlockd=1\" in /etc/lvm/lvm.conf ..."
sed -i 's,^[[:blank:]]*use_lvmlockd[[:blank:]]*=.*,\ \ \ \ use_lvmlockd = 1,g' /etc/lvm/lvm.conf
fi
+
if [ -n "$lock_type" ] ; then
# locking_type was removed from config in v2.03
ocf_version_cmp "$(lvmconfig --version | awk '/LVM ver/ {sub(/\(.*/, "", $3); print $3}')" "2.03"

View File

@ -0,0 +1,24 @@
From e7a748d35fe56f2be727ecae1885a2f1366f41bf Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 15 Mar 2023 13:03:07 +0100
Subject: [PATCH] ethmonitor: dont log "Interface does not exist" for
monitor-action
---
heartbeat/ethmonitor | 3 +++
1 file changed, 3 insertions(+)
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index 451738a0b5..f9c9ef4bdd 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -271,6 +271,9 @@ if_init() {
validate-all)
ocf_exit_reason "Interface $NIC does not exist"
exit $OCF_ERR_CONFIGURED;;
+ monitor)
+ ocf_log debug "Interface $NIC does not exist"
+ ;;
*)
## It might be a bond interface which is temporarily not available, therefore we want to continue here
ocf_log warn "Interface $NIC does not exist"

View File

@ -0,0 +1,156 @@
From 51dd5d5d051aa3b3f0c104f8e80f212cd5780fc3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 14 Mar 2023 09:14:28 +0100
Subject: [PATCH] LVM-activate: failover with missing PVs
There are two changes included:
- Allow the system ID to be changed on a VG when the VG is
missing PVs, as long as a majority of PVs are still present.
This requires a recent version of lvm that supports the
--majoritypvs option for vgchange.
- Use --activationmode degraded when activating LVs so that
raid LVs can be activated when legs are missing, as long as
sufficient devices are available for raid to provide all the
data in the LV.
By David Teigland.
---
heartbeat/LVM-activate | 82 ++++++++++++++++++++++++++++++++----------
1 file changed, 64 insertions(+), 18 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index e951a08e9c..f6f24a3b52 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -50,6 +50,8 @@ OCF_RESKEY_vg_access_mode_default=""
OCF_RESKEY_activation_mode_default="exclusive"
OCF_RESKEY_tag_default="pacemaker"
OCF_RESKEY_partial_activation_default="false"
+OCF_RESKEY_degraded_activation_default="false"
+OCF_RESKEY_majority_pvs_default="false"
: ${OCF_RESKEY_vgname=${OCF_RESKEY_vgname_default}}
: ${OCF_RESKEY_lvname=${OCF_RESKEY_lvname_default}}
@@ -57,6 +59,8 @@ OCF_RESKEY_partial_activation_default="false"
: ${OCF_RESKEY_activation_mode=${OCF_RESKEY_activation_mode_default}}
: ${OCF_RESKEY_tag=${OCF_RESKEY_tag_default}}
: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
+: ${OCF_RESKEY_degraded_activation=${OCF_RESKEY_degraded_activation_default}}
+: ${OCF_RESKEY_majority_pvs=${OCF_RESKEY_majority_pvs_default}}
# If LV is given, only activate this named LV; otherwise, activate all
# LVs in the named VG.
@@ -191,6 +195,29 @@ logical volumes.
<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
</parameter>
+<parameter name="degraded_activation" unique="0" required="0">
+<longdesc lang="en">
+Activate RAID LVs using the "degraded" activation mode. This allows RAID
+LVs to be activated with missing PVs if all data can be provided with
+RAID redundancy. The RAID level determines the number of PVs that are
+required for degraded activation to succeed. If fewer PVs are available,
+then degraded activation will fail. Also enable majority_pvs.
+</longdesc>
+<shortdesc lang="en">Activate RAID LVs in degraded mode when missing PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_degraded_activation_default}" />
+</parameter>
+
+<parameter name="majority_pvs" unique="0" required="0">
+<longdesc lang="en">
+If set, the VG system ID can be reassigned to a new host if a majority
+of PVs in the VG are present. Otherwise, VG failover with system ID
+will fail when the VG is missing PVs. Also enable degraded_activation
+when RAID LVs are used.
+</longdesc>
+<shortdesc lang="en">Allow changing the system ID of a VG with a majority of PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_majority_pvs_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -524,24 +551,27 @@ lvm_validate() {
exit $OCF_ERR_GENERIC
fi
- # Inconsistency might be due to missing physical volumes, which doesn't
- # automatically mean we should fail. If partial_activation=true then
- # we should let start try to handle it, or if no PVs are listed as
- # "unknown device" then another node may have marked a device missing
- # where we have access to all of them and can start without issue.
- case $(vgs -o attr --noheadings $VG | tr -d ' ') in
- ???p??*)
- if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
- # We are missing devices and cannot activate partially
- ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
- exit $OCF_ERR_GENERIC
+ vg_missing_pv_count=$(vgs -o missing_pv_count --noheadings ${VG} 2>/dev/null)
+
+ if [ $vg_missing_pv_count -gt 0 ]; then
+ ocf_log warn "Volume Group ${VG} is missing $vg_missing_pv_count PVs."
+
+ # Setting new system ID will succeed if over half of PVs remain.
+ # Don't try to calculate here if a majority is present,
+ # but leave this up to the vgchange command to determine.
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
+ ocf_log warn "Attempting fail over with missing PVs (majority.)"
+
+ # Setting new system ID will fail, and behavior is undefined for
+ # other access modes.
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ ocf_log warn "Attempting fail over with missing PVs (partial.)"
+
else
- # We are missing devices but are allowed to activate partially.
- # Assume that caused the vgck failure and carry on
- ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider majority_pvs=true"
+ exit $OCF_ERR_GENERIC
fi
- ;;
- esac
+ fi
# Get the access mode from VG metadata and check if it matches the input
# value. Skip to check "tagging" mode because there's no reliable way to
@@ -601,7 +631,18 @@ lvm_validate() {
do_activate() {
do_activate_opt=$1
- if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ if ocf_is_true "$OCF_RESKEY_degraded_activation" ; then
+ # This will allow a RAID LV to be activated if sufficient
+ # devices are available to allow the LV to be usable
+ do_activate_opt="${do_activate_opt} --activationmode degraded"
+
+ elif ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # This will allow a mirror LV to be activated if any
+ # devices are missing, but the activated LV may not be
+ # usable, so it is not recommended. Also, other LV
+ # types without data redundancy will be activated
+ # when partial is set.
+ # RAID LVs and degraded_activation should be used instead.
do_activate_opt="${do_activate_opt} --partial"
fi
@@ -661,11 +702,16 @@ clvmd_activate() {
}
systemid_activate() {
+ majority_opt=""
set_autoactivation=0
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
+ if ocf_is_true "$OCF_RESKEY_majority_pvs" ; then
+ vgchange --help | grep '\--majoritypvs' >/dev/null 2>&1 && majority_opt="--majoritypvs"
+ fi
+
# Put our system ID on the VG
- vgchange -y --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
+ vgchange -y $majority_opt --config "local/extra_system_ids=[\"${cur_systemid}\"]" \
--systemid ${SYSTEM_ID} ${VG}
vgchange --help | grep '\--setautoactivation' >/dev/null 2>&1 && set_autoactivation=1

View File

@ -0,0 +1,70 @@
From 706b48fd93a75a582c538013aea1418b6ed69dd0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 9 Mar 2023 15:57:59 +0100
Subject: [PATCH] mysql: promotable fixes to avoid nodes getting bounced around
by setting -v 1/-v 2, and added OCF_CHECK_LEVEL=10 for promotable resources
to be able to distinguish between promoted and not
---
heartbeat/mysql | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 9ab49ab20e..29ed427319 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -757,6 +757,10 @@ mysql_monitor() {
status_loglevel="info"
fi
+ if ocf_is_ms; then
+ OCF_CHECK_LEVEL=10
+ fi
+
mysql_common_status $status_loglevel
rc=$?
@@ -777,7 +781,13 @@ mysql_monitor() {
return $rc
fi
- if [ $OCF_CHECK_LEVEL -gt 0 -a -n "$OCF_RESKEY_test_table" ]; then
+ if [ $OCF_CHECK_LEVEL -eq 10 ]; then
+ if [ -z "$OCF_RESKEY_test_table" ]; then
+ ocf_exit_reason "test_table not set"
+ return $OCF_ERR_CONFIGURED
+
+ fi
+
# Check if this instance is configured as a slave, and if so
# check slave status
if is_slave; then
@@ -795,18 +805,16 @@ mysql_monitor() {
ocf_exit_reason "Failed to select from $test_table";
return $OCF_ERR_GENERIC;
fi
- else
- # In case no exnteded tests are enabled and we are in master/slave mode _always_ set the master score to 1 if we reached this point
- ocf_is_ms && $CRM_MASTER -v 1
fi
if ocf_is_ms && ! get_read_only; then
ocf_log debug "MySQL monitor succeeded (master)";
# Always set master score for the master
- $CRM_MASTER -v 2
+ $CRM_MASTER -v $((${OCF_RESKEY_max_slave_lag}+1))
return $OCF_RUNNING_MASTER
else
ocf_log debug "MySQL monitor succeeded";
+ ocf_is_ms && $CRM_MASTER -v 1
return $OCF_SUCCESS
fi
}
@@ -873,7 +881,6 @@ mysql_start() {
# preference set by the administrator. We choose a low
# greater-than-zero preference.
$CRM_MASTER -v 1
-
fi
# Initial monitor action

View File

@ -0,0 +1,32 @@
From 34483f8029ea9ab25220cfee71d53adaf5aacaa0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 14 Jun 2023 14:37:01 +0200
Subject: [PATCH] mysql: fix promotion_score bouncing between ~3600 and 1 on
demoted nodes
---
heartbeat/mysql | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/heartbeat/mysql b/heartbeat/mysql
index 29ed42731..1df2fc0f2 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -517,17 +517,6 @@ check_slave() {
exit $OCF_ERR_INSTALLED
fi
- elif ocf_is_ms; then
- # Even if we're not set to evict lagging slaves, we can
- # still use the seconds behind master value to set our
- # master preference.
- local master_pref
- master_pref=$((${OCF_RESKEY_max_slave_lag}-${secs_behind}))
- if [ $master_pref -lt 0 ]; then
- # Sanitize a below-zero preference to just zero
- master_pref=0
- fi
- $CRM_MASTER -v $master_pref
fi
# is the slave ok to have a VIP on it

View File

@ -0,0 +1,54 @@
From 81bb58b05d2ddabd17fe31af39f0e857e61db3c9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 Mar 2023 16:53:45 +0200
Subject: [PATCH] azure-events*: fix for no "Transition Summary" for Pacemaker
2.1+
---
heartbeat/azure-events-az.in | 8 ++++----
heartbeat/azure-events.in | 6 +++---
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 59d0953061..67c02c6422 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -311,10 +311,10 @@ class clusterHelper:
summary = clusterHelper._exec("crm_simulate", "-Ls")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
- return False
+ return ""
if summary.find("Transition Summary:") < 0:
- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
- return False
+ ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
+ return ""
summary = summary.split("Transition Summary:")[1]
ret = summary.split("\n").pop(0)
@@ -768,4 +768,4 @@ def main():
agent.run()
if __name__ == '__main__':
- main()
\ No newline at end of file
+ main()
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 66e129060a..5ad658df93 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -310,10 +310,10 @@ class clusterHelper:
summary = clusterHelper._exec("crm_simulate", "-Ls")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
- return False
+ return ""
if summary.find("Transition Summary:") < 0:
- ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
- return False
+ ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
+ return ""
summary = summary.split("Transition Summary:")[1]
ret = summary.split("\n").pop(0)

View File

@ -0,0 +1,77 @@
From ff53e5c8d6867e580506d132fba6fcf6aa46b804 Mon Sep 17 00:00:00 2001
From: Peter Varkoly <varkoly@suse.com>
Date: Sat, 29 Apr 2023 08:09:11 +0200
Subject: [PATCH] Use -LS instead of -Ls as parameter to get the Transition
Summary
---
heartbeat/azure-events-az.in | 9 +++++----
heartbeat/azure-events.in | 9 +++++----
2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 67c02c642..46d4d1f3d 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -298,7 +298,7 @@ class clusterHelper:
Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
"""
# <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
- # # crm_simulate -Ls
+ # # crm_simulate -LS
# Transition Summary:
# * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
# * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
@@ -308,15 +308,16 @@ class clusterHelper:
# Transition Summary:
ocf.logger.debug("transitionSummary: begin")
- summary = clusterHelper._exec("crm_simulate", "-Ls")
+ summary = clusterHelper._exec("crm_simulate", "-LS")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
return ""
if summary.find("Transition Summary:") < 0:
ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
return ""
- summary = summary.split("Transition Summary:")[1]
- ret = summary.split("\n").pop(0)
+ j=summary.find('Transition Summary:') + len('Transition Summary:')
+ l=summary.lower().find('executing cluster transition:')
+ ret = list(filter(str.strip, summary[j:l].split("\n")))
ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
return ret
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 5ad658df9..90acaba62 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -297,7 +297,7 @@ class clusterHelper:
Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
"""
# <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
- # # crm_simulate -Ls
+ # # crm_simulate -LS
# Transition Summary:
# * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
# * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
@@ -307,15 +307,16 @@ class clusterHelper:
# Transition Summary:
ocf.logger.debug("transitionSummary: begin")
- summary = clusterHelper._exec("crm_simulate", "-Ls")
+ summary = clusterHelper._exec("crm_simulate", "-LS")
if not summary:
ocf.logger.warning("transitionSummary: could not load transition summary")
return ""
if summary.find("Transition Summary:") < 0:
ocf.logger.debug("transitionSummary: no transactions: %s" % summary)
return ""
- summary = summary.split("Transition Summary:")[1]
- ret = summary.split("\n").pop(0)
+ j=summary.find('Transition Summary:') + len('Transition Summary:')
+ l=summary.lower().find('executing cluster transition:')
+ ret = list(filter(str.strip, summary[j:l].split("\n")))
ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
return ret

View File

@ -0,0 +1,23 @@
From b02b06c437b1d8cb1dcfe8ace47c2efc4a0e476c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 30 Mar 2023 14:44:41 +0200
Subject: [PATCH] Filesystem: fail if AWS efs-utils not installed when
fstype=efs
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 65088029ec..50c68f115b 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -456,7 +456,7 @@ fstype_supported()
# System (EFS)
case "$FSTYPE" in
fuse.*|glusterfs|rozofs) support="fuse";;
- efs) support="nfs4";;
+ efs) check_binary "mount.efs"; support="nfs4";;
esac
if [ "$support" != "$FSTYPE" ]; then

View File

@ -0,0 +1,29 @@
From 78622f1d3e46d58b78efe33643d05bea4d6948a2 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 17 May 2023 12:29:38 +0200
Subject: [PATCH] Filesystem: create systemd drop-in for network filesystems
---
heartbeat/Filesystem | 2 ++
1 file changed, 2 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 50c68f115..65a9dffb5 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -1021,6 +1021,7 @@ is_option "ro" &&
case "$FSTYPE" in
nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
CLUSTERSAFE=1 # this is kind of safe too
+ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
;;
# add here CLUSTERSAFE=0 for all filesystems which are not
# cluster aware and which, even if when mounted read-only,
@@ -1028,6 +1029,7 @@ nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|c
ext4|ext4dev|ext3|reiserfs|reiser4|xfs|jfs)
if ocf_is_true "$OCF_RESKEY_force_clones"; then
CLUSTERSAFE=2
+ systemd_drop_in "99-Filesystem-remote" "After" "remote-fs.target"
else
CLUSTERSAFE=0 # these are not allowed
fi

View File

@ -0,0 +1,125 @@
From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 4 Jul 2023 14:40:19 +0200
Subject: [PATCH] Filesystem: improve stop-action and allow setting term/kill
signals and signal_delay for large filesystems
---
heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 70 insertions(+), 10 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 65a9dffb5..fe608ebfd 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto"
OCF_RESKEY_fast_stop_default="no"
OCF_RESKEY_force_clones_default="false"
OCF_RESKEY_force_unmount_default="true"
+OCF_RESKEY_term_signals_default="TERM"
+OCF_RESKEY_kill_signals_default="KILL"
+OCF_RESKEY_signal_delay_default="1"
# RHEL specific defaults
if is_redhat_based; then
@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then
fi
: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
: ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}}
+: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}}
+: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}}
+: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}}
# Variables used by multiple methods
HOSTOS=$(uname)
@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system.
<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>
+<parameter name="term_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_term_signals_default}" />
+</parameter>
+
+<parameter name="kill_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signals_default}" />
+</parameter>
+
+<parameter name="signal_delay">
+<longdesc lang="en">
+How many seconds to wait after sending term/kill signals to processes in stop-action.
+</longdesc>
+<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signal_delay}" />
+</parameter>
+
</parameters>
<actions>
@@ -663,19 +693,49 @@ try_umount() {
}
return $OCF_ERR_GENERIC
}
-fs_stop() {
- local SUB="$1" timeout=$2 sig cnt
- for sig in TERM KILL; do
- cnt=$((timeout/2)) # try half time with TERM
- while [ $cnt -gt 0 ]; do
- try_umount "$SUB" &&
- return $OCF_SUCCESS
- ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
+timeout_child() {
+ local pid="$1" timeout="$2" killer ret
+
+ # start job in the background that will KILL the given process after timeout expires
+ sleep $timeout && kill -s KILL $pid &
+ killer=$!
+
+ # block until the child process either exits on its own or gets killed by the above killer pipeline
+ wait $pid
+ ret=$?
+
+ # ret would be 128 + signal number (137 for KILL) if the timeout expired and the killer fired
+ [ $ret -lt 128 ] && kill -s KILL $killer
+ return $ret
+}
+fs_stop_loop() {
+ local SUB="$1" signals="$2" sig
+ while true; do
+ for sig in $signals; do
signal_processes "$SUB" $sig
- cnt=$((cnt-1))
- sleep 1
done
+ sleep $OCF_RESKEY_signal_delay
+ try_umount "$SUB" && return $OCF_SUCCESS
done
+}
+fs_stop() {
+ local SUB="$1" timeout=$2 grace_time ret
+ grace_time=$((timeout/2))
+
+ # try gracefully terminating processes for up to half of the configured timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # try killing them for the rest of the timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # timeout expired
+ ocf_exit_reason "Couldn't unmount $SUB within given timeout"
return $OCF_ERR_GENERIC
}

View File

@ -0,0 +1,49 @@
From 7056635f3f94c1bcaaa5ed5563dc3b0e9f6749e0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 18 Jul 2023 14:12:27 +0200
Subject: [PATCH] Filesystem: dont use boolean type for non-boolean parameters
---
heartbeat/Filesystem | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index ee55a4843..b9aae8d50 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -269,7 +269,7 @@ fuser cli tool. fuser is known to perform operations that can potentially
block if unresponsive nfs mounts are in use on the system.
</longdesc>
<shortdesc lang="en">Kill processes before unmount</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
+<content type="string" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>
<parameter name="term_signals">
@@ -277,7 +277,7 @@ block if unresponsive nfs mounts are in use on the system.
Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_term_signals_default}" />
+<content type="string" default="${OCF_RESKEY_term_signals_default}" />
</parameter>
<parameter name="kill_signals">
@@ -285,7 +285,7 @@ Signals (names or numbers, whitespace separated) to send processes during gracef
Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
</longdesc>
<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_kill_signals_default}" />
+<content type="string" default="${OCF_RESKEY_kill_signals_default}" />
</parameter>
<parameter name="signal_delay">
@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef
How many seconds to wait after sending term/kill signals to processes in stop-action.
</longdesc>
<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
-<content type="boolean" default="${OCF_RESKEY_kill_signal_delay}" />
+<content type="string" default="${OCF_RESKEY_kill_signal_delay}" />
</parameter>
</parameters>

View File

@ -0,0 +1,23 @@
From f779fad52e5f515ca81218da6098398bdecac286 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 20 Jul 2023 10:18:12 +0200
Subject: [PATCH] Filesystem: fix incorrect variable name for signal_delay
default in metadata
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index b9aae8d50..066562891 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -293,7 +293,7 @@ Signals (names or numbers, whitespace separated) to send processes during forcef
How many seconds to wait after sending term/kill signals to processes in stop-action.
</longdesc>
<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
-<content type="string" default="${OCF_RESKEY_kill_signal_delay}" />
+<content type="string" default="${OCF_RESKEY_signal_delay_default}" />
</parameter>
</parameters>

View File

@ -0,0 +1,27 @@
From a913eb6a9a8732db7c56d2e0be937dbd0db9dc38 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 26 May 2023 12:45:13 +0200
Subject: [PATCH] Delay: increase stop, status and monitor timeouts to 40s to
avoid failing with default values
---
heartbeat/Delay | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Delay b/heartbeat/Delay
index 7ba6623f24..bc6c13559b 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -89,9 +89,9 @@ Defaults to "startdelay" if unspecified.
<actions>
<action name="start" timeout="30s" />
-<action name="stop" timeout="30s" />
-<action name="status" depth="0" timeout="30s" interval="10s" />
-<action name="monitor" depth="0" timeout="30s" interval="10s" />
+<action name="stop" timeout="40s" />
+<action name="status" depth="0" timeout="40s" interval="10s" />
+<action name="monitor" depth="0" timeout="40s" interval="10s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="5s" />
</actions>

View File

@ -0,0 +1,30 @@
From fe8a807dae0398b811d1ee63ebd7202280b2b678 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 18 Jul 2023 14:51:00 +0200
Subject: [PATCH] Delay: remove statement about defaulting to "startdelay"
value if not specified
---
heartbeat/Delay | 2 --
1 file changed, 2 deletions(-)
diff --git a/heartbeat/Delay b/heartbeat/Delay
index bc6c13559..5aa8f4608 100755
--- a/heartbeat/Delay
+++ b/heartbeat/Delay
@@ -71,7 +71,6 @@ How long in seconds to delay on start operation.
<parameter name="stopdelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on stop operation.
-Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Stop delay</shortdesc>
<content type="integer" default="${OCF_RESKEY_stopdelay_default}" />
@@ -80,7 +79,6 @@ Defaults to "startdelay" if unspecified.
<parameter name="mondelay" unique="0" required="0">
<longdesc lang="en">
How long in seconds to delay on monitor operation.
-Defaults to "startdelay" if unspecified.
</longdesc>
<shortdesc lang="en">Monitor delay</shortdesc>
<content type="integer" default="${OCF_RESKEY_mondelay_default}" />

View File

@ -45,7 +45,7 @@
Name: resource-agents Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.10.0 Version: 4.10.0
Release: 34%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} Release: 43%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@ -93,6 +93,22 @@ Patch40: bz2157872-2-Filesystem-CTDB-validate-all-improvements.patch
Patch41: bz2157872-3-pgsqlms-validate-all-OCF_CHECK_LEVEL-10.patch Patch41: bz2157872-3-pgsqlms-validate-all-OCF_CHECK_LEVEL-10.patch
Patch42: bz2157872-4-exportfs-pgsql-validate-all-fixes.patch Patch42: bz2157872-4-exportfs-pgsql-validate-all-fixes.patch
Patch43: bz2157872-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch Patch43: bz2157872-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch
Patch44: bz2142518-IPaddr2-IPsrcaddr-1-support-policy-based-routing.patch
Patch45: bz2149968-lvmlockd-add-use_lvmlockd-if-missing.patch
Patch46: bz2174896-ethmonitor-dont-log-iface-doesnt-exist-monitor.patch
Patch47: bz2179003-mysql-1-replication-fixes.patch
Patch48: bz2174911-LVM-activate-failover-with-missing-pvs.patch
Patch49: bz2182415-azure-events-1-fix-no-transition-summary.patch
Patch50: bz2182415-azure-events-2-improve-logic.patch
Patch51: bz2183133-Filesystem-fail-efs-utils-not-installed.patch
Patch52: bz2184779-Filesystem-systemd-drop-in-net-fs.patch
Patch53: bz2179003-mysql-2-fix-demoted-score-bounce.patch
Patch54: bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch
Patch55: bz2207567-Filesystem-1-improve-stop-action.patch
Patch56: bz2207567-Filesystem-2-fix-incorrect-parameter-types.patch
Patch57: bz2209433-Delay-1-increase-default-timeouts.patch
Patch58: bz2209433-Delay-2-remove-incorrect-statement.patch
Patch59: bz2207567-Filesystem-3-fix-signal_delay-default-value.patch
# bundled ha-cloud-support libs # bundled ha-cloud-support libs
Patch500: ha-cloud-support-aws.patch Patch500: ha-cloud-support-aws.patch
@ -204,7 +220,7 @@ in a cluster environment.
License: PostgreSQL License: PostgreSQL
Summary: PostgreSQL Automatic Failover (PAF) resource agent Summary: PostgreSQL Automatic Failover (PAF) resource agent
Requires: %{name} = %{version}-%{release} Requires: %{name} = %{version}-%{release}
Requires: perl-interpreter perl-English perl-FindBin Requires: perl-interpreter perl-lib perl-English perl-FindBin
%description paf %description paf
PostgreSQL Automatic Failover (PAF) resource agents allows PostgreSQL PostgreSQL Automatic Failover (PAF) resource agents allows PostgreSQL
@ -260,6 +276,22 @@ exit 1
%patch41 -p1 %patch41 -p1
%patch42 -p1 %patch42 -p1
%patch43 -p1 %patch43 -p1
%patch44 -p1
%patch45 -p1
%patch46 -p1
%patch47 -p1
%patch48 -p1
%patch49 -p1
%patch50 -p1
%patch51 -p1
%patch52 -p1
%patch53 -p1
%patch54 -p1
%patch55 -p1
%patch56 -p1
%patch57 -p1
%patch58 -p1
%patch59 -p1
# bundled ha-cloud-support libs # bundled ha-cloud-support libs
%patch500 -p1 %patch500 -p1
@ -581,6 +613,54 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog %changelog
* Thu Jul 20 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-43
- Filesystem: improve stop-action and allow setting term/kill signals
and signal_delay for large filesystems
Resolves: rhbz#2207567
* Tue Jul 18 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-42
- Delay: increase stop, status and monitor timeouts to 40s to avoid
failing with default values
Resolves: rhbz#2209433
* Wed Jun 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-40
- IPaddr2/IPsrcaddr: support policy-based routing
Resolves: rhbz#2142518
* Wed Jun 14 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-39
- mysql: fix replication issues
Resolves: rhbz#2179003
* Mon May 22 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-38
- resource-agents-paf: add perl-lib dependency
- Filesystem: create systemd drop-in for network filesystems
Resolves: rhbz#2203813
Resolves: rhbz#2184779
* Mon May 1 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-37
- azure-events*: fix for no "Transition Summary" for Pacemaker 2.1+
- Filesystem: fail if AWS efs-utils not installed when fstype=efs
Resolves: rhbz#2182415
Resolves: rhbz#2183133
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-36
- LVM-activate: failover with missing PVs
Resolves: rhbz#2174911
* Tue Mar 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-35
- lvmlockd: add "use_lvmlockd = 1" if it's commented out or missing
- ethmonitor: don't log "Interface does not exist" for monitor-action
Resolves: rhbz#2149968
Resolves: rhbz#2174896
* Wed Jan 25 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-34 * Wed Jan 25 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-34
- all agents: dont check notify/promotable settings during - all agents: dont check notify/promotable settings during
validate-action validate-action