diff --git a/bz2207567-Filesystem-improve-stop-action.patch b/bz2207567-Filesystem-improve-stop-action.patch new file mode 100644 index 0000000..351600b --- /dev/null +++ b/bz2207567-Filesystem-improve-stop-action.patch @@ -0,0 +1,125 @@ +From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 4 Jul 2023 14:40:19 +0200 +Subject: [PATCH] Filesystem: improve stop-action and allow setting term/kill + signals and signal_delay for large filesystems + +--- + heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------ + 1 file changed, 70 insertions(+), 10 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 65a9dffb5..fe608ebfd 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto" + OCF_RESKEY_fast_stop_default="no" + OCF_RESKEY_force_clones_default="false" + OCF_RESKEY_force_unmount_default="true" ++OCF_RESKEY_term_signals_default="TERM" ++OCF_RESKEY_kill_signals_default="KILL" ++OCF_RESKEY_signal_delay_default="1" + + # RHEL specific defaults + if is_redhat_based; then +@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then + fi + : ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}} + : ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}} ++: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}} ++: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}} ++: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}} + + # Variables used by multiple methods + HOSTOS=$(uname) +@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system. + + + ++ ++ ++Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action. 
++
++Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action
++
++
++
++
++Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
++
++Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action
++
++
++
++
++How many seconds to wait after sending term/kill signals to processes in stop-action.
++
++How many seconds to wait after sending term/kill signals to processes in stop-action
++
++
++
+ 
+ 
+ 
+@@ -663,19 +693,49 @@ try_umount() {
+ 	}
+ 	return $OCF_ERR_GENERIC
+ }
+-fs_stop() {
+-	local SUB="$1" timeout=$2 sig cnt
+-	for sig in TERM KILL; do
+-		cnt=$((timeout/2)) # try half time with TERM
+-		while [ $cnt -gt 0 ]; do
+-			try_umount "$SUB" &&
+-				return $OCF_SUCCESS
+-			ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
++timeout_child() {
++	local pid="$1" timeout="$2" killer ret
++
++	# start job in the background that will KILL the given process after timeout expires
++	sleep "$timeout" && kill -s KILL "$pid" &
++	killer=$!
++
++	# block until the child process either exits on its own or gets killed by the above killer pipeline
++	wait "$pid"
++	ret=$?
++
++	# ret is 128 + signal number (i.e. > 128) if the child was killed by a signal, e.g. when the timeout expired
++	[ "$ret" -lt 128 ] && kill -s KILL "$killer"
++	return $ret
++}
++fs_stop_loop() {
++	local SUB="$1" signals="$2" sig
++	while true; do
++		for sig in $signals; do
+ 			signal_processes "$SUB" $sig
+-			cnt=$((cnt-1))
+-			sleep 1
+ 		done
++		sleep "$OCF_RESKEY_signal_delay"
++		try_umount "$SUB" && return $OCF_SUCCESS
+ 	done
++}
++fs_stop() {
++	local SUB="$1" timeout="$2" grace_time ret
++	grace_time=$((timeout/2))
++
++	# try gracefully terminating processes for up to half of the configured timeout
++	fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
++	timeout_child $! "$grace_time"
++	ret=$?
++	[ $ret -eq $OCF_SUCCESS ] && return $ret
++
++	# try killing them for the rest of the timeout (this phase gets the odd second when timeout is odd)
++	fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
++	timeout_child $! $((timeout - grace_time))
++	ret=$?
++	[ $ret -eq $OCF_SUCCESS ] && return $ret
++
++	# timeout expired
++	ocf_exit_reason "Couldn't unmount $SUB within given timeout"
+ 	return $OCF_ERR_GENERIC
+ }
+ 
diff --git a/bz2209433-Delay-increase-default-timeouts.patch b/bz2209433-Delay-increase-default-timeouts.patch
new file mode 100644
index 0000000..7c1941b
--- /dev/null
+++ b/bz2209433-Delay-increase-default-timeouts.patch
@@ -0,0 +1,27 @@
+From a913eb6a9a8732db7c56d2e0be937dbd0db9dc38 Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen
+Date: Fri, 26 May 2023 12:45:13 +0200
+Subject: [PATCH] Delay: increase stop, status and monitor timeouts to 40s to
+ avoid failing with default values
+
+---
+ heartbeat/Delay | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/heartbeat/Delay b/heartbeat/Delay
+index 7ba6623f24..bc6c13559b 100755
+--- a/heartbeat/Delay
++++ b/heartbeat/Delay
+@@ -89,9 +89,9 @@ Defaults to "startdelay" if unspecified.
+ + + +- +- +- ++ ++ ++ + + + diff --git a/resource-agents.spec b/resource-agents.spec index ca9d10e..ff7f8a2 100644 --- a/resource-agents.spec +++ b/resource-agents.spec @@ -45,7 +45,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.10.0 -Release: 40%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 41%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents Source0: %{upstream_prefix}-%{upstream_version}.tar.gz @@ -104,6 +104,8 @@ Patch51: bz2183133-Filesystem-fail-efs-utils-not-installed.patch Patch52: bz2184779-Filesystem-systemd-drop-in-net-fs.patch Patch53: bz2179003-mysql-2-fix-demoted-score-bounce.patch Patch54: bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch +Patch55: bz2207567-Filesystem-improve-stop-action.patch +Patch56: bz2209433-Delay-increase-default-timeouts.patch # bundled ha-cloud-support libs Patch500: ha-cloud-support-aws.patch @@ -282,6 +284,8 @@ exit 1 %patch52 -p1 %patch53 -p1 %patch54 -p1 +%patch55 -p1 +%patch56 -p1 # bundled ha-cloud-support libs %patch500 -p1 @@ -603,6 +607,15 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Wed Jul 12 2023 Oyvind Albrigtsen - 4.10.0-41 +- Filesystem: improve stop-action and allow setting term/kill signals + and signal_delay for large filesystems +- Delay: increase stop, status and monitor timeouts to 40s to avoid + failing with default values + + Resolves: rhbz#2207567 + Resolves: rhbz#2209433 + * Wed Jun 21 2023 Oyvind Albrigtsen - 4.10.0-40 - IPaddr2/IPsrcaddr: support policy-based routing