- Filesystem: improve stop-action and allow setting term/kill signals
and signal_delay for large filesystems - Delay: increase stop, status and monitor timeouts to 40s to avoid failing with default values Resolves: rhbz#2207567 Resolves: rhbz#2209433
This commit is contained in:
parent
e4471672ef
commit
eb72aa316f
125
bz2207567-Filesystem-improve-stop-action.patch
Normal file
125
bz2207567-Filesystem-improve-stop-action.patch
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Tue, 4 Jul 2023 14:40:19 +0200
|
||||||
|
Subject: [PATCH] Filesystem: improve stop-action and allow setting term/kill
|
||||||
|
signals and signal_delay for large filesystems
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------
|
||||||
|
1 file changed, 70 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
|
||||||
|
index 65a9dffb5..fe608ebfd 100755
|
||||||
|
--- a/heartbeat/Filesystem
|
||||||
|
+++ b/heartbeat/Filesystem
|
||||||
|
@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto"
|
||||||
|
OCF_RESKEY_fast_stop_default="no"
|
||||||
|
OCF_RESKEY_force_clones_default="false"
|
||||||
|
OCF_RESKEY_force_unmount_default="true"
|
||||||
|
+OCF_RESKEY_term_signals_default="TERM"
|
||||||
|
+OCF_RESKEY_kill_signals_default="KILL"
|
||||||
|
+OCF_RESKEY_signal_delay_default="1"
|
||||||
|
|
||||||
|
# RHEL specific defaults
|
||||||
|
if is_redhat_based; then
|
||||||
|
@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then
|
||||||
|
fi
|
||||||
|
: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
|
||||||
|
: ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}}
|
||||||
|
+: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}}
|
||||||
|
+: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}}
|
||||||
|
+: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}}
|
||||||
|
|
||||||
|
# Variables used by multiple methods
|
||||||
|
HOSTOS=$(uname)
|
||||||
|
@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system.
|
||||||
|
<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
|
||||||
|
</parameter>
|
||||||
|
|
||||||
|
+<parameter name="term_signals">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_term_signals_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
+<parameter name="kill_signals">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_kill_signals_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
+<parameter name="signal_delay">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+How many seconds to wait after sending term/kill signals to processes in stop-action.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
|
||||||
|
+<content type="integer" default="${OCF_RESKEY_signal_delay_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
</parameters>
|
||||||
|
|
||||||
|
<actions>
|
||||||
|
@@ -663,19 +693,49 @@ try_umount() {
|
||||||
|
}
|
||||||
|
return $OCF_ERR_GENERIC
|
||||||
|
}
|
||||||
|
-fs_stop() {
|
||||||
|
- local SUB="$1" timeout=$2 sig cnt
|
||||||
|
- for sig in TERM KILL; do
|
||||||
|
- cnt=$((timeout/2)) # try half time with TERM
|
||||||
|
- while [ $cnt -gt 0 ]; do
|
||||||
|
- try_umount "$SUB" &&
|
||||||
|
- return $OCF_SUCCESS
|
||||||
|
- ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
|
||||||
|
+timeout_child() {
|
||||||
|
+ local pid="$1" timeout="$2" killer ret
|
||||||
|
+
|
||||||
|
+ # start job in the background that will KILL the given process after timeout expires
|
||||||
|
+ sleep $timeout && kill -s KILL $pid &
|
||||||
|
+ killer=$!
|
||||||
|
+
|
||||||
|
+ # block until the child process either exits on its own or gets killed by the above killer pipeline
|
||||||
|
+ wait $pid
|
||||||
|
+ ret=$?
|
||||||
|
+
|
||||||
|
+ # ret is 128 + signal number (137 for KILL) if the timeout expired and the killer terminated the child
|
||||||
|
+ [ $ret -lt 128 ] && kill -s KILL $killer
|
||||||
|
+ return $ret
|
||||||
|
+}
|
||||||
|
+fs_stop_loop() {
|
||||||
|
+ local SUB="$1" signals="$2" sig
|
||||||
|
+ while true; do
|
||||||
|
+ for sig in $signals; do
|
||||||
|
signal_processes "$SUB" $sig
|
||||||
|
- cnt=$((cnt-1))
|
||||||
|
- sleep 1
|
||||||
|
done
|
||||||
|
+ sleep $OCF_RESKEY_signal_delay
|
||||||
|
+ try_umount "$SUB" && return $OCF_SUCCESS
|
||||||
|
done
|
||||||
|
+}
|
||||||
|
+fs_stop() {
|
||||||
|
+ local SUB="$1" timeout=$2 grace_time ret
|
||||||
|
+ grace_time=$((timeout/2))
|
||||||
|
+
|
||||||
|
+ # try gracefully terminating processes for up to half of the configured timeout
|
||||||
|
+ fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
|
||||||
|
+ timeout_child $! $grace_time
|
||||||
|
+ ret=$?
|
||||||
|
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
|
||||||
|
+
|
||||||
|
+ # try killing them for the rest of the timeout
|
||||||
|
+ fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
|
||||||
|
+ timeout_child $! $grace_time
|
||||||
|
+ ret=$?
|
||||||
|
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
|
||||||
|
+
|
||||||
|
+ # timeout expired
|
||||||
|
+ ocf_exit_reason "Couldn't unmount $SUB within given timeout"
|
||||||
|
return $OCF_ERR_GENERIC
|
||||||
|
}
|
||||||
|
|
27
bz2209433-Delay-increase-default-timeouts.patch
Normal file
27
bz2209433-Delay-increase-default-timeouts.patch
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
From a913eb6a9a8732db7c56d2e0be937dbd0db9dc38 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Fri, 26 May 2023 12:45:13 +0200
|
||||||
|
Subject: [PATCH] Delay: increase stop, status and monitor timeouts to 40s to
|
||||||
|
avoid failing with default values
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/Delay | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/Delay b/heartbeat/Delay
|
||||||
|
index 7ba6623f24..bc6c13559b 100755
|
||||||
|
--- a/heartbeat/Delay
|
||||||
|
+++ b/heartbeat/Delay
|
||||||
|
@@ -89,9 +89,9 @@ Defaults to "startdelay" if unspecified.
|
||||||
|
|
||||||
|
<actions>
|
||||||
|
<action name="start" timeout="30s" />
|
||||||
|
-<action name="stop" timeout="30s" />
|
||||||
|
-<action name="status" depth="0" timeout="30s" interval="10s" />
|
||||||
|
-<action name="monitor" depth="0" timeout="30s" interval="10s" />
|
||||||
|
+<action name="stop" timeout="40s" />
|
||||||
|
+<action name="status" depth="0" timeout="40s" interval="10s" />
|
||||||
|
+<action name="monitor" depth="0" timeout="40s" interval="10s" />
|
||||||
|
<action name="meta-data" timeout="5s" />
|
||||||
|
<action name="validate-all" timeout="5s" />
|
||||||
|
</actions>
|
@ -45,7 +45,7 @@
|
|||||||
Name: resource-agents
|
Name: resource-agents
|
||||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||||
Version: 4.10.0
|
Version: 4.10.0
|
||||||
Release: 40%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
Release: 41%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||||
License: GPLv2+ and LGPLv2+
|
License: GPLv2+ and LGPLv2+
|
||||||
URL: https://github.com/ClusterLabs/resource-agents
|
URL: https://github.com/ClusterLabs/resource-agents
|
||||||
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
||||||
@ -104,6 +104,8 @@ Patch51: bz2183133-Filesystem-fail-efs-utils-not-installed.patch
|
|||||||
Patch52: bz2184779-Filesystem-systemd-drop-in-net-fs.patch
|
Patch52: bz2184779-Filesystem-systemd-drop-in-net-fs.patch
|
||||||
Patch53: bz2179003-mysql-2-fix-demoted-score-bounce.patch
|
Patch53: bz2179003-mysql-2-fix-demoted-score-bounce.patch
|
||||||
Patch54: bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch
|
Patch54: bz2142518-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch
|
||||||
|
Patch55: bz2207567-Filesystem-improve-stop-action.patch
|
||||||
|
Patch56: bz2209433-Delay-increase-default-timeouts.patch
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
Patch500: ha-cloud-support-aws.patch
|
Patch500: ha-cloud-support-aws.patch
|
||||||
@ -282,6 +284,8 @@ exit 1
|
|||||||
%patch52 -p1
|
%patch52 -p1
|
||||||
%patch53 -p1
|
%patch53 -p1
|
||||||
%patch54 -p1
|
%patch54 -p1
|
||||||
|
%patch55 -p1
|
||||||
|
%patch56 -p1
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
%patch500 -p1
|
%patch500 -p1
|
||||||
@ -603,6 +607,15 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
|||||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Jul 12 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-41
|
||||||
|
- Filesystem: improve stop-action and allow setting term/kill signals
|
||||||
|
and signal_delay for large filesystems
|
||||||
|
- Delay: increase stop, status and monitor timeouts to 40s to avoid
|
||||||
|
failing with default values
|
||||||
|
|
||||||
|
Resolves: rhbz#2207567
|
||||||
|
Resolves: rhbz#2209433
|
||||||
|
|
||||||
* Wed Jun 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-40
|
* Wed Jun 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-40
|
||||||
- IPaddr2/IPsrcaddr: support policy-based routing
|
- IPaddr2/IPsrcaddr: support policy-based routing
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user