- Filesystem: improve stop-action and allow setting term/kill signals

and signal_delay for large filesystems

  Resolves: rhbz#2189243
This commit is contained in:
Oyvind Albrigtsen 2023-07-12 13:30:04 +02:00
parent 75d89ebe14
commit 73341b602b
2 changed files with 134 additions and 1 deletions

View File

@ -0,0 +1,125 @@
From 48ed6e6d6510f42743e4463970e27f05637e4982 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 4 Jul 2023 14:40:19 +0200
Subject: [PATCH] Filesystem: improve stop-action and allow setting term/kill
signals and signal_delay for large filesystems
---
heartbeat/Filesystem | 80 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 70 insertions(+), 10 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 65a9dffb5..fe608ebfd 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -71,6 +71,9 @@ OCF_RESKEY_run_fsck_default="auto"
OCF_RESKEY_fast_stop_default="no"
OCF_RESKEY_force_clones_default="false"
OCF_RESKEY_force_unmount_default="true"
+OCF_RESKEY_term_signals_default="TERM"
+OCF_RESKEY_kill_signals_default="KILL"
+OCF_RESKEY_signal_delay_default="1"
# RHEL specific defaults
if is_redhat_based; then
@@ -104,6 +107,9 @@ if [ -z "${OCF_RESKEY_fast_stop}" ]; then
fi
: ${OCF_RESKEY_force_clones=${OCF_RESKEY_force_clones_default}}
: ${OCF_RESKEY_force_unmount=${OCF_RESKEY_force_unmount_default}}
+: ${OCF_RESKEY_term_signals=${OCF_RESKEY_term_signals_default}}
+: ${OCF_RESKEY_kill_signals=${OCF_RESKEY_kill_signals_default}}
+: ${OCF_RESKEY_signal_delay=${OCF_RESKEY_signal_delay_default}}
# Variables used by multiple methods
HOSTOS=$(uname)
@@ -266,6 +272,30 @@ block if unresponsive nfs mounts are in use on the system.
<content type="boolean" default="${OCF_RESKEY_force_unmount_default}" />
</parameter>
+<parameter name="term_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during graceful termination phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_term_signals_default}" />
+</parameter>
+
+<parameter name="kill_signals">
+<longdesc lang="en">
+Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action.
+</longdesc>
+<shortdesc lang="en">Signals (names or numbers, whitespace separated) to send processes during forceful killing phase in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signals_default}" />
+</parameter>
+
+<parameter name="signal_delay">
+<longdesc lang="en">
+How many seconds to wait after sending term/kill signals to processes in stop-action.
+</longdesc>
+<shortdesc lang="en">How many seconds to wait after sending term/kill signals to processes in stop-action</shortdesc>
+<content type="boolean" default="${OCF_RESKEY_kill_signal_delay}" />
+</parameter>
+
</parameters>
<actions>
@@ -663,19 +693,49 @@ try_umount() {
}
return $OCF_ERR_GENERIC
}
-fs_stop() {
- local SUB="$1" timeout=$2 sig cnt
- for sig in TERM KILL; do
- cnt=$((timeout/2)) # try half time with TERM
- while [ $cnt -gt 0 ]; do
- try_umount "$SUB" &&
- return $OCF_SUCCESS
- ocf_exit_reason "Couldn't unmount $SUB; trying cleanup with $sig"
+timeout_child() {
+ local pid="$1" timeout="$2" killer ret
+
+ # start job in the background that will KILL the given process after timeout expires
+ sleep $timeout && kill -s KILL $pid &
+ killer=$!
+
+ # block until the child process either exits on its own or gets killed by the above killer pipeline
+ wait $pid
+ ret=$?
+
+ # ret would be 127 + child exit code if the timeout expired
+ [ $ret -lt 128 ] && kill -s KILL $killer
+ return $ret
+}
+fs_stop_loop() {
+ local SUB="$1" signals="$2" sig
+ while true; do
+ for sig in $signals; do
signal_processes "$SUB" $sig
- cnt=$((cnt-1))
- sleep 1
done
+ sleep $OCF_RESKEY_signal_delay
+ try_umount "$SUB" && return $OCF_SUCCESS
done
+}
+fs_stop() {
+ local SUB="$1" timeout=$2 grace_time ret
+ grace_time=$((timeout/2))
+
+ # try gracefully terminating processes for up to half of the configured timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # try killing them for the rest of the timeout
+ fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
+ timeout_child $! $grace_time
+ ret=$?
+ [ $ret -eq $OCF_SUCCESS ] && return $ret
+
+ # timeout expired
+ ocf_exit_reason "Couldn't unmount $SUB within given timeout"
return $OCF_ERR_GENERIC
}

View File

@ -69,7 +69,7 @@
Name: resource-agents Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.9.0 Version: 4.9.0
Release: 44%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} Release: 45%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+ License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents URL: https://github.com/ClusterLabs/resource-agents
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel} %if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
@ -145,6 +145,7 @@ Patch53: bz2181019-azure-events-2-improve-logic.patch
Patch54: bz2183152-Filesystem-fail-efs-utils-not-installed.patch Patch54: bz2183152-Filesystem-fail-efs-utils-not-installed.patch
Patch55: bz2039692-mysql-2-fix-demoted-score-bounce.patch Patch55: bz2039692-mysql-2-fix-demoted-score-bounce.patch
Patch56: bz2040110-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch Patch56: bz2040110-IPaddr2-IPsrcaddr-2-fix-table-parameter.patch
Patch57: bz2189243-Filesystem-improve-stop-action.patch
# bundle patches # bundle patches
Patch1000: 7-gcp-bundled.patch Patch1000: 7-gcp-bundled.patch
@ -381,6 +382,7 @@ exit 1
%patch54 -p1 %patch54 -p1
%patch55 -p1 %patch55 -p1
%patch56 -p1 %patch56 -p1
%patch57 -p1
chmod 755 heartbeat/nova-compute-wait chmod 755 heartbeat/nova-compute-wait
chmod 755 heartbeat/NovaEvacuate chmod 755 heartbeat/NovaEvacuate
@ -956,6 +958,12 @@ ccs_update_schema > /dev/null 2>&1 ||:
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog %changelog
* Wed Jul 12 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-45
- Filesystem: improve stop-action and allow setting term/kill signals
and signal_delay for large filesystems
Resolves: rhbz#2189243
* Wed Jun 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-44 * Wed Jun 21 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-44
- IPaddr2/IPsrcaddr: support policy-based routing - IPaddr2/IPsrcaddr: support policy-based routing