- nfsserver: monitor nfsdcld and nfs-mountd services for a failure state

Resolves: RHEL-182592
This commit is contained in:
Arslan Ahmad 2026-06-11 21:17:23 +05:30
parent 01c579f6c9
commit 7a5c9e0f3f
2 changed files with 82 additions and 1 deletions

View File

@@ -0,0 +1,73 @@
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -366,14 +366,18 @@
ocf_exit_reason "rpcbind is not running"
return $OCF_NOT_RUNNING
fi
+ fi
- ocf_log debug "Status: nfs-mountd"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -ne "0" ]; then
+ ocf_log debug "Status: nfs-mountd"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ if ocf_is_probe || [ "$__OCF_ACTION" = "start" ]; then
+ ocf_log info "nfs-mountd is not running"
+ else
ocf_exit_reason "nfs-mountd is not running"
- return $OCF_NOT_RUNNING
fi
+ return $OCF_NOT_RUNNING
fi
ocf_log debug "Status: nfs-idmapd"
@@ -397,6 +401,46 @@
fi
fi
+ local nfsdcld_unit_status
+ nfsdcld_unit_status=$(systemctl --no-legend list-unit-files "nfsdcld.service" 2>/dev/null)
+
+ if echo "$nfsdcld_unit_status" | grep -qE "nfsdcld.*masked"; then
+ ocf_log debug "Status: nfsdcld (masked, skipping monitor)"
+ elif echo "$nfsdcld_unit_status" | grep -q nfsdcld; then
+ local nfsdcld_state
+ nfsdcld_state=$(systemctl show -p ActiveState --value nfsdcld.service 2>/dev/null)
+
+ ocf_log debug "Status: nfsdcld (state: $nfsdcld_state)"
+
+ case "$nfsdcld_state" in
+ active)
+ ocf_log debug "Status: nfsdcld (monitoring as it is active)"
+ fn=`mktemp`
+ nfs_exec status nfsdcld > $fn 2>&1
+ rc=$?
+ ocf_log debug "$(cat $fn)"
+ rm -f $fn
+ if [ "$rc" -ne "0" ]; then
+ if ocf_is_probe || [ "$__OCF_ACTION" = "start" ]; then
+ ocf_log info "nfsdcld service is not running"
+ else
+ ocf_exit_reason "nfsdcld service is not running"
+ fi
+ return $OCF_NOT_RUNNING
+ fi
+ ;;
+
+ failed|inactive|deactivating)
+ if ocf_is_probe || [ "$__OCF_ACTION" = "start" ]; then
+ ocf_log info "nfsdcld service is not running"
+ else
+ ocf_exit_reason "nfsdcld service is in '$nfsdcld_state' state"
+ fi
+ return $OCF_NOT_RUNNING
+ ;;
+ esac
+ fi
+
nfs_exec is-active nfs-server
rc=$?

View File

@@ -45,7 +45,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.16.0
-Release: 69%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 70%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPL-2.0-or-later AND LGPL-2.1-or-later
URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@@ -140,6 +140,7 @@ Patch87: RHEL-180765-1-nfsserver-fixes-unmount-failure-of-bind-mount-with-fsidd.
Patch88: RHEL-180765-2-nfsserver-fixes-incorrect-indentation.patch
Patch89: RHEL-156729-sybaseASE-fix-for-missing-sybaseASE-env-during-probe-action.patch
Patch90: RHEL-180765-3-nfsserver-do-not-try-to-stop-fsidd-when-not-present.patch
+Patch91: RHEL-182592-nfsserver-monitor-nfsdcld-and-nfs-mountd-services-to-trigger-recovery-on-failure.patch
# bundled ha-cloud-support libs
Patch500: ha-cloud-support-aliyun.patch
@@ -401,6 +402,7 @@ exit 1
%patch -p1 -P 88
%patch -p1 -P 89
%patch -p1 -P 90
+%patch -p1 -P 91
# bundled ha-cloud-support libs
%patch -p1 -P 500
@@ -733,6 +735,12 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog
+* Thu Jun 11 2026 Arslan Ahmad <arahmad@redhat.com> - 4.16.0-70
+- nfsserver: monitor nfsdcld and nfs-mountd services to trigger
+  recovery on failure
+  Resolves: RHEL-182592
* Wed Jun 10 2026 Arslan Ahmad <arahmad@redhat.com> - 4.16.0-69
- nfsserver: stop fsidd when stopping nfsserver