resource-agents/bz1905820-LVM-activate-fix-return-codes.patch

196 lines
6.3 KiB
Diff
Raw Normal View History

From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Sun, 13 Dec 2020 14:58:34 -0800
Subject: [PATCH] LVM-activate: Fix return codes
OCF_ERR_ARGS should be used when the configuration isn't valid for the
**local** node, and so the resource should not attempt to start again
locally until the issue is corrected.
OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
**any** node, and so the resource should not attempt to start again
anywhere until the issue is corrected.
One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
running (or improperly not running) be an OCF_ERR_GENERIC or
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
user intervention suggests OCF_ERR_ARGS. The approach here is to use
GENERIC for all of these. One can make the case that "improperly
running" should use ARGS, since a process must be manually stopped to
fix the issue, and that "improperly not running" should use GENERIC,
since there's a small chance the process died and will be recovered in
some way.
More info about return code meanings:
- https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted
Resolves: RHBZ#1905820
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..e951a08e9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -333,8 +333,7 @@ config_verify()
real=$(lvmconfig "$name" | cut -d'=' -f2)
if [ "$real" != "$expect" ]; then
ocf_exit_reason "config item $name: expect=$expect but real=$real"
- exit $OCF_ERR_CONFIGURED
-
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -366,12 +365,12 @@ lvmlockd_check()
fi
ocf_exit_reason "lvmlockd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -402,17 +401,17 @@ clvmd_check()
# Good: clvmd is running, and lvmlockd is not running
if ! pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmetad >/dev/null 2>&1 ; then
ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmlockd >/dev/null 2>&1 ; then
ocf_exit_reason "lvmlockd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -424,12 +423,12 @@ systemid_check()
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
if [ "$source" = "" ] || [ "$source" = "none" ]; then
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
if [ -z ${SYSTEM_ID} ]; then
ocf_exit_reason "local/system_id is not set!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -441,18 +440,18 @@ tagging_check()
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
if ! lvm dumpconfig activation/volume_list; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -463,13 +462,13 @@ read_parameters()
if [ -z "$VG" ]
then
ocf_exit_reason "You must identify the volume group name!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
then
ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Convert VG_access_mode from string to index
@@ -519,8 +518,10 @@ lvm_validate() {
exit $OCF_NOT_RUNNING
fi
+ # Could be a transient error (e.g., iSCSI connection
+ # issue) so use OCF_ERR_GENERIC
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
# Inconsistency might be due to missing physical volumes, which doesn't
@@ -549,7 +550,7 @@ lvm_validate() {
mode=$?
if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Nothing to do if the VG has no logical volume
@@ -561,11 +562,11 @@ lvm_validate() {
# Check if the given $LV is in the $VG
if [ -n "$LV" ]; then
- OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
+ output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
if [ $? -ne 0 ]; then
- ocf_log err "lvs: ${OUT}"
+ ocf_log err "lvs: ${output}"
ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
fi
@@ -580,7 +581,6 @@ lvm_validate() {
3)
systemid_check
;;
-
4)
tagging_check
;;
@@ -808,10 +808,9 @@ lvm_status() {
dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
2>&1
if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- else
- return $OCF_SUCCESS
+ return $OCF_ERR_GENERIC
fi
+ return $OCF_SUCCESS
;;
*)
ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"