185 lines
7.5 KiB
Diff
185 lines
7.5 KiB
Diff
|
From 053bb9c7356eae82b1089582bb2844388ae4df57 Mon Sep 17 00:00:00 2001
|
||
|
From: "Kaleb S. KEITHLEY" <kkeithle@redhat.com>
|
||
|
Date: Wed, 2 Jun 2021 07:49:12 -0400
|
||
|
Subject: [PATCH 550/584] common-ha: stability fixes for ganesha_grace and
|
||
|
ganesha_mon RAs
|
||
|
|
||
|
Include fixes suggested by ClusterHA devs.
|
||
|
|
||
|
1) It turns out that crm_attribute attrs and attrd_updater attrs really
|
||
|
are one and the same, despite what I was told years ago.
|
||
|
|
||
|
attrs created with crm_attribute ... --lifetime=reboot ... or
|
||
|
attrd_updater are one and the same. As per ClusterHA devs, having an attr
|
||
|
created with crm_attribute ... --lifetime=forever and also
|
||
|
creating/updating the same attr with attrd_updater is a recipe for
|
||
|
weird things to happen that will be difficult to debug.
|
||
|
|
||
|
2) Using hostname -s or hostname for node names in crm_attribute and
|
||
|
attrd_updater potentially could use the wrong name if the host has
|
||
|
been renamed; use ocf_local_nodename() (in ocf-shellfuncs) instead.
|
||
|
|
||
|
https://github.com/gluster/glusterfs/issues/2276
|
||
|
https://github.com/gluster/glusterfs/pull/2283
|
||
|
commit 9bd2c697686ec40e2c4f711df961860c8a735baa
|
||
|
|
||
|
Change-Id:If572d396fae9206628714fb2ce00f72e94f2258f
|
||
|
BUG: 1945143
|
||
|
Signed-off-by: Kaleb S. KEITHLEY <kkeithle@redhat.com>
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/c/rhs-glusterfs/+/244593
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
extras/ganesha/ocf/ganesha_grace | 28 +++++++++---------------
|
||
|
extras/ganesha/ocf/ganesha_mon | 47 ++++++++++++++--------------------------
|
||
|
2 files changed, 26 insertions(+), 49 deletions(-)
|
||
|
|
||
|
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
|
||
|
index 825f716..edc6fa2 100644
|
||
|
--- a/extras/ganesha/ocf/ganesha_grace
|
||
|
+++ b/extras/ganesha/ocf/ganesha_grace
|
||
|
@@ -94,25 +94,21 @@ esac
|
||
|
ganesha_grace_start()
|
||
|
{
|
||
|
local rc=${OCF_ERR_GENERIC}
|
||
|
- local host=$(hostname -s)
|
||
|
+ local host=$(ocf_local_nodename)
|
||
|
|
||
|
- ocf_log debug "ganesha_grace_start()"
|
||
|
- # give ganesha_mon RA a chance to set the crm_attr first
|
||
|
+ ocf_log debug "ganesha_grace_start ${host}"
|
||
|
+ # give ganesha_mon RA a chance to set the attr first
|
||
|
# I mislike the sleep, but it's not clear that looping
|
||
|
# with a small sleep is necessarily better
|
||
|
# start has a 40sec timeout, so a 5sec sleep here is okay
|
||
|
sleep 5
|
||
|
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
|
||
|
+ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- host=$(hostname)
|
||
|
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
|
||
|
- fi
|
||
|
+ ocf_log info "grace start: attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
|
||
|
fi
|
||
|
|
||
|
# Three possibilities:
|
||
|
- # 1. There is no attribute at all and attr_updater returns
|
||
|
+ # 1. There is no attribute at all and attrd_updater returns
|
||
|
# a zero length string. This happens when
|
||
|
# ganesha_mon::monitor hasn't run at least once to set
|
||
|
# the attribute. The assumption here is that the system
|
||
|
@@ -164,17 +160,13 @@ ganesha_grace_notify()
|
||
|
|
||
|
ganesha_grace_monitor()
|
||
|
{
|
||
|
- local host=$(hostname -s)
|
||
|
+ local host=$(ocf_local_nodename)
|
||
|
|
||
|
- ocf_log debug "monitor"
|
||
|
+ ocf_log debug "ganesha_grace monitor ${host}"
|
||
|
|
||
|
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
|
||
|
+ attr=$(attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- host=$(hostname)
|
||
|
- attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
|
||
|
- fi
|
||
|
+ ocf_log info "attrd_updater --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
|
||
|
fi
|
||
|
|
||
|
# if there is no attribute (yet), maybe it's because
|
||
|
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
|
||
|
index 2b4a9d6..7fbbf70 100644
|
||
|
--- a/extras/ganesha/ocf/ganesha_mon
|
||
|
+++ b/extras/ganesha/ocf/ganesha_mon
|
||
|
@@ -124,7 +124,6 @@ ganesha_mon_stop()
|
||
|
|
||
|
ganesha_mon_monitor()
|
||
|
{
|
||
|
- local host=$(hostname -s)
|
||
|
local pid_file="/var/run/ganesha.pid"
|
||
|
local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
|
||
|
local proc_pid="/proc/"
|
||
|
@@ -141,31 +140,27 @@ ganesha_mon_monitor()
|
||
|
|
||
|
if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
|
||
|
|
||
|
- attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
|
||
|
+ attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
|
||
|
+ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_ganesha_active} -v 1 failed"
|
||
|
fi
|
||
|
|
||
|
# ganesha_grace (nfs-grace) RA follows grace-active attr
|
||
|
# w/ constraint location
|
||
|
- attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
|
||
|
+ attrd_updater --name ${OCF_RESKEY_grace_active} -v 1
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
|
||
|
+ ocf_log info "warning: attrd_updater --name ${OCF_RESKEY_grace_active} -v 1 failed"
|
||
|
fi
|
||
|
|
||
|
# ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
|
||
|
- # track grace-active crm_attr (attr != crm_attr)
|
||
|
- # we can't just use the attr as there's no way to query
|
||
|
- # its value in RHEL6 pacemaker
|
||
|
-
|
||
|
- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- host=$(hostname)
|
||
|
- crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
|
||
|
- fi
|
||
|
- fi
|
||
|
+ # track grace-active attr.
|
||
|
+ #
|
||
|
+ # Originally we were told that attrs set with attrd_updater
|
||
|
+ # are different/distinct than attrs set with crm_attribute.
|
||
|
+ # Now, years later, we are told that they are the same and
|
||
|
+ # that the values of attrs set with attrd_updater can be
|
||
|
+ # retrieved with crm_attribute. Or with attrd_updater -Q
|
||
|
+ # now that we no longer have to deal with rhel6.
|
||
|
|
||
|
return ${OCF_SUCCESS}
|
||
|
fi
|
||
|
@@ -182,26 +177,16 @@ ganesha_mon_monitor()
|
||
|
# the remaining ganesha.nfsds into grace before
|
||
|
# initiating the VIP fail-over.
|
||
|
|
||
|
- attrd_updater -D -n ${OCF_RESKEY_grace_active}
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
|
||
|
- fi
|
||
|
-
|
||
|
- host=$(hostname -s)
|
||
|
- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
|
||
|
+ attrd_updater --delete --name ${OCF_RESKEY_grace_active}
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- host=$(hostname)
|
||
|
- crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
|
||
|
- if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
|
||
|
- fi
|
||
|
+ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_grace_active} failed"
|
||
|
fi
|
||
|
|
||
|
sleep ${OCF_RESKEY_grace_delay}
|
||
|
|
||
|
- attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
|
||
|
+ attrd_updater --delete --name ${OCF_RESKEY_ganesha_active}
|
||
|
if [ $? -ne 0 ]; then
|
||
|
- ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
|
||
|
+ ocf_log info "warning: attrd_updater --delete --name ${OCF_RESKEY_ganesha_active} failed"
|
||
|
fi
|
||
|
|
||
|
return ${OCF_SUCCESS}
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|