import CS resource-agents-4.10.0-71.el9

This commit is contained in:
eabdullin 2025-03-11 08:06:29 +00:00
parent ef7496d4fe
commit 33646bee3d
17 changed files with 3021 additions and 55 deletions

View File

@ -0,0 +1,333 @@
From 7739c2a802c1dddb6757ff75cf7f6582a89bd518 Mon Sep 17 00:00:00 2001
From: id <happytobi@tscoding.de>
Date: Fri, 31 May 2024 09:00:18 +0200
Subject: [PATCH] azure-events-az: update to API versions, add retry
functionality for metadata requests, update tests
---
heartbeat/azure-events-az.in | 117 ++++++++++++++++++++++++-----------
heartbeat/ocf.py | 50 +++++++++++++--
2 files changed, 126 insertions(+), 41 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 46d4d1f3d9..6d31e5abae 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -27,7 +27,7 @@ import ocf
##############################################################################
-VERSION = "0.10"
+VERSION = "0.20"
USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
attr_globalPullState = "azure-events-az_globalPullState"
@@ -39,9 +39,6 @@ attr_healthstate = "#health-azure"
default_loglevel = ocf.logging.INFO
default_relevantEventTypes = set(["Reboot", "Redeploy"])
-global_pullMaxAttempts = 3
-global_pullDelaySecs = 1
-
##############################################################################
class attrDict(defaultdict):
@@ -71,16 +68,22 @@ class azHelper:
metadata_host = "http://169.254.169.254/metadata"
instance_api = "instance"
events_api = "scheduledevents"
- api_version = "2019-08-01"
+ events_api_version = "2020-07-01"
+ instance_api_version = "2021-12-13"
@staticmethod
- def _sendMetadataRequest(endpoint, postData=None):
+ def _sendMetadataRequest(endpoint, postData=None, api_version="2019-08-01"):
"""
Send a request to Azure's Azure Metadata Service API
"""
- url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
+
+ retryCount = int(ocf.get_parameter("retry_count",3))
+ retryWaitTime = int(ocf.get_parameter("retry_wait",20))
+ requestTimeout = int(ocf.get_parameter("request_timeout",15))
+
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, api_version)
data = ""
- ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s, retry_count = %s, retry_wait time = %s, request_timeout = %s" % (endpoint, postData, retryCount, retryWaitTime, requestTimeout))
ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
if postData and type(postData) != bytes:
@@ -89,18 +92,37 @@ class azHelper:
req = urllib2.Request(url, postData)
req.add_header("Metadata", "true")
req.add_header("User-Agent", USER_AGENT)
- try:
- resp = urllib2.urlopen(req)
- except URLError as e:
- if hasattr(e, 'reason'):
- ocf.logger.warning("Failed to reach the server: %s" % e.reason)
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
- elif hasattr(e, 'code'):
- ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
- else:
- data = resp.read()
- ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
+
+ if retryCount > 0:
+ ocf.logger.debug("_sendMetadataRequest: retry enabled")
+
+ successful = None
+ for retry in range(retryCount+1):
+ try:
+ resp = urllib2.urlopen(req, timeout=requestTimeout)
+ except Exception as e:
+ excType = e.__class__.__name__
+ if excType == TimeoutError.__name__:
+ ocf.logger.warning("Request timed out after %s seconds Error: %s" % (requestTimeout, e))
+ if excType == URLError.__name__:
+ if hasattr(e, 'reason'):
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
+ elif hasattr(e, 'code'):
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
+
+ if retryCount > 1 and retry != retryCount:
+ ocf.logger.warning("Request failed, retry (%s/%s) wait %s seconds before retry (wait time)" % (retry + 1,retryCount,retryWaitTime))
+ time.sleep(retryWaitTime)
+
+ else:
+ data = resp.read()
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
+ successful = 1
+ break
+
+ # When no request was successful also with retry enabled, set the cluster to idle
+ if successful is None:
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
if data:
data = json.loads(data)
@@ -115,14 +137,15 @@ class azHelper:
"""
ocf.logger.debug("getInstanceInfo: begin")
- jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api, None, azHelper.instance_api_version)
ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
if jsondata:
ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
return attrDict(jsondata["compute"])
else:
- ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.instance_api, azHelper.instance_api_version)
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info - call: %s" % apiCall)
sys.exit(ocf.OCF_ERR_GENERIC)
@staticmethod
@@ -132,11 +155,17 @@ class azHelper:
"""
ocf.logger.debug("pullScheduledEvents: begin")
- jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api, None, azHelper.events_api_version)
ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
- ocf.logger.debug("pullScheduledEvents: finished")
- return attrDict(jsondata)
+ if jsondata:
+ ocf.logger.debug("pullScheduledEvents: finished")
+ return attrDict(jsondata)
+ else:
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.events_api, azHelper.events_api_version)
+ ocf.ocf_exit_reason("pullScheduledEvents: Unable to get scheduledevents info - call: %s" % apiCall)
+ sys.exit(ocf.OCF_ERR_GENERIC)
+
@staticmethod
def forceEvents(eventIDs):
@@ -534,7 +563,7 @@ class Node:
except ValueError:
# Handle the exception
ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
-
+
ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
return isInStandy
@@ -584,7 +613,7 @@ class raAzEvents:
def monitor(self):
ocf.logger.debug("monitor: begin")
-
+
events = azHelper.pullScheduledEvents()
# get current document version
@@ -600,21 +629,21 @@ class raAzEvents:
ocf.logger.info("monitor: already handled curDocVersion, skip")
return ocf.OCF_SUCCESS
- localAzEventIDs = set()
+ localAzEventIds = dict()
for e in localEvents:
- localAzEventIDs.add(e.EventId)
+ localAzEventIds[e.EventId] = json.dumps(e)
curState = self.node.getState()
clusterEventIDs = self.node.getEventIDs()
ocf.logger.debug("monitor: curDocVersion has not been handled yet")
-
+
if clusterEventIDs:
# there are pending events set, so our state must be STOPPING or IN_EVENT
i = 0; touchedEventIDs = False
while i < len(clusterEventIDs):
# clean up pending events that are already finished according to AZ
- if clusterEventIDs[i] not in localAzEventIDs:
+ if clusterEventIDs[i] not in localAzEventIds.keys():
ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
clusterEventIDs.pop(i)
touchedEventIDs = True
@@ -644,12 +673,12 @@ class raAzEvents:
ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
else:
if curState == AVAILABLE:
- if len(localAzEventIDs) > 0:
+ if len(localAzEventIds) > 0:
if clusterHelper.otherNodesAvailable(self.node):
- ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
- curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
+ ocf.logger.info("monitor: can handle local events %s -> set state STOPPING - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
+ curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIds.keys())
else:
- ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
+ ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
self.node.setState(ON_HOLD)
else:
ocf.logger.debug("monitor: no local azEvents to handle")
@@ -761,6 +790,24 @@ def main():
longdesc="Set to true to enable verbose logging",
content_type="boolean",
default="false")
+ agent.add_parameter(
+ "retry_count",
+ shortdesc="Azure IMDS webservice retry count",
+ longdesc="Set to any number bigger than zero to enable retry count",
+ content_type="integer",
+ default="3")
+ agent.add_parameter(
+ "retry_wait",
+ shortdesc="Configure a retry wait time",
+ longdesc="Set retry wait time in seconds",
+ content_type="integer",
+ default="20")
+ agent.add_parameter(
+ "request_timeout",
+ shortdesc="Configure a request timeout",
+ longdesc="Set request timeout in seconds",
+ content_type="integer",
+ default="15")
agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
agent.add_action("validate-all", timeout=20, handler=validate_action)
diff --git a/heartbeat/ocf.py b/heartbeat/ocf.py
index dda2fed4bb..571cd19664 100644
--- a/heartbeat/ocf.py
+++ b/heartbeat/ocf.py
@@ -16,7 +16,7 @@
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
+#
import sys, os, logging, syslog
@@ -42,19 +42,19 @@
# OCF does not include the concept of master/slave resources so we
# need to extend it so we can discover a resource's complete state.
#
-# OCF_RUNNING_MASTER:
+# OCF_RUNNING_MASTER:
# The resource is in "master" mode and fully operational
# OCF_FAILED_MASTER:
# The resource is in "master" mode but in a failed state
-#
+#
# The extra two values should only be used during a probe.
#
# Probes are used to discover resources that were started outside of
# the CRM and/or left behind if the LRM fails.
-#
+#
# They can be identified in RA scripts by checking for:
# [ "${__OCF_ACTION}" = "monitor" -a "${OCF_RESKEY_CRM_meta_interval}" = "0" ]
-#
+#
# Failed "slaves" should continue to use: OCF_ERR_GENERIC
# Fully operational "slaves" should continue to use: OCF_SUCCESS
#
@@ -451,15 +451,17 @@ def value_for_parameter(param):
sys.exit(OCF_ERR_UNIMPLEMENTED)
+
if __name__ == "__main__":
import unittest
+ import logging
class TestMetadata(unittest.TestCase):
def test_noparams_noactions(self):
m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
self.assertEqual("""<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="foo">
+<resource-agent name="foo" version="1.0">
<version>1.0</version>
<longdesc lang="en">
longdesc
@@ -483,4 +485,40 @@ def test_params_actions(self):
m.add_action("start")
self.assertEqual(str(m.actions[0]), '<action name="start" />\n')
+ def test_retry_params_actions(self):
+ log= logging.getLogger( "test_retry_params_actions" )
+
+ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
+ m.add_parameter(
+ "retry_count",
+ shortdesc="Azure ims webservice retry count",
+ longdesc="Set to any number bigger than zero to enable retry count",
+ content_type="integer",
+ default="0")
+ m.add_parameter(
+ "retry_wait",
+ shortdesc="Configure a retry wait time",
+ longdesc="Set retry wait time in seconds",
+ content_type="integer",
+ default="20")
+ m.add_parameter(
+ "request_timeout",
+ shortdesc="Configure a request timeout",
+ longdesc="Set request timeout in seconds",
+ content_type="integer",
+ default="15")
+
+ m.add_action("start")
+
+ log.debug( "actions= %s", str(m.actions[0] ))
+ self.assertEqual(str(m.actions[0]), '<action name="start" />\n')
+
+ log.debug( "parameters= %s", str(m.parameters[0] ))
+ log.debug( "parameters= %s", str(m.parameters[1] ))
+ log.debug( "parameters= %s", str(m.parameters[2] ))
+ self.assertEqual(str(m.parameters[0]), '<parameter name="retry_count">\n<longdesc lang="en">Set to any number bigger than zero to enable retry count</longdesc>\n<shortdesc lang="en">Azure ims webservice retry count</shortdesc>\n<content type="integer" default="0" />\n</parameter>\n')
+ self.assertEqual(str(m.parameters[1]), '<parameter name="retry_wait">\n<longdesc lang="en">Set retry wait time in seconds</longdesc>\n<shortdesc lang="en">Configure a retry wait time</shortdesc>\n<content type="integer" default="20" />\n</parameter>\n')
+ self.assertEqual(str(m.parameters[2]), '<parameter name="request_timeout">\n<longdesc lang="en">Set request timeout in seconds</longdesc>\n<shortdesc lang="en">Configure a request timeout</shortdesc>\n<content type="integer" default="15" />\n</parameter>\n')
+
+ logging.basicConfig( stream=sys.stderr )
unittest.main()

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,43 @@
From 2ab2c832180dacb2e66d38541beae0957416eb96 Mon Sep 17 00:00:00 2001
From: Antonio Romito <aromito@redhat.com>
Date: Mon, 9 Sep 2024 17:30:38 +0200
Subject: [PATCH] Improve handling of "stopping" container removal in
remove_container()
- Added handling for containers in a stopping state by checking the state and force-removing if necessary.
- Improved log messages to provide clearer information when force removal is needed.
Related: https://issues.redhat.com/browse/RHEL-58008
---
heartbeat/podman | 11 +++++++++--
1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 53867bff20..643ec4d894 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -254,6 +254,13 @@ remove_container()
ocf_run podman rm -v $CONTAINER
rc=$?
if [ $rc -ne 0 ]; then
+ if [ $rc -eq 2 ]; then
+ if podman inspect --format '{{.State.Status}}' $CONTAINER | grep -wq "stopping"; then
+ ocf_log err "Inactive container ${CONTAINER} is stuck in 'stopping' state. Force-remove it."
+ ocf_run podman rm -f $CONTAINER
+ rc=$?
+ fi
+ fi
# due to a podman bug (rhbz#1841485), sometimes a stopped
# container can still be associated with Exec sessions, in
# which case the "podman rm" has to be forced
@@ -517,8 +524,8 @@ podman_stop()
# but the associated container exit code is -1. If that's the case,
# assume there's no failure and continue with the rm as usual.
if [ $rc -eq 125 ] && \
- podman inspect --format '{{.State.Status}}:{{.State.ExitCode}}' $CONTAINER | grep -wq "stopped:-1"; then
- ocf_log warn "Container ${CONTAINER} had an unexpected stop outcome. Trying to remove it anyway."
+ podman inspect --format '{{.State.Status}}:{{.State.ExitCode}}' $CONTAINER | grep -Eq '^(exited|stopped):-1$'; then
+ ocf_log err "Container ${CONTAINER} had an unexpected stop outcome. Trying to remove it anyway."
else
ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
return $OCF_ERR_GENERIC

View File

@ -0,0 +1,106 @@
From d66a52cfb25f5436255ecc65a407c0166a720146 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 3 Sep 2024 12:55:28 +0200
Subject: [PATCH 1/2] Filesystem: don't sleep during stop-action when there are
no processes to kill
Thanks @SatomiOSAWA for the initial code.
---
heartbeat/Filesystem | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 3eb520e0c..f54969f20 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -685,12 +685,13 @@ signal_processes() {
pids=$(get_pids "$dir")
if [ -z "$pids" ]; then
ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
- return
+ return 1
fi
for pid in $pids; do
ocf_log info "sending signal $sig to: $(ps -f $pid | tail -1)"
kill -s $sig $pid
done
+ return 0
}
try_umount() {
local SUB="$1"
@@ -717,12 +718,13 @@ timeout_child() {
return $ret
}
fs_stop_loop() {
- local SUB="$1" signals="$2" sig
+ local SUB="$1" signals="$2" sig send_signal
while true; do
+ send_signal=false
for sig in $signals; do
- signal_processes "$SUB" $sig
+ signal_processes "$SUB" $sig && send_signal=true
done
- sleep $OCF_RESKEY_signal_delay
+ $send_signal && sleep $OCF_RESKEY_signal_delay
try_umount "$SUB" && return $OCF_SUCCESS
done
}
From cb6aaffc260eea0f0fee6fab44393c6cf12b8a83 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 9 Sep 2024 10:58:12 +0200
Subject: [PATCH 2/2] Filesystem: only use $umount_force after sending
kill_signals
---
heartbeat/Filesystem | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index f54969f20..4dd962fd9 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -694,8 +694,8 @@ signal_processes() {
return 0
}
try_umount() {
- local SUB="$1"
- $UMOUNT $umount_force "$SUB"
+ local force_arg="$1" SUB="$2"
+ $UMOUNT $force_arg "$SUB"
list_mounts | grep "${TAB}${SUB}${TAB}" >/dev/null 2>&1 || {
ocf_log info "unmounted $SUB successfully"
return $OCF_SUCCESS
@@ -718,14 +718,14 @@ timeout_child() {
return $ret
}
fs_stop_loop() {
- local SUB="$1" signals="$2" sig send_signal
+ local force_arg="$1" SUB="$2" signals="$3" sig send_signal
while true; do
send_signal=false
for sig in $signals; do
signal_processes "$SUB" $sig && send_signal=true
done
$send_signal && sleep $OCF_RESKEY_signal_delay
- try_umount "$SUB" && return $OCF_SUCCESS
+ try_umount "$force_arg" "$SUB" && return $OCF_SUCCESS
done
}
fs_stop() {
@@ -733,13 +733,13 @@ fs_stop() {
grace_time=$((timeout/2))
# try gracefully terminating processes for up to half of the configured timeout
- fs_stop_loop "$SUB" "$OCF_RESKEY_term_signals" &
+ fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" &
timeout_child $! $grace_time
ret=$?
[ $ret -eq $OCF_SUCCESS ] && return $ret
# try killing them for the rest of the timeout
- fs_stop_loop "$SUB" "$OCF_RESKEY_kill_signals" &
+ fs_stop_loop "$umount_force" "$SUB" "$OCF_RESKEY_kill_signals" &
timeout_child $! $grace_time
ret=$?
[ $ret -eq $OCF_SUCCESS ] && return $ret

View File

@ -0,0 +1,37 @@
From c72dc2f2e502486d93aeec26abc12e720b14a0a7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 10 Oct 2024 16:41:03 +0200
Subject: [PATCH] azure-events*: use node name from cluster instead of hostname
to avoid failing if they're not the same
---
heartbeat/azure-events-az.in | 2 +-
heartbeat/azure-events.in | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 6d31e5aba..0ed001037 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -441,7 +441,7 @@ class Node:
self.raOwner = ra
self.azInfo = azHelper.getInstanceInfo()
self.azName = self.azInfo.name
- self.hostName = socket.gethostname()
+ self.hostName = clusterHelper._exec("crm_node", "-n")
self.setAttr("azName", self.azName)
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 90acaba62..32f71ee26 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -411,7 +411,7 @@ class Node:
self.raOwner = ra
self.azInfo = azHelper.getInstanceInfo()
self.azName = self.azInfo.name
- self.hostName = socket.gethostname()
+ self.hostName = clusterHelper._exec("crm_node", "-n")
self.setAttr("azName", self.azName)
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)

View File

@ -0,0 +1,38 @@
From 38eaf00bc81af7530c56eba282918762a47a9326 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 19 Sep 2024 13:01:53 +0200
Subject: [PATCH] nfsserver: also stop rpc-statd for nfsv4_only to avoid stop
failing in some cases
E.g. nfs_no_notify=true nfsv4_only=true nfs_shared_infodir=/nfsmq/nfsinfo would cause a "Failed to unmount a bind mount" error
---
heartbeat/nfsserver | 16 +++++++---------
1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 5793d7a70..fd9268afc 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -947,15 +947,13 @@ nfsserver_stop ()
sleep 1
done
- if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
- nfs_exec stop rpc-statd > /dev/null 2>&1
- ocf_log info "Stop: rpc-statd"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- ocf_exit_reason "Failed to stop rpc-statd"
- return $OCF_ERR_GENERIC
- fi
+ nfs_exec stop rpc-statd > /dev/null 2>&1
+ ocf_log info "Stop: rpc-statd"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop rpc-statd"
+ return $OCF_ERR_GENERIC
fi
nfs_exec stop nfs-idmapd > /dev/null 2>&1

View File

@ -0,0 +1,100 @@
From f02afd0fadb581ca0fc9798beaf28044cf211200 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 18 Sep 2024 11:53:52 +0200
Subject: [PATCH 1/2] Filesystem: on stop, try umount directly, before scanning
for users
48ed6e6d (Filesystem: improve stop-action and allow setting term/kill signals and signal_delay for large filesystems, 2023-07-04)
changed the logic from
"try umount; if that fails, find and kill users; repeat" to
"try to find and kill users; then try umount; repeat"
But even just walking /proc may take "a long time" on busy systems,
and may still turn up with "no users found".
It will take even longer for "force_umount=safe"
(observed 8 to 10 seconds just for "get_pids()" with "safe" to return nothing)
than for "force_umount=yes" (still ~ 2 to 3 seconds),
but it will take "a long time" in any case.
(BTW, that may be longer than the hardcoded default of 6 seconds for "fast_stop",
which is also the default on many systems now)
If the dependencies are properly configured,
there should be no users left,
and the umount should just work.
Revert back to "try umount first", and only then try to find "rogue" users.
---
heartbeat/Filesystem | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 4dd962fd9..99bddaf62 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -732,6 +732,11 @@ fs_stop() {
local SUB="$1" timeout=$2 grace_time ret
grace_time=$((timeout/2))
+ # Just walking /proc may take "a long time", even if we don't find any users of this FS.
+ # If dependencies are properly configured, umount should just work.
+ # Only if that fails, try to find and kill processes that still use it.
+ try_umount "" "$SUB" && return $OCF_SUCCESS
+
# try gracefully terminating processes for up to half of the configured timeout
fs_stop_loop "" "$SUB" "$OCF_RESKEY_term_signals" &
timeout_child $! $grace_time
From b42d698f12aaeb871f4cc6a3c0327a27862b4376 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 18 Sep 2024 13:42:38 +0200
Subject: [PATCH 2/2] Filesystem: stop/get_pids to be signaled
The "safe" way to get process ids that may be using a particular filesystem
currently uses shell globs ("find /proc/[0-9]*").
With a million processes (and/or a less capable shell),
that may result in "Argument list too long".
Replace with find /proc -path "/proc/[0-9]*" instead.
While at it, also fix the non-posix -or to be -o,
and add explicit grouping parentheses \( \) and explicit -print.
Add a comment to not include "interesting" characters in mount point names.
---
heartbeat/Filesystem | 23 ++++++++++++++++++++---
1 file changed, 20 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 99bddaf62..3405e2c26 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -669,9 +669,26 @@ get_pids()
$FUSER -Mm $dir 2>/dev/null
fi
elif [ "$FORCE_UNMOUNT" = "safe" ]; then
- procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')
- mmap_procs=$(grep " ${dir}/" /proc/[0-9]*/maps | awk -F/ '{print $3}')
- printf "${procs}\n${mmap_procs}" | sort | uniq
+ # Yes, in theory, ${dir} could contain "intersting" characters
+ # and would need to be quoted for glob (find) and regex (grep).
+ # Don't do that, then.
+
+ # Avoid /proc/[0-9]*, it may cause "Argument list too long".
+ # There are several ways to filter for /proc/<pid>
+ # -mindepth 1 -not -path "/proc/[0-9]*" -prune -o ...
+ # -path "/proc/[!0-9]*" -prune -o ...
+ # -path "/proc/[0-9]*" -a ...
+ # the latter seemd to be significantly faster for this one in my naive test.
+ procs=$(exec 2>/dev/null;
+ find /proc -path "/proc/[0-9]*" -type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print |
+ awk -F/ '{print $3}' | uniq)
+
+ # This finds both /proc/<pid>/maps and /proc/<pid>/task/<tid>/maps;
+ # if you don't want the latter, add -maxdepth.
+ mmap_procs=$(exec 2>/dev/null;
+ find /proc -path "/proc/[0-9]*/maps" -print |
+ xargs -r grep -l " ${dir}/" | awk -F/ '{print $3}' | uniq)
+ printf "${procs}\n${mmap_procs}" | sort -u
fi
}

View File

@ -0,0 +1,48 @@
From 82958dc115c47232ae0468b1ddf64e728ec325e4 Mon Sep 17 00:00:00 2001
From: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
Date: Wed, 9 Oct 2024 00:16:44 +0200
Subject: [PATCH] ocf-shellfuncs: systemd_drop_in only if needed
Avoid dbus overload upon many simultaneous "daemon-reload" invocations
(when a resource agent using systemd_drop_in() is called multiple times
as part of parallel resource operations in Pacemaker) by skipping the
file creation and reload if the expected data already exists.
Whilst at it, align the indentation of the heredoc with the other parts
of the function.
Signed-off-by: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
---
heartbeat/ocf-shellfuncs.in | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 9335cbf00..5c4bb3264 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -662,14 +662,17 @@ systemd_drop_in()
systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
mkdir -p "$systemdrundir"
conf_file="$systemdrundir/$1.conf"
- cat >"$conf_file" <<EOF
-[Unit]
-$2=$3
-EOF
- # The information is accessible through systemd API and systemd would
- # complain about improper permissions.
- chmod o+r "$conf_file"
- systemctl daemon-reload
+ conf_line="$2=$3"
+ if ! { [ -f "$conf_file" ] && grep -q "^$conf_line$" "$conf_file" ; } ; then
+ cat > "$conf_file" <<-EOF
+ [Unit]
+ $conf_line
+ EOF
+ # The information is accessible through systemd API and systemd would
+ # complain about improper permissions.
+ chmod o+r "$conf_file"
+ systemctl daemon-reload
+ fi
}
# usage: curl_retry RETRIES SLEEP ARGS URL

View File

@ -0,0 +1,132 @@
From 6fab544e702a7601714cd017aecc00193f23ae72 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 11 Oct 2024 13:13:10 +0200
Subject: [PATCH] IPaddr2: improve fail logic and check ip_status after adding
IP
* check that the label got applied
* return OCF_ERR_GENERIC to avoid false-positive when IP was manually added before starting the resource
* check ip_status after adding IP to fail without having to wait for the first monitor-action
Co-authored-by: Evan J. Felix <evan.felix@pnnl.gov>
---
heartbeat/IPaddr2 | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index e325aa574..27cae2d11 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -586,7 +586,7 @@ ip_init() {
exit $rc
fi
fi
-
+
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
if [ -n "$IFLABEL" ]; then
@@ -985,6 +985,7 @@ run_send_ua() {
# ok = served (for CIP: + hash bucket)
# partial = served and no hash bucket (CIP only)
# partial2 = served and no CIP iptables rule
+# partial3 = served with no label
# no = nothing
#
ip_served() {
@@ -1002,6 +1003,11 @@ ip_served() {
if [ -z "$IP_CIP" ]; then
for i in $cur_nic; do
+ # check address label
+ if [ -n "$IFLABEL" ] && [ -z "`$IP2UTIL -o -f $FAMILY addr show $nic label $IFLABEL`" ]; then
+ echo partial3
+ return 0
+ fi
# only mark as served when on the same interfaces as $NIC
[ "$i" = "$NIC" ] || continue
echo "ok"
@@ -1065,7 +1071,12 @@ ip_start() {
if [ "$ip_status" = "ok" ]; then
exit $OCF_SUCCESS
fi
-
+
+ if [ "$ip_status" = "partial3" ]; then
+ ocf_exit_reason "IP $OCF_RESKEY_ip available, but label missing"
+ exit $OCF_ERR_GENERIC
+ fi
+
if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then
$MODPROBE ip_conntrack
$IPADDR2_CIP_IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
@@ -1083,7 +1094,7 @@ ip_start() {
if [ -n "$IP_CIP" ] && [ $ip_status = "partial" ]; then
echo "+$IP_INC_NO" >$IP_CIP_FILE
fi
-
+
if [ "$ip_status" = "no" ]; then
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
for i in `find_interface $OCF_RESKEY_ip 32`; do
@@ -1094,7 +1105,7 @@ ip_start() {
esac
done
fi
-
+
add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC"
rc=$?
@@ -1102,6 +1113,12 @@ ip_start() {
ocf_exit_reason "Failed to add $OCF_RESKEY_ip"
exit $rc
fi
+
+ ip_status=`ip_served`
+ if [ "$ip_status" != "ok" ]; then
+ ocf_exit_reason "Failed to add $OCF_RESKEY_ip with error $ip_status"
+ exit $OCF_ERR_GENERIC
+ fi
fi
case $NIC in
@@ -1134,7 +1151,7 @@ ip_stop() {
ocf_take_lock $CIP_lockfile
ocf_release_lock_on_exit $CIP_lockfile
fi
-
+
if [ -f "$SENDARPPIDFILE" ] ; then
kill `cat "$SENDARPPIDFILE"`
if [ $? -ne 0 ]; then
@@ -1171,17 +1188,17 @@ ip_stop() {
i=`expr $i + 1`
done
else
- ip_del_if="no"
+ ip_del_if="no"
fi
fi
-
+
if [ "$ip_del_if" = "yes" ]; then
delete_interface $OCF_RESKEY_ip $NIC $NETMASK
if [ $? -ne 0 ]; then
ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]"
exit $OCF_ERR_GENERIC
fi
-
+
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
restore_loopback "$OCF_RESKEY_ip"
fi
@@ -1200,7 +1217,7 @@ ip_monitor() {
run_arp_sender refresh
return $OCF_SUCCESS
;;
- partial|no|partial2)
+ no)
exit $OCF_NOT_RUNNING
;;
*)

View File

@ -0,0 +1,455 @@
From 61cec34a754017537c61e79cd1212f2688c32429 Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:19:10 +0530
Subject: [PATCH 1/7] Introduce a new shell function to reuse IMDS token
---
heartbeat/ocf-shellfuncs.in | 31 +++++++++++++++++++++++++++++++
1 file changed, 31 insertions(+)
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 5c4bb3264..0c4632cf9 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -1111,3 +1111,34 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
fi
+
+# File to store the token and timestamp
+TOKEN_FILE="/tmp/.imds_token"
+TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
+TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+
+# Function to fetch a new token
+fetch_new_token() {
+ TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME")
+ echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ echo "$TOKEN"
+}
+
+# Function to retrieve or renew the token
+get_token() {
+ if [[ -f "$TOKEN_FILE" ]]; then
+ read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
+ CURRENT_TIME=$(date +%s)
+ ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
+
+ if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ # Token is still valid
+ echo "$STORED_TOKEN"
+ return
+ fi
+ fi
+ # Fetch a new token if not valid
+ fetch_new_token
+}
+
+
From 00629fa44cb7a8dd1045fc8cad755e1d0c808476 Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:21:18 +0530
Subject: [PATCH 2/7] Utilize the get_token function to reuse the token
---
heartbeat/aws-vpc-move-ip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 6115e5ba8..fbeb2ee64 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -270,7 +270,7 @@ ec2ip_validate() {
fi
fi
- TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
EC2_INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From 36126cdcb90ad617ecfce03d986550907732aa4f Mon Sep 17 00:00:00 2001
From: harshkiprofile <83770157+harshkiprofile@users.noreply.github.com>
Date: Mon, 4 Nov 2024 12:22:16 +0530
Subject: [PATCH 3/7] Utilize the get_token function to reuse the token
---
heartbeat/awsvip | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index f2b238a0f..ca19ac086 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -266,7 +266,7 @@ if [ -n "${OCF_RESKEY_region}" ]; then
AWSCLI_CMD="$AWSCLI_CMD --region ${OCF_RESKEY_region}"
fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
-TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From dcd0050df5ba94905bc71d38b05cbb93f5687b61 Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Mon, 4 Nov 2024 20:05:33 +0530
Subject: [PATCH 4/7] Move token renewal function to aws.sh for reuse in AWS
agent scripts
---
heartbeat/Makefile.am | 1 +
heartbeat/aws-vpc-move-ip | 1 +
heartbeat/aws-vpc-route53.in | 3 ++-
heartbeat/aws.sh | 46 ++++++++++++++++++++++++++++++++++++
heartbeat/awseip | 3 ++-
heartbeat/awsvip | 1 +
heartbeat/ocf-shellfuncs.in | 33 +-------------------------
7 files changed, 54 insertions(+), 34 deletions(-)
create mode 100644 heartbeat/aws.sh
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 409847970..655740f14 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -218,6 +218,7 @@ ocfcommon_DATA = ocf-shellfuncs \
ocf-rarun \
ocf-distro \
apache-conf.sh \
+ aws.sh \
http-mon.sh \
sapdb-nosha.sh \
sapdb.sh \
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index fbeb2ee64..f4b0492f2 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -33,6 +33,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index eba2ed95c..f7e756782 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -43,6 +43,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
# Defaults
OCF_RESKEY_awscli_default="/usr/bin/aws"
@@ -377,7 +378,7 @@ r53_monitor() {
_get_ip() {
case $OCF_RESKEY_ip in
local|public)
- TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
IPADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/${OCF_RESKEY_ip}-ipv4")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
new file mode 100644
index 000000000..fc557109c
--- /dev/null
+++ b/heartbeat/aws.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+#
+#
+# AWS Helper Scripts
+#
+#
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_curl_retries_default="3"
+OCF_RESKEY_curl_sleep_default="1"
+
+: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
+: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
+
+# Function to enable reusable IMDS token retrieval for efficient repeated access
+# File to store the token and timestamp
+TOKEN_FILE="/tmp/.imds_token"
+TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
+TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+
+# Function to fetch a new token
+fetch_new_token() {
+ TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME'" "http://169.254.169.254/latest/api/token")
+ echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ echo "$TOKEN"
+}
+
+# Function to retrieve or renew the token
+get_token() {
+ if [ -f "$TOKEN_FILE" ]; then
+ read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
+ CURRENT_TIME=$(date +%s)
+ ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
+
+ if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ # Token is still valid
+ echo "$STORED_TOKEN"
+ return
+ fi
+ fi
+ # Fetch a new token if not valid
+ fetch_new_token
+}
\ No newline at end of file
diff --git a/heartbeat/awseip b/heartbeat/awseip
index ffb6223a1..049c2e566 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -38,6 +38,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
#######################################################################
@@ -306,7 +307,7 @@ fi
ELASTIC_IP="${OCF_RESKEY_elastic_ip}"
ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
-TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: 21600'" "http://169.254.169.254/latest/api/token")
+TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index ca19ac086..de67981d8 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -37,6 +37,7 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/aws.sh
#######################################################################
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 0c4632cf9..922c6ea45 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -1110,35 +1110,4 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
# pacemaker sets HA_use_logd, some others use HA_LOGD :/
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
-fi
-
-# File to store the token and timestamp
-TOKEN_FILE="/tmp/.imds_token"
-TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
-TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
-
-# Function to fetch a new token
-fetch_new_token() {
- TOKEN=$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME")
- echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
- echo "$TOKEN"
-}
-
-# Function to retrieve or renew the token
-get_token() {
- if [[ -f "$TOKEN_FILE" ]]; then
- read -r STORED_TOKEN STORED_TIMESTAMP < "$TOKEN_FILE"
- CURRENT_TIME=$(date +%s)
- ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
-
- if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
- # Token is still valid
- echo "$STORED_TOKEN"
- return
- fi
- fi
- # Fetch a new token if not valid
- fetch_new_token
-}
-
-
+fi
\ No newline at end of file
From 9f7be201923c8eab1b121f2067ed74a69841cf8a Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 19:12:34 +0530
Subject: [PATCH 5/7] Refactor to use common temp path and update shell syntax
---
heartbeat/Makefile.am | 2 +-
heartbeat/aws.sh | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 655740f14..8352f3a3d 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -218,7 +218,7 @@ ocfcommon_DATA = ocf-shellfuncs \
ocf-rarun \
ocf-distro \
apache-conf.sh \
- aws.sh \
+ aws.sh \
http-mon.sh \
sapdb-nosha.sh \
sapdb.sh \
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index fc557109c..c77f93b91 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -17,7 +17,7 @@ OCF_RESKEY_curl_sleep_default="1"
# Function to enable reusable IMDS token retrieval for efficient repeated access
# File to store the token and timestamp
-TOKEN_FILE="/tmp/.imds_token"
+TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
@@ -35,7 +35,7 @@ get_token() {
CURRENT_TIME=$(date +%s)
ELAPSED_TIME=$((CURRENT_TIME - STORED_TIMESTAMP))
- if (( ELAPSED_TIME < (TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD) )); then
+ if [ "$ELAPSED_TIME" -lt "$((TOKEN_LIFETIME - TOKEN_EXPIRY_THRESHOLD))" ]; then
# Token is still valid
echo "$STORED_TOKEN"
return
From 4f61048064d1df3bebdb5c1441cf0020f213c01b Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 19:30:15 +0530
Subject: [PATCH 6/7] Consolidate curl_retry and curl_sleep variable to a
single location in aws.sh
---
heartbeat/aws-vpc-move-ip | 4 ----
heartbeat/aws-vpc-route53.in | 4 ----
heartbeat/awseip | 4 ----
heartbeat/awsvip | 4 ----
4 files changed, 16 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index f4b0492f2..3aa9ceb02 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -48,8 +48,6 @@ OCF_RESKEY_interface_default="eth0"
OCF_RESKEY_iflabel_default=""
OCF_RESKEY_monapi_default="false"
OCF_RESKEY_lookup_type_default="InstanceId"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -63,8 +61,6 @@ OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_iflabel=${OCF_RESKEY_iflabel_default}}
: ${OCF_RESKEY_monapi=${OCF_RESKEY_monapi_default}}
: ${OCF_RESKEY_lookup_type=${OCF_RESKEY_lookup_type_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
#######################################################################
diff --git a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
index f7e756782..85c8de3c1 100644
--- a/heartbeat/aws-vpc-route53.in
+++ b/heartbeat/aws-vpc-route53.in
@@ -54,8 +54,6 @@ OCF_RESKEY_hostedzoneid_default=""
OCF_RESKEY_fullname_default=""
OCF_RESKEY_ip_default="local"
OCF_RESKEY_ttl_default=10
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
@@ -65,8 +63,6 @@ OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_fullname:=${OCF_RESKEY_fullname_default}}
: ${OCF_RESKEY_ip:=${OCF_RESKEY_ip_default}}
: ${OCF_RESKEY_ttl:=${OCF_RESKEY_ttl_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
usage() {
cat <<-EOT
diff --git a/heartbeat/awseip b/heartbeat/awseip
index 049c2e566..4b1c3bc6a 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -50,16 +50,12 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index de67981d8..8c71e7fac 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -49,16 +49,12 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
-: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
-: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
meta_data() {
cat <<END
From d451c5c595b08685f84ec85da96ae9cb4fc076fe Mon Sep 17 00:00:00 2001
From: harshkiprofile <beer18317@gmail.com>
Date: Tue, 5 Nov 2024 20:50:24 +0530
Subject: [PATCH 7/7] aws.sh needs to be added to symlinkstargets in
doc/man/Makefile.am
---
doc/man/Makefile.am | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index ef7639bff..447f5cba3 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -42,7 +42,7 @@ radir = $(abs_top_builddir)/heartbeat
# required for out-of-tree build
symlinkstargets = \
ocf-distro ocf.py ocf-rarun ocf-returncodes \
- findif.sh apache-conf.sh http-mon.sh mysql-common.sh \
+ findif.sh apache-conf.sh aws.sh http-mon.sh mysql-common.sh \
nfsserver-redhat.sh ora-common.sh
preptree:

View File

@ -0,0 +1,161 @@
From cc5ffa5e599c974c426e93faa821b342e96b916d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:46:27 +0100
Subject: [PATCH 1/2] aws.sh: chmod 600 $TOKEN_FILE, add get_instance_id() with
DMI support, and use get_instance_id() in AWS agents
---
heartbeat/aws-vpc-move-ip | 2 +-
heartbeat/aws.sh | 30 +++++++++++++++++++++++++++---
heartbeat/awseip | 2 +-
heartbeat/awsvip | 2 +-
4 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 3aa9ceb02..09ae68b57 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -269,7 +269,7 @@ ec2ip_validate() {
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
- EC2_INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+ EC2_INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
if [ -z "${EC2_INSTANCE_ID}" ]; then
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index c77f93b91..9cd343c16 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -9,8 +9,8 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_curl_retries_default="3"
-OCF_RESKEY_curl_sleep_default="1"
+OCF_RESKEY_curl_retries_default="4"
+OCF_RESKEY_curl_sleep_default="3"
: ${OCF_RESKEY_curl_retries=${OCF_RESKEY_curl_retries_default}}
: ${OCF_RESKEY_curl_sleep=${OCF_RESKEY_curl_sleep_default}}
@@ -20,11 +20,13 @@ OCF_RESKEY_curl_sleep_default="1"
TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
+DMI_FILE="/sys/devices/virtual/dmi/id/board_asset_tag" # Only supported on nitro-based instances.
# Function to fetch a new token
fetch_new_token() {
TOKEN=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -sX PUT -H 'X-aws-ec2-metadata-token-ttl-seconds: $TOKEN_LIFETIME'" "http://169.254.169.254/latest/api/token")
echo "$TOKEN $(date +%s)" > "$TOKEN_FILE"
+ chmod 600 "$TOKEN_FILE"
echo "$TOKEN"
}
@@ -43,4 +45,26 @@ get_token() {
fi
# Fetch a new token if not valid
fetch_new_token
-}
\ No newline at end of file
+}
+
+get_instance_id() {
+ local INSTANCE_ID
+
+ # Try to get the EC2 instance ID from DMI first before falling back to IMDS.
+ ocf_log debug "EC2: Attempt to get EC2 Instance ID from local file."
+ if [ -r "$DMI_FILE" ] && [ -s "$DMI_FILE" ]; then
+ INSTANCE_ID="$(cat "$DMI_FILE")"
+ case "$INSTANCE_ID" in
+ i-0*) echo "$INSTANCE_ID"; return "$OCF_SUCCESS" ;;
+ esac
+ fi
+
+ INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to get EC2 Instance ID"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ echo "$INSTANCE_ID"
+ return "$OCF_SUCCESS"
+}
diff --git a/heartbeat/awseip b/heartbeat/awseip
index 4b1c3bc6a..7f38376dc 100755
--- a/heartbeat/awseip
+++ b/heartbeat/awseip
@@ -305,7 +305,7 @@ ALLOCATION_ID="${OCF_RESKEY_allocation_id}"
PRIVATE_IP_ADDRESS="${OCF_RESKEY_private_ip_address}"
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
case $__OCF_ACTION in
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index 8c71e7fac..0856ac5e4 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -265,7 +265,7 @@ fi
SECONDARY_PRIVATE_IP="${OCF_RESKEY_secondary_private_ip}"
TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-INSTANCE_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/instance-id")
+INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
MAC_ADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/mac")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
From b8d3ecc6a8ce4baf4b28d02978dd573728ccf5fa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 18 Nov 2024 11:10:42 +0100
Subject: [PATCH 2/2] aws.sh/ocf-shellfuncs: add ability to refresh token if
it's invalid
---
heartbeat/aws.sh | 1 +
heartbeat/ocf-shellfuncs.in | 11 ++++++++++-
2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index 9cd343c16..64f2e13a7 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -18,6 +18,7 @@ OCF_RESKEY_curl_sleep_default="3"
# Function to enable reusable IMDS token retrieval for efficient repeated access
# File to store the token and timestamp
TOKEN_FILE="${HA_RSCTMP}/.aws_imds_token"
+TOKEN_FUNC="fetch_new_token" # Used by curl_retry() if saved token is invalid
TOKEN_LIFETIME=21600 # Token lifetime in seconds (6 hours)
TOKEN_EXPIRY_THRESHOLD=3600 # Renew token if less than 60 minutes (1 hour) remaining
DMI_FILE="/sys/devices/virtual/dmi/id/board_asset_tag" # Only supported on nitro-based instances.
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 922c6ea45..8e51fa3c8 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -697,6 +697,15 @@ curl_retry()
ocf_log debug "result: $result"
[ $rc -eq 0 ] && break
+ if [ -n "$TOKEN" ] && [ -n "$TOKEN_FILE" ] && \
+ [ -f "$TOKEN_FILE" ] && [ -n "$TOKEN_FUNC" ] && \
+ echo "$result" | grep -q "The requested URL returned error: 401$"; then
+ local OLD_TOKEN="$TOKEN"
+ ocf_log err "Token invalid. Getting new token."
+ TOKEN=$($TOKEN_FUNC)
+ [ $? -ne 0 ] && exit $OCF_ERR_GENERIC
+ args=$(echo "$args" | sed "s/$OLD_TOKEN/$TOKEN/")
+ fi
sleep $sleep
done
@@ -1110,4 +1119,4 @@ ocf_is_true "$OCF_TRACE_RA" && ocf_start_trace
# pacemaker sets HA_use_logd, some others use HA_LOGD :/
if ocf_is_true "$HA_use_logd"; then
: ${HA_LOGD:=yes}
-fi
\ No newline at end of file
+fi

View File

@ -0,0 +1,184 @@
From 392d40048a25d7cb73ec5b5e9f7a5862f7a3fd48 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:22:27 +0100
Subject: [PATCH 1/2] aws.sh: add get_interface_mac()
---
heartbeat/aws.sh | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index 64f2e13a7..ebb4eb1f4 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -69,3 +69,24 @@ get_instance_id() {
echo "$INSTANCE_ID"
return "$OCF_SUCCESS"
}
+
+get_interface_mac() {
+ local MAC_FILE MAC_ADDR rc
+ MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
+ if [ -f "$MAC_FILE" ]; then
+ cmd="cat ${MAC_FILE}"
+ else
+ cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
+ fi
+ ocf_log debug "executing command: $cmd"
+ MAC_ADDR="$(eval $cmd)"
+ rc=$?
+ if [ $rc != 0 ]; then
+ ocf_log warn "command failed, rc: $rc"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
+
+ echo $MAC_ADDR
+ return $OCF_SUCCESS
+}
From 87337ac4da931d5a53c83d53d4bab17ee123ba9f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 11 Nov 2024 12:26:38 +0100
Subject: [PATCH 2/2] awsvip: let user specify which interface to use, and make
the parameter optional in aws-vpc-move-ip
---
heartbeat/aws-vpc-move-ip | 20 ++++----------------
heartbeat/aws.sh | 4 +++-
heartbeat/awsvip | 24 +++++++++++++++++-------
3 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
index 09ae68b57..2afc0ba53 100755
--- a/heartbeat/aws-vpc-move-ip
+++ b/heartbeat/aws-vpc-move-ip
@@ -157,7 +157,7 @@ Role to use to query/update the route table
<content type="string" default="${OCF_RESKEY_routing_table_role_default}" />
</parameter>
-<parameter name="interface" required="1">
+<parameter name="interface" required="0">
<longdesc lang="en">
Name of the network interface, i.e. eth0
</longdesc>
@@ -321,7 +321,7 @@ ec2ip_monitor() {
ocf_log debug "monitor: Enhanced Monitoring disabled - omitting API call"
fi
- cmd="ip addr show to $OCF_RESKEY_ip up"
+ cmd="ip addr show dev $OCF_RESKEY_interface to $OCF_RESKEY_ip up"
ocf_log debug "executing command: $cmd"
RESULT=$($cmd | grep "$OCF_RESKEY_ip")
if [ -z "$RESULT" ]; then
@@ -331,7 +331,7 @@ ec2ip_monitor() {
level="info"
fi
- ocf_log "$level" "IP $OCF_RESKEY_ip not assigned to running interface"
+ ocf_log "$level" "IP $OCF_RESKEY_ip not assigned to interface $OCF_RESKEY_interface"
return $OCF_NOT_RUNNING
fi
@@ -369,19 +369,7 @@ ec2ip_drop() {
}
ec2ip_get_instance_eni() {
- MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
- if [ -f $MAC_FILE ]; then
- cmd="cat ${MAC_FILE}"
- else
- cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
- fi
- ocf_log debug "executing command: $cmd"
- MAC_ADDR="$(eval $cmd)"
- rc=$?
- if [ $rc != 0 ]; then
- ocf_log warn "command failed, rc: $rc"
- return $OCF_ERR_GENERIC
- fi
+ MAC_ADDR=$(get_interface_mac)
ocf_log debug "MAC address associated with interface ${OCF_RESKEY_interface}: ${MAC_ADDR}"
cmd="curl_retry \"$OCF_RESKEY_curl_retries\" \"$OCF_RESKEY_curl_sleep\" \"--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'\" \"http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDR}/interface-id\""
diff --git a/heartbeat/aws.sh b/heartbeat/aws.sh
index ebb4eb1f4..216033afe 100644
--- a/heartbeat/aws.sh
+++ b/heartbeat/aws.sh
@@ -73,7 +73,9 @@ get_instance_id() {
get_interface_mac() {
local MAC_FILE MAC_ADDR rc
MAC_FILE="/sys/class/net/${OCF_RESKEY_interface}/address"
- if [ -f "$MAC_FILE" ]; then
+ if [ -z "$OCF_RESKEY_interface" ]; then
+ cmd="curl_retry \"$OCF_RESKEY_curl_retries\" \"$OCF_RESKEY_curl_sleep\" \"--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'\" \"http://169.254.169.254/latest/meta-data/mac\""
+ elif [ -f "$MAC_FILE" ]; then
cmd="cat ${MAC_FILE}"
else
cmd="ip -br link show dev ${OCF_RESKEY_interface} | tr -s ' ' | cut -d' ' -f3"
diff --git a/heartbeat/awsvip b/heartbeat/awsvip
index 0856ac5e4..015180d5a 100755
--- a/heartbeat/awsvip
+++ b/heartbeat/awsvip
@@ -49,12 +49,14 @@ OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
+OCF_RESKEY_interface_default=""
: ${OCF_RESKEY_awscli=${OCF_RESKEY_awscli_default}}
: ${OCF_RESKEY_auth_type=${OCF_RESKEY_auth_type_default}}
: ${OCF_RESKEY_profile=${OCF_RESKEY_profile_default}}
: ${OCF_RESKEY_region=${OCF_RESKEY_region_default}}
: ${OCF_RESKEY_api_delay=${OCF_RESKEY_api_delay_default}}
+: ${OCF_RESKEY_interface=${OCF_RESKEY_interface_default}}
meta_data() {
cat <<END
@@ -125,6 +127,14 @@ a short delay between API calls, to avoid sending API too quick
<content type="integer" default="${OCF_RESKEY_api_delay_default}" />
</parameter>
+<parameter name="interface" required="0">
+<longdesc lang="en">
+Name of the network interface, i.e. eth0
+</longdesc>
+<shortdesc lang="en">network interface name</shortdesc>
+<content type="string" default="${OCF_RESKEY_interface_default}" />
+</parameter>
+
<parameter name="curl_retries" unique="0">
<longdesc lang="en">
curl retries before failing
@@ -207,16 +217,16 @@ awsvip_stop() {
}
awsvip_monitor() {
- $AWSCLI_CMD ec2 describe-instances \
- --instance-id "${INSTANCE_ID}" \
- --query 'Reservations[].Instances[].NetworkInterfaces[].PrivateIpAddresses[].PrivateIpAddress[]' \
+ $AWSCLI_CMD ec2 describe-network-interfaces \
+ --network-interface-ids "${NETWORK_ID}" \
+ --query 'NetworkInterfaces[].PrivateIpAddresses[].PrivateIpAddress[]' \
--output text | \
grep -qE "(^|\s)${SECONDARY_PRIVATE_IP}(\s|$)"
- RET=$?
-
- if [ $RET -ne 0 ]; then
+ if [ $? -ne 0 ]; then
+ [ "$__OCF_ACTION" = "monitor" ] && ! ocf_is_probe && ocf_log error "IP $SECONDARY_PRIVATE_IP not assigned to interface ${NETWORK_ID}"
return $OCF_NOT_RUNNING
fi
+
return $OCF_SUCCESS
}
@@ -267,7 +277,7 @@ TOKEN=$(get_token)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
INSTANCE_ID=$(get_instance_id)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
-MAC_ADDRESS=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/mac")
+MAC_ADDRESS=$(get_interface_mac)
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC
NETWORK_ID=$(curl_retry "$OCF_RESKEY_curl_retries" "$OCF_RESKEY_curl_sleep" "--show-error -s -H 'X-aws-ec2-metadata-token: $TOKEN'" "http://169.254.169.254/latest/meta-data/network/interfaces/macs/${MAC_ADDRESS}/interface-id")
[ $? -ne 0 ] && exit $OCF_ERR_GENERIC

View File

@ -0,0 +1,63 @@
From 71bc76dc4fa57726e80d0ddcc0bdcfe708af8763 Mon Sep 17 00:00:00 2001
From: "Fabio M. Di Nitto" <fdinitto@redhat.com>
Date: Thu, 5 Dec 2024 11:02:40 +0100
Subject: [PATCH] openstack-cinder-volume: wait for volume to be available
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Monitor the volume until it's attached to the host, to avoid a race between
the OpenStack APIs receiving the request and completing the operation.
Signed-off-by: Fabio M. Di Nitto <fdinitto@redhat.com>
---
heartbeat/openstack-cinder-volume | 29 ++++++++++++++++++-----------
1 file changed, 18 insertions(+), 11 deletions(-)
diff --git a/heartbeat/openstack-cinder-volume b/heartbeat/openstack-cinder-volume
index 116442c41b..2b64d4d887 100755
--- a/heartbeat/openstack-cinder-volume
+++ b/heartbeat/openstack-cinder-volume
@@ -141,17 +141,19 @@ osvol_monitor() {
node_id=$(_get_node_id)
- if [ "$__OCF_ACTION" = "monitor" ] && ocf_is_true $OCF_RESKEY_volume_local_check ; then
- #
- # Is the volue attached?
- # We check the local devices
- #
- short_volume_id=$(echo $OCF_RESKEY_volume_id | awk '{print substr($0, 0, 20)}')
- if lsblk /dev/disk/by-id/virtio-$short_volume_id 1>/dev/null 2>&1; then
- return $OCF_SUCCESS
- else
- ocf_log warn "$OCF_RESKEY_volume_id is not attached to instance $node_id"
- return $OCF_NOT_RUNNING
+ if ocf_is_true $OCF_RESKEY_volume_local_check ; then
+ if [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "start" ] ; then
+ #
+ # Is the volue attached?
+ # We check the local devices
+ #
+ short_volume_id=$(echo $OCF_RESKEY_volume_id | awk '{print substr($0, 0, 20)}')
+ if lsblk /dev/disk/by-id/virtio-$short_volume_id 1>/dev/null 2>&1; then
+ return $OCF_SUCCESS
+ else
+ ocf_log warn "$OCF_RESKEY_volume_id is not attached to instance $node_id"
+ return $OCF_NOT_RUNNING
+ fi
fi
fi
@@ -247,6 +249,11 @@ osvol_start() {
return $OCF_ERR_GENERIC
fi
+ while ! osvol_monitor; do
+ ocf_log info "Waiting for cinder volume $OCF_RESKEY_volume_id to appear on $node_id"
+ sleep 1
+ done
+
return $OCF_SUCCESS
}

View File

@ -0,0 +1,44 @@
From d89b3fb29033c3a60eb0896033af5981c7b9f64a Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 10 Jan 2025 11:39:48 +0100
Subject: [PATCH] openstack-cinder-volume: fix detach not working during
start-action after #2000
---
heartbeat/openstack-cinder-volume | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/heartbeat/openstack-cinder-volume b/heartbeat/openstack-cinder-volume
index 2b64d4d88..5bb1acddd 100755
--- a/heartbeat/openstack-cinder-volume
+++ b/heartbeat/openstack-cinder-volume
@@ -142,9 +142,9 @@ osvol_monitor() {
node_id=$(_get_node_id)
if ocf_is_true $OCF_RESKEY_volume_local_check ; then
- if [ "$__OCF_ACTION" = "monitor" ] || [ "$__OCF_ACTION" = "start" ] ; then
+ if [ "$__OCF_ACTION" = "monitor" ] || [ "$1" = "quick" ]; then
#
- # Is the volue attached?
+ # Is the volume attached?
# We check the local devices
#
short_volume_id=$(echo $OCF_RESKEY_volume_id | awk '{print substr($0, 0, 20)}')
@@ -158,7 +158,7 @@ osvol_monitor() {
fi
#
- # Is the volue attached?
+ # Is the volume attached?
# We use the API
#
result=$(run_openstackcli "volume show \
@@ -249,7 +249,7 @@ osvol_start() {
return $OCF_ERR_GENERIC
fi
- while ! osvol_monitor; do
+ while ! osvol_monitor quick; do
ocf_log info "Waiting for cinder volume $OCF_RESKEY_volume_id to appear on $node_id"
sleep 1
done

View File

@ -1,49 +0,0 @@
diff --color -uNr a/heartbeat/awseip b/heartbeat/awseip
--- a/heartbeat/awseip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/awseip 2021-02-15 16:47:36.624610378 +0100
@@ -43,7 +43,7 @@
#
# Defaults
#
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
OCF_RESKEY_api_delay_default="3"
diff --color -uNr a/heartbeat/awsvip b/heartbeat/awsvip
--- a/heartbeat/awsvip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/awsvip 2021-02-15 16:47:48.960632484 +0100
@@ -42,7 +42,7 @@
#
# Defaults
#
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
diff --color -uNr a/heartbeat/aws-vpc-move-ip b/heartbeat/aws-vpc-move-ip
--- a/heartbeat/aws-vpc-move-ip 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/aws-vpc-move-ip 2021-02-15 16:47:55.484644118 +0100
@@ -35,7 +35,7 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""
diff --color -uNr a/heartbeat/aws-vpc-route53.in b/heartbeat/aws-vpc-route53.in
--- a/heartbeat/aws-vpc-route53.in 2020-12-03 14:31:17.000000000 +0100
+++ b/heartbeat/aws-vpc-route53.in 2021-02-15 16:47:59.808651828 +0100
@@ -45,7 +45,7 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
-OCF_RESKEY_awscli_default="/usr/bin/aws"
+OCF_RESKEY_awscli_default="/usr/lib/fence-agents/support/awscli/bin/aws"
OCF_RESKEY_auth_type_default="key"
OCF_RESKEY_profile_default="default"
OCF_RESKEY_region_default=""

View File

@ -0,0 +1,19 @@
--- a/heartbeat/powervs-subnet.in 2024-10-18 10:59:30.418142172 +0200
+++ b/heartbeat/powervs-subnet.in 2024-10-18 12:30:15.954883160 +0200
@@ -33,9 +33,13 @@
import textwrap
import time
-import requests
-import requests.adapters
-import urllib3.util
+try:
+ sys.path.insert(0, '/usr/lib/fence-agents/support/ibm')
+ import requests
+ import requests.adapters
+ import urllib3.util
+except ImportError:
+ pass
OCF_FUNCTIONS_DIR = os.environ.get(
"OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT")

View File

@ -45,7 +45,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.10.0
Release: 64%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
Release: 71%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@ -134,11 +134,25 @@ Patch81: RHEL-40393-Filesystem-2-update-bsd-logic.patch
Patch82: RHEL-32829-db2-fix-OCF_SUCESS-typo.patch
Patch83: RHEL-43579-galera-mysql-redis-remove-Unpromoted-monitor-action.patch
Patch84: RHEL-22715-LVM-activate-fix-false-positive.patch
Patch85: RHEL-58038-Filesystem-dont-sleep-no-processes-only-send-force-net-fs-after-kill.patch
Patch86: RHEL-59576-Filesystem-try-umount-first-avoid-arguments-list-too-long.patch
Patch87: RHEL-59172-nfsserver-also-stop-rpc-statd-for-nfsv4_only.patch
Patch88: RHEL-58008-podman-force-remove-container-if-necessary.patch
Patch89: RHEL-61888-ocf-shellfuncs-only-create-update-reload-systemd-drop-in-if-needed.patch
Patch90: RHEL-62200-IPaddr2-improve-fail-logic-check-ip_status-after-adding-IP.patch
Patch91: RHEL-40589-azure-events-az-update-API-versions-add-retry-for-metadata.patch
Patch92: RHEL-58632-azure-events-use-node-name-from-cluster.patch
Patch93: RHEL-42513-powervs-subnet-new-ra.patch
Patch94: RHEL-66292-1-aws-agents-reuse-imds-token-until-it-expires.patch
Patch95: RHEL-66292-2-aws-agents-reuse-imds-token-improvements.patch
Patch96: RHEL-68739-awsvip-add-interface-parameter.patch
Patch97: RHEL-69734-1-openstack-cinder-volume-wait-for-volume-to-be-available.patch
Patch98: RHEL-69734-2-openstack-cinder-volume-fix-detach-not-working-during-start-action.patch
# bundled ha-cloud-support libs
Patch500: ha-cloud-support-aws.patch
Patch501: ha-cloud-support-aliyun.patch
Patch502: ha-cloud-support-gcloud.patch
Patch500: ha-cloud-support-aliyun.patch
Patch501: ha-cloud-support-gcloud.patch
Patch502: ha-cloud-support-ibm.patch
Obsoletes: heartbeat-resources <= %{version}
Provides: heartbeat-resources = %{version}
@ -224,7 +238,7 @@ A set of scripts to interface with several services to operate in a
High Availability environment for both Pacemaker and rgmanager
service managers.
%ifarch x86_64
%ifarch x86_64 ppc64le
%package cloud
License: GPLv2+ and LGPLv2+
Summary: Cloud resource agents
@ -342,6 +356,20 @@ exit 1
%patch -p1 -P 82
%patch -p1 -P 83
%patch -p1 -P 84
%patch -p1 -P 85
%patch -p1 -P 86
%patch -p1 -P 87
%patch -p1 -P 88
%patch -p1 -P 89
%patch -p1 -P 90
%patch -p1 -P 91
%patch -p1 -P 92
%patch -p1 -P 93
%patch -p1 -P 94
%patch -p1 -P 95
%patch -p1 -P 96
%patch -p1 -P 97
%patch -p1 -P 98
# bundled ha-cloud-support libs
%patch -p1 -P 500
@ -477,6 +505,8 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%exclude %{_mandir}/man7/*aliyun-vpc-move-ip*
%exclude /usr/lib/ocf/resource.d/heartbeat/gcp*
%exclude %{_mandir}/man7/*gcp*
%exclude /usr/lib/ocf/resource.d/heartbeat/powervs-*
%exclude %{_mandir}/man7/*powervs-*
%exclude /usr/lib/ocf/resource.d/heartbeat/pgsqlms
%exclude %{_mandir}/man7/*pgsqlms*
%exclude %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
@ -640,8 +670,9 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_libexecdir}/heartbeat
%endif
%ifarch x86_64
%ifarch x86_64 ppc64le
%files cloud
%ifarch x86_64
/usr/lib/ocf/resource.d/heartbeat/aliyun-*
%{_mandir}/man7/*aliyun-*
/usr/lib/ocf/resource.d/heartbeat/aws*
@ -653,6 +684,11 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%exclude /usr/lib/ocf/resource.d/heartbeat/gcp-vpc-move-ip
%exclude %{_mandir}/man7/*gcp-vpc-move-ip*
%endif
%ifarch ppc64le
/usr/lib/ocf/resource.d/heartbeat/powervs-*
%{_mandir}/man7/*powervs-*
%endif
%endif
%files paf
%doc paf_README.md
@ -663,6 +699,57 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog
* Fri Jan 10 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-71
- openstack-cinder-volume: wait for volume to be available
Resolves: RHEL-69734
* Wed Nov 27 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-69
- AWS agents: reuse IMDS token until it expires
- awsvip: add interface parameter
Resolves: RHEL-66292
Resolves: RHEL-68739
* Wed Oct 23 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-68
- powervs-subnet: new resource agent
Resolves: RHEL-42513
* Mon Oct 14 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-67
- ocf-shellfuncs: only create/update and reload systemd drop-in if
needed
- IPaddr2: improve fail logic and check ip_status after adding IP
- azure-events-az: update API versions, and add retry functionality
for metadata requests
- azure-events*: use node name from cluster instead of hostname to
avoid failing if they're not the same
Resolves: RHEL-61888
Resolves: RHEL-62200
Resolves: RHEL-40589
Resolves: RHEL-58632
* Wed Oct 2 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-66
- nfsserver: also stop rpc-statd for nfsv4_only to avoid stop failing
in some cases
- podman: force-remove containers in stopping state if necessary
Resolves: RHEL-59172
Resolves: RHEL-58008
* Wed Sep 25 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-65
- Filesystem: don't sleep during stop-action when there are no
processes to kill, and only use force argument for network
filesystems after sending kill_signals
- Filesystem: try umount first during stop-action, and avoid potential
"Argument list too long" for force_unmount=safe
- AWS agents: use awscli2
Resolves: RHEL-58038
Resolves: RHEL-59576
Resolves: RHEL-46233
* Thu Aug 29 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-64
- IPsrcaddr: add IPv6 support