- ocf-shellfuncs: only create/update and reload systemd drop-in if
  needed
- IPaddr2: improve fail logic and check ip_status after adding IP
- azure-events-az: update API versions, and add retry functionality
  for metadata requests
- azure-events*: use node name from cluster instead of hostname to
  avoid failing if they're not the same

Resolves: RHEL-61888
Resolves: RHEL-62200
Resolves: RHEL-40589
Resolves: RHEL-58632
parent 5307e871ec
commit 98e69c2d35
RHEL-40589-azure-events-az-update-API-versions-add-retry-for-metadata.patch
@@ -0,0 +1,333 @@
From 7739c2a802c1dddb6757ff75cf7f6582a89bd518 Mon Sep 17 00:00:00 2001
From: id <happytobi@tscoding.de>
Date: Fri, 31 May 2024 09:00:18 +0200
Subject: [PATCH] azure-events-az: update to API versions, add retry
 functionality for metadata requests, update tests

---
 heartbeat/azure-events-az.in | 117 ++++++++++++++++++++++++-----------
 heartbeat/ocf.py             |  50 +++++++++++++--
 2 files changed, 126 insertions(+), 41 deletions(-)

diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 46d4d1f3d9..6d31e5abae 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -27,7 +27,7 @@ import ocf
##############################################################################


-VERSION = "0.10"
+VERSION = "0.20"
USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())

attr_globalPullState = "azure-events-az_globalPullState"
@@ -39,9 +39,6 @@ attr_healthstate = "#health-azure"
default_loglevel = ocf.logging.INFO
default_relevantEventTypes = set(["Reboot", "Redeploy"])

-global_pullMaxAttempts = 3
-global_pullDelaySecs = 1
-
##############################################################################

class attrDict(defaultdict):
@@ -71,16 +68,22 @@ class azHelper:
metadata_host = "http://169.254.169.254/metadata"
instance_api = "instance"
events_api = "scheduledevents"
- api_version = "2019-08-01"
+ events_api_version = "2020-07-01"
+ instance_api_version = "2021-12-13"

@staticmethod
- def _sendMetadataRequest(endpoint, postData=None):
+ def _sendMetadataRequest(endpoint, postData=None, api_version="2019-08-01"):
"""
Send a request to Azure's Azure Metadata Service API
"""
- url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
+
+ retryCount = int(ocf.get_parameter("retry_count",3))
+ retryWaitTime = int(ocf.get_parameter("retry_wait",20))
+ requestTimeout = int(ocf.get_parameter("request_timeout",15))
+
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, api_version)
data = ""
- ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s, retry_count = %s, retry_wait time = %s, request_timeout = %s" % (endpoint, postData, retryCount, retryWaitTime, requestTimeout))
ocf.logger.debug("_sendMetadataRequest: url = %s" % url)

if postData and type(postData) != bytes:
@@ -89,18 +92,37 @@ class azHelper:
req = urllib2.Request(url, postData)
req.add_header("Metadata", "true")
req.add_header("User-Agent", USER_AGENT)
- try:
- resp = urllib2.urlopen(req)
- except URLError as e:
- if hasattr(e, 'reason'):
- ocf.logger.warning("Failed to reach the server: %s" % e.reason)
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
- elif hasattr(e, 'code'):
- ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
- else:
- data = resp.read()
- ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
+
+ if retryCount > 0:
+ ocf.logger.debug("_sendMetadataRequest: retry enabled")
+
+ successful = None
+ for retry in range(retryCount+1):
+ try:
+ resp = urllib2.urlopen(req, timeout=requestTimeout)
+ except Exception as e:
+ excType = e.__class__.__name__
+ if excType == TimeoutError.__name__:
+ ocf.logger.warning("Request timed out after %s seconds Error: %s" % (requestTimeout, e))
+ if excType == URLError.__name__:
+ if hasattr(e, 'reason'):
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
+ elif hasattr(e, 'code'):
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
+
+ if retryCount > 1 and retry != retryCount:
+ ocf.logger.warning("Request failed, retry (%s/%s) wait %s seconds before retry (wait time)" % (retry + 1,retryCount,retryWaitTime))
+ time.sleep(retryWaitTime)
+
+ else:
+ data = resp.read()
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
+ successful = 1
+ break
+
+ # When no request was successful also with retry enabled, set the cluster to idle
+ if successful is None:
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")

if data:
data = json.loads(data)
@@ -115,14 +137,15 @@ class azHelper:
"""
ocf.logger.debug("getInstanceInfo: begin")

- jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api, None, azHelper.instance_api_version)
ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)

if jsondata:
ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
return attrDict(jsondata["compute"])
else:
- ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.instance_api, azHelper.instance_api_version)
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info - call: %s" % apiCall)
sys.exit(ocf.OCF_ERR_GENERIC)

@staticmethod
@@ -132,11 +155,17 @@ class azHelper:
"""
ocf.logger.debug("pullScheduledEvents: begin")

- jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api, None, azHelper.events_api_version)
ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)

- ocf.logger.debug("pullScheduledEvents: finished")
- return attrDict(jsondata)
+ if jsondata:
+ ocf.logger.debug("pullScheduledEvents: finished")
+ return attrDict(jsondata)
+ else:
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.events_api, azHelper.events_api_version)
+ ocf.ocf_exit_reason("pullScheduledEvents: Unable to get scheduledevents info - call: %s" % apiCall)
+ sys.exit(ocf.OCF_ERR_GENERIC)
+

@staticmethod
def forceEvents(eventIDs):
@@ -534,7 +563,7 @@ class Node:
except ValueError:
# Handle the exception
ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
-
+
ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
return isInStandy

@@ -584,7 +613,7 @@ class raAzEvents:

def monitor(self):
ocf.logger.debug("monitor: begin")
-
+
events = azHelper.pullScheduledEvents()

# get current document version
@@ -600,21 +629,21 @@ class raAzEvents:
ocf.logger.info("monitor: already handled curDocVersion, skip")
return ocf.OCF_SUCCESS

- localAzEventIDs = set()
+ localAzEventIds = dict()
for e in localEvents:
- localAzEventIDs.add(e.EventId)
+ localAzEventIds[e.EventId] = json.dumps(e)

curState = self.node.getState()
clusterEventIDs = self.node.getEventIDs()

ocf.logger.debug("monitor: curDocVersion has not been handled yet")
-
+
if clusterEventIDs:
# there are pending events set, so our state must be STOPPING or IN_EVENT
i = 0; touchedEventIDs = False
while i < len(clusterEventIDs):
# clean up pending events that are already finished according to AZ
- if clusterEventIDs[i] not in localAzEventIDs:
+ if clusterEventIDs[i] not in localAzEventIds.keys():
ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
clusterEventIDs.pop(i)
touchedEventIDs = True
@@ -644,12 +673,12 @@ class raAzEvents:
ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
else:
if curState == AVAILABLE:
- if len(localAzEventIDs) > 0:
+ if len(localAzEventIds) > 0:
if clusterHelper.otherNodesAvailable(self.node):
- ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
- curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
+ ocf.logger.info("monitor: can handle local events %s -> set state STOPPING - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
+ curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIds.keys())
else:
- ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
+ ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
self.node.setState(ON_HOLD)
else:
ocf.logger.debug("monitor: no local azEvents to handle")
@@ -761,6 +790,24 @@ def main():
longdesc="Set to true to enable verbose logging",
content_type="boolean",
default="false")
+ agent.add_parameter(
+ "retry_count",
+ shortdesc="Azure IMDS webservice retry count",
+ longdesc="Set to any number bigger than zero to enable retry count",
+ content_type="integer",
+ default="3")
+ agent.add_parameter(
+ "retry_wait",
+ shortdesc="Configure a retry wait time",
+ longdesc="Set retry wait time in seconds",
+ content_type="integer",
+ default="20")
+ agent.add_parameter(
+ "request_timeout",
+ shortdesc="Configure a request timeout",
+ longdesc="Set request timeout in seconds",
+ content_type="integer",
+ default="15")
agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
agent.add_action("validate-all", timeout=20, handler=validate_action)
diff --git a/heartbeat/ocf.py b/heartbeat/ocf.py
index dda2fed4bb..571cd19664 100644
--- a/heartbeat/ocf.py
+++ b/heartbeat/ocf.py
@@ -16,7 +16,7 @@
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
+#

import sys, os, logging, syslog

@@ -42,19 +42,19 @@
# OCF does not include the concept of master/slave resources so we
# need to extend it so we can discover a resource's complete state.
#
-# OCF_RUNNING_MASTER:
+# OCF_RUNNING_MASTER:
# The resource is in "master" mode and fully operational
# OCF_FAILED_MASTER:
# The resource is in "master" mode but in a failed state
-#
+#
# The extra two values should only be used during a probe.
#
# Probes are used to discover resources that were started outside of
# the CRM and/or left behind if the LRM fails.
-#
+#
# They can be identified in RA scripts by checking for:
# [ "${__OCF_ACTION}" = "monitor" -a "${OCF_RESKEY_CRM_meta_interval}" = "0" ]
-#
+#
# Failed "slaves" should continue to use: OCF_ERR_GENERIC
# Fully operational "slaves" should continue to use: OCF_SUCCESS
#
@@ -451,15 +451,17 @@ def value_for_parameter(param):
sys.exit(OCF_ERR_UNIMPLEMENTED)


+
if __name__ == "__main__":
import unittest
+ import logging

class TestMetadata(unittest.TestCase):
def test_noparams_noactions(self):
m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
self.assertEqual("""<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="foo">
+<resource-agent name="foo" version="1.0">
<version>1.0</version>
<longdesc lang="en">
longdesc
@@ -483,4 +485,40 @@ def test_params_actions(self):
m.add_action("start")
self.assertEqual(str(m.actions[0]), '<action name="start" />\n')

+ def test_retry_params_actions(self):
+ log= logging.getLogger( "test_retry_params_actions" )
+
+ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
+ m.add_parameter(
+ "retry_count",
+ shortdesc="Azure ims webservice retry count",
+ longdesc="Set to any number bigger than zero to enable retry count",
+ content_type="integer",
+ default="0")
+ m.add_parameter(
+ "retry_wait",
+ shortdesc="Configure a retry wait time",
+ longdesc="Set retry wait time in seconds",
+ content_type="integer",
+ default="20")
+ m.add_parameter(
+ "request_timeout",
+ shortdesc="Configure a request timeout",
+ longdesc="Set request timeout in seconds",
+ content_type="integer",
+ default="15")
+
+ m.add_action("start")
+
+ log.debug( "actions= %s", str(m.actions[0] ))
+ self.assertEqual(str(m.actions[0]), '<action name="start" />\n')
+
+ log.debug( "parameters= %s", str(m.parameters[0] ))
+ log.debug( "parameters= %s", str(m.parameters[1] ))
+ log.debug( "parameters= %s", str(m.parameters[2] ))
+ self.assertEqual(str(m.parameters[0]), '<parameter name="retry_count">\n<longdesc lang="en">Set to any number bigger than zero to enable retry count</longdesc>\n<shortdesc lang="en">Azure ims webservice retry count</shortdesc>\n<content type="integer" default="0" />\n</parameter>\n')
+ self.assertEqual(str(m.parameters[1]), '<parameter name="retry_wait">\n<longdesc lang="en">Set retry wait time in seconds</longdesc>\n<shortdesc lang="en">Configure a retry wait time</shortdesc>\n<content type="integer" default="20" />\n</parameter>\n')
+ self.assertEqual(str(m.parameters[2]), '<parameter name="request_timeout">\n<longdesc lang="en">Set request timeout in seconds</longdesc>\n<shortdesc lang="en">Configure a request timeout</shortdesc>\n<content type="integer" default="15" />\n</parameter>\n')
+
+ logging.basicConfig( stream=sys.stderr )
unittest.main()
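Note: the api-version bumps and the new retry knobs above can be sanity-checked by hand against the Azure IMDS endpoints. The snippet below is only an illustration (it assumes curl is available on the VM and merely mimics the patch's retry_count=3, retry_wait=20 and request_timeout=15 defaults); it is not part of the agent:

    # instance metadata, new api-version 2021-12-13
    curl -s -H "Metadata: true" --retry 3 --retry-delay 20 --max-time 15 \
      "http://169.254.169.254/metadata/instance?api-version=2021-12-13"
    # scheduled events, new api-version 2020-07-01
    curl -s -H "Metadata: true" --retry 3 --retry-delay 20 --max-time 15 \
      "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01"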
RHEL-58632-azure-events-use-node-name-from-cluster.patch
@@ -0,0 +1,37 @@
From c72dc2f2e502486d93aeec26abc12e720b14a0a7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 10 Oct 2024 16:41:03 +0200
Subject: [PATCH] azure-events*: use node name from cluster instead of hostname
 to avoid failing if they're not the same

---
 heartbeat/azure-events-az.in | 2 +-
 heartbeat/azure-events.in    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 6d31e5aba..0ed001037 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -441,7 +441,7 @@ class Node:
self.raOwner = ra
self.azInfo = azHelper.getInstanceInfo()
self.azName = self.azInfo.name
- self.hostName = socket.gethostname()
+ self.hostName = clusterHelper._exec("crm_node", "-n")
self.setAttr("azName", self.azName)
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)

diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
index 90acaba62..32f71ee26 100644
--- a/heartbeat/azure-events.in
+++ b/heartbeat/azure-events.in
@@ -411,7 +411,7 @@ class Node:
self.raOwner = ra
self.azInfo = azHelper.getInstanceInfo()
self.azName = self.azInfo.name
- self.hostName = socket.gethostname()
+ self.hostName = clusterHelper._exec("crm_node", "-n")
self.setAttr("azName", self.azName)
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
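Note: this one-line change only matters when the Pacemaker node name and the system hostname differ. As a quick illustration (assuming the Pacemaker CLI tools are installed on the node), the two values the patch is about can be compared directly:

    # node name as the cluster knows it (what the agents now record)
    crm_node -n
    # system hostname (what socket.gethostname() returned before this patch)
    hostname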
RHEL-61888-ocf-shellfuncs-only-create-update-reload-systemd-drop-in-if-needed.patch
@@ -0,0 +1,48 @@
From 82958dc115c47232ae0468b1ddf64e728ec325e4 Mon Sep 17 00:00:00 2001
From: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
Date: Wed, 9 Oct 2024 00:16:44 +0200
Subject: [PATCH] ocf-shellfuncs: systemd_drop_in only if needed

Avoid dbus overload upon many simultaneous "daemon-reload" invocations
(when a resource agent using systemd_drop_in() is called multiple times
as part of parallel resource operations in Pacemaker) by skipping the
file creation and reload if the expected data already exists.

Whilst at it, align the indentation of the heredoc with the other parts
of the function.

Signed-off-by: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
---
 heartbeat/ocf-shellfuncs.in | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 9335cbf00..5c4bb3264 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -662,14 +662,17 @@ systemd_drop_in()
systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
mkdir -p "$systemdrundir"
conf_file="$systemdrundir/$1.conf"
- cat >"$conf_file" <<EOF
-[Unit]
-$2=$3
-EOF
- # The information is accessible through systemd API and systemd would
- # complain about improper permissions.
- chmod o+r "$conf_file"
- systemctl daemon-reload
+ conf_line="$2=$3"
+ if ! { [ -f "$conf_file" ] && grep -q "^$conf_line$" "$conf_file" ; } ; then
+ cat > "$conf_file" <<-EOF
+ [Unit]
+ $conf_line
+ EOF
+ # The information is accessible through systemd API and systemd would
+ # complain about improper permissions.
+ chmod o+r "$conf_file"
+ systemctl daemon-reload
+ fi
}

# usage: curl_retry RETRIES SLEEP ARGS URL
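Note: the rewritten systemd_drop_in() only touches the drop-in and reloads systemd when the expected line is missing. A minimal standalone sketch of that check-before-write pattern is below; the unit name and key=value are hypothetical placeholders, not taken from the patch:

    conf_file="/run/systemd/system/resource-agents-deps.target.d/example.conf"  # hypothetical drop-in name
    conf_line="After=network-online.target"                                     # hypothetical key=value
    if ! { [ -f "$conf_file" ] && grep -q "^$conf_line$" "$conf_file" ; } ; then
        mkdir -p "${conf_file%/*}"
        printf '[Unit]\n%s\n' "$conf_line" > "$conf_file"
        # readable via the systemd API, as the in-tree comment explains
        chmod o+r "$conf_file"
        # daemon-reload only runs when the drop-in actually changed
        systemctl daemon-reload
    fi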
RHEL-62200-IPaddr2-improve-fail-logic-check-ip_status-after-adding-IP.patch
@@ -0,0 +1,132 @@
From 6fab544e702a7601714cd017aecc00193f23ae72 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 11 Oct 2024 13:13:10 +0200
Subject: [PATCH] IPaddr2: improve fail logic and check ip_status after adding
 IP

* check that the label got applied
* return OCF_ERR_GENERIC to avoid false-positive when IP was manually added before starting the resource
* check ip_status after adding IP to fail without having to wait for the first monitor-action

Co-authored-by: Evan J. Felix <evan.felix@pnnl.gov>
---
 heartbeat/IPaddr2 | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index e325aa574..27cae2d11 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -586,7 +586,7 @@ ip_init() {
exit $rc
fi
fi
-
+
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"

if [ -n "$IFLABEL" ]; then
@@ -985,6 +985,7 @@ run_send_ua() {
# ok = served (for CIP: + hash bucket)
# partial = served and no hash bucket (CIP only)
# partial2 = served and no CIP iptables rule
+# partial3 = served with no label
# no = nothing
#
ip_served() {
@@ -1002,6 +1003,11 @@ ip_served() {

if [ -z "$IP_CIP" ]; then
for i in $cur_nic; do
+ # check address label
+ if [ -n "$IFLABEL" ] && [ -z "`$IP2UTIL -o -f $FAMILY addr show $nic label $IFLABEL`" ]; then
+ echo partial3
+ return 0
+ fi
# only mark as served when on the same interfaces as $NIC
[ "$i" = "$NIC" ] || continue
echo "ok"
@@ -1065,7 +1071,12 @@ ip_start() {
if [ "$ip_status" = "ok" ]; then
exit $OCF_SUCCESS
fi
-
+
+ if [ "$ip_status" = "partial3" ]; then
+ ocf_exit_reason "IP $OCF_RESKEY_ip available, but label missing"
+ exit $OCF_ERR_GENERIC
+ fi
+
if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then
$MODPROBE ip_conntrack
$IPADDR2_CIP_IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
@@ -1083,7 +1094,7 @@ ip_start() {
if [ -n "$IP_CIP" ] && [ $ip_status = "partial" ]; then
echo "+$IP_INC_NO" >$IP_CIP_FILE
fi
-
+
if [ "$ip_status" = "no" ]; then
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
for i in `find_interface $OCF_RESKEY_ip 32`; do
@@ -1094,7 +1105,7 @@ ip_start() {
esac
done
fi
-
+
add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC"
rc=$?

@@ -1102,6 +1113,12 @@ ip_start() {
ocf_exit_reason "Failed to add $OCF_RESKEY_ip"
exit $rc
fi
+
+ ip_status=`ip_served`
+ if [ "$ip_status" != "ok" ]; then
+ ocf_exit_reason "Failed to add $OCF_RESKEY_ip with error $ip_status"
+ exit $OCF_ERR_GENERIC
+ fi
fi

case $NIC in
@@ -1134,7 +1151,7 @@ ip_stop() {
ocf_take_lock $CIP_lockfile
ocf_release_lock_on_exit $CIP_lockfile
fi
-
+
if [ -f "$SENDARPPIDFILE" ] ; then
kill `cat "$SENDARPPIDFILE"`
if [ $? -ne 0 ]; then
@@ -1171,17 +1188,17 @@ ip_stop() {
i=`expr $i + 1`
done
else
- ip_del_if="no"
+ ip_del_if="no"
fi
fi
-
+
if [ "$ip_del_if" = "yes" ]; then
delete_interface $OCF_RESKEY_ip $NIC $NETMASK
if [ $? -ne 0 ]; then
ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]"
exit $OCF_ERR_GENERIC
fi
-
+
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
restore_loopback "$OCF_RESKEY_ip"
fi
@@ -1200,7 +1217,7 @@ ip_monitor() {
run_arp_sender refresh
return $OCF_SUCCESS
;;
- partial|no|partial2)
+ no)
exit $OCF_NOT_RUNNING
;;
*)
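Note: the new partial3 status simply reports an address that is up but carries no label. Roughly the same check can be run outside the agent; in the sketch below the interface and label are placeholders, and IP2UTIL normally resolves to the ip utility:

    nic=eth0        # placeholder interface
    iflabel=eth0:1  # placeholder label, as set via IPaddr2's iflabel parameter
    if [ -z "$(ip -o -f inet addr show "$nic" label "$iflabel")" ]; then
        echo "partial3: IP is configured but the expected label is missing"
    fi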
resource-agents.spec
@@ -45,7 +45,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.10.0
-Release: 66%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 67%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@@ -138,6 +138,10 @@ Patch85: RHEL-58038-Filesystem-dont-sleep-no-processes-only-send-force-net-fs-af
Patch86: RHEL-59576-Filesystem-try-umount-first-avoid-arguments-list-too-long.patch
Patch87: RHEL-59172-nfsserver-also-stop-rpc-statd-for-nfsv4_only.patch
Patch88: RHEL-58008-podman-force-remove-container-if-necessary.patch
+Patch89: RHEL-61888-ocf-shellfuncs-only-create-update-reload-systemd-drop-in-if-needed.patch
+Patch90: RHEL-62200-IPaddr2-improve-fail-logic-check-ip_status-after-adding-IP.patch
+Patch91: RHEL-40589-azure-events-az-update-API-versions-add-retry-for-metadata.patch
+Patch92: RHEL-58632-azure-events-use-node-name-from-cluster.patch

# bundled ha-cloud-support libs
Patch500: ha-cloud-support-aliyun.patch
@@ -349,6 +353,10 @@ exit 1
%patch -p1 -P 86
%patch -p1 -P 87
%patch -p1 -P 88
+%patch -p1 -P 89
+%patch -p1 -P 90
+%patch -p1 -P 91
+%patch -p1 -P 92

# bundled ha-cloud-support libs
%patch -p1 -P 500
@@ -669,6 +677,20 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm

%changelog
+* Mon Oct 14 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-67
+- ocf-shellfuncs: only create/update and reload systemd drop-in if
+  needed
+- IPaddr2: improve fail logic and check ip_status after adding IP
+- azure-events-az: update API versions, and add retry functionality
+  for metadata requests
+- azure-events*: use node name from cluster instead of hostname to
+  avoid failing if they're not the same
+
+Resolves: RHEL-61888
+Resolves: RHEL-62200
+Resolves: RHEL-40589
+Resolves: RHEL-58632
+
* Wed Oct 2 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-66
- nfsserver: also stop rpc-statd for nfsv4_only to avoid stop failing
  in some cases