- ocf-shellfuncs: only create/update and reload systemd drop-in if
  needed
- IPaddr2: improve fail logic and check ip_status after adding IP
- azure-events-az: update API versions, and add retry functionality
  for metadata requests
- azure-events*: use node name from cluster instead of hostname to
  avoid failing if they're not the same

Resolves: RHEL-61888
Resolves: RHEL-62200
Resolves: RHEL-40589
Resolves: RHEL-58632
This commit is contained in:
parent
5307e871ec
commit
98e69c2d35
@ -0,0 +1,333 @@
|
|||||||
|
From 7739c2a802c1dddb6757ff75cf7f6582a89bd518 Mon Sep 17 00:00:00 2001
|
||||||
|
From: id <happytobi@tscoding.de>
|
||||||
|
Date: Fri, 31 May 2024 09:00:18 +0200
|
||||||
|
Subject: [PATCH] azure-events-az: update API versions, add retry
|
||||||
|
functionality for metadata requests, update tests
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/azure-events-az.in | 117 ++++++++++++++++++++++++-----------
|
||||||
|
heartbeat/ocf.py | 50 +++++++++++++--
|
||||||
|
2 files changed, 126 insertions(+), 41 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
|
||||||
|
index 46d4d1f3d9..6d31e5abae 100644
|
||||||
|
--- a/heartbeat/azure-events-az.in
|
||||||
|
+++ b/heartbeat/azure-events-az.in
|
||||||
|
@@ -27,7 +27,7 @@ import ocf
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
-VERSION = "0.10"
|
||||||
|
+VERSION = "0.20"
|
||||||
|
USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
|
||||||
|
|
||||||
|
attr_globalPullState = "azure-events-az_globalPullState"
|
||||||
|
@@ -39,9 +39,6 @@ attr_healthstate = "#health-azure"
|
||||||
|
default_loglevel = ocf.logging.INFO
|
||||||
|
default_relevantEventTypes = set(["Reboot", "Redeploy"])
|
||||||
|
|
||||||
|
-global_pullMaxAttempts = 3
|
||||||
|
-global_pullDelaySecs = 1
|
||||||
|
-
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
class attrDict(defaultdict):
|
||||||
|
@@ -71,16 +68,22 @@ class azHelper:
|
||||||
|
metadata_host = "http://169.254.169.254/metadata"
|
||||||
|
instance_api = "instance"
|
||||||
|
events_api = "scheduledevents"
|
||||||
|
- api_version = "2019-08-01"
|
||||||
|
+ events_api_version = "2020-07-01"
|
||||||
|
+ instance_api_version = "2021-12-13"
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
- def _sendMetadataRequest(endpoint, postData=None):
|
||||||
|
+ def _sendMetadataRequest(endpoint, postData=None, api_version="2019-08-01"):
|
||||||
|
"""
|
||||||
|
Send a request to Azure's Azure Metadata Service API
|
||||||
|
"""
|
||||||
|
- url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
|
||||||
|
+
|
||||||
|
+ retryCount = int(ocf.get_parameter("retry_count",3))
|
||||||
|
+ retryWaitTime = int(ocf.get_parameter("retry_wait",20))
|
||||||
|
+ requestTimeout = int(ocf.get_parameter("request_timeout",15))
|
||||||
|
+
|
||||||
|
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, api_version)
|
||||||
|
data = ""
|
||||||
|
- ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s, retry_count = %s, retry_wait time = %s, request_timeout = %s" % (endpoint, postData, retryCount, retryWaitTime, requestTimeout))
|
||||||
|
ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
|
||||||
|
|
||||||
|
if postData and type(postData) != bytes:
|
||||||
|
@@ -89,18 +92,37 @@ class azHelper:
|
||||||
|
req = urllib2.Request(url, postData)
|
||||||
|
req.add_header("Metadata", "true")
|
||||||
|
req.add_header("User-Agent", USER_AGENT)
|
||||||
|
- try:
|
||||||
|
- resp = urllib2.urlopen(req)
|
||||||
|
- except URLError as e:
|
||||||
|
- if hasattr(e, 'reason'):
|
||||||
|
- ocf.logger.warning("Failed to reach the server: %s" % e.reason)
|
||||||
|
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
- elif hasattr(e, 'code'):
|
||||||
|
- ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
|
||||||
|
- clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
- else:
|
||||||
|
- data = resp.read()
|
||||||
|
- ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
|
||||||
|
+
|
||||||
|
+ if retryCount > 0:
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: retry enabled")
|
||||||
|
+
|
||||||
|
+ successful = None
|
||||||
|
+ for retry in range(retryCount+1):
|
||||||
|
+ try:
|
||||||
|
+ resp = urllib2.urlopen(req, timeout=requestTimeout)
|
||||||
|
+ except Exception as e:
|
||||||
|
+ excType = e.__class__.__name__
|
||||||
|
+ if excType == TimeoutError.__name__:
|
||||||
|
+ ocf.logger.warning("Request timed out after %s seconds Error: %s" % (requestTimeout, e))
|
||||||
|
+ if excType == URLError.__name__:
|
||||||
|
+ if hasattr(e, 'reason'):
|
||||||
|
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
|
||||||
|
+ elif hasattr(e, 'code'):
|
||||||
|
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
|
||||||
|
+
|
||||||
|
+ if retryCount > 1 and retry != retryCount:
|
||||||
|
+ ocf.logger.warning("Request failed, retry (%s/%s) wait %s seconds before retry (wait time)" % (retry + 1,retryCount,retryWaitTime))
|
||||||
|
+ time.sleep(retryWaitTime)
|
||||||
|
+
|
||||||
|
+ else:
|
||||||
|
+ data = resp.read()
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
|
||||||
|
+ successful = 1
|
||||||
|
+ break
|
||||||
|
+
|
||||||
|
+ # If no request succeeded, even with retries enabled, set the global pull state to IDLE
|
||||||
|
+ if successful is None:
|
||||||
|
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
|
||||||
|
if data:
|
||||||
|
data = json.loads(data)
|
||||||
|
@@ -115,14 +137,15 @@ class azHelper:
|
||||||
|
"""
|
||||||
|
ocf.logger.debug("getInstanceInfo: begin")
|
||||||
|
|
||||||
|
- jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api, None, azHelper.instance_api_version)
|
||||||
|
ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
|
||||||
|
|
||||||
|
if jsondata:
|
||||||
|
ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
|
||||||
|
return attrDict(jsondata["compute"])
|
||||||
|
else:
|
||||||
|
- ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
|
||||||
|
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.instance_api, azHelper.instance_api_version)
|
||||||
|
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info - call: %s" % apiCall)
|
||||||
|
sys.exit(ocf.OCF_ERR_GENERIC)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
@@ -132,11 +155,17 @@ class azHelper:
|
||||||
|
"""
|
||||||
|
ocf.logger.debug("pullScheduledEvents: begin")
|
||||||
|
|
||||||
|
- jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api, None, azHelper.events_api_version)
|
||||||
|
ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
|
||||||
|
|
||||||
|
- ocf.logger.debug("pullScheduledEvents: finished")
|
||||||
|
- return attrDict(jsondata)
|
||||||
|
+ if jsondata:
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: finished")
|
||||||
|
+ return attrDict(jsondata)
|
||||||
|
+ else:
|
||||||
|
+ apiCall = "%s/%s?api-version=%s" % (azHelper.metadata_host, azHelper.events_api, azHelper.events_api_version)
|
||||||
|
+ ocf.ocf_exit_reason("pullScheduledEvents: Unable to get scheduledevents info - call: %s" % apiCall)
|
||||||
|
+ sys.exit(ocf.OCF_ERR_GENERIC)
|
||||||
|
+
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def forceEvents(eventIDs):
|
||||||
|
@@ -534,7 +563,7 @@ class Node:
|
||||||
|
except ValueError:
|
||||||
|
# Handle the exception
|
||||||
|
ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
|
||||||
|
-
|
||||||
|
+
|
||||||
|
ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
|
||||||
|
return isInStandy
|
||||||
|
|
||||||
|
@@ -584,7 +613,7 @@ class raAzEvents:
|
||||||
|
|
||||||
|
def monitor(self):
|
||||||
|
ocf.logger.debug("monitor: begin")
|
||||||
|
-
|
||||||
|
+
|
||||||
|
events = azHelper.pullScheduledEvents()
|
||||||
|
|
||||||
|
# get current document version
|
||||||
|
@@ -600,21 +629,21 @@ class raAzEvents:
|
||||||
|
ocf.logger.info("monitor: already handled curDocVersion, skip")
|
||||||
|
return ocf.OCF_SUCCESS
|
||||||
|
|
||||||
|
- localAzEventIDs = set()
|
||||||
|
+ localAzEventIds = dict()
|
||||||
|
for e in localEvents:
|
||||||
|
- localAzEventIDs.add(e.EventId)
|
||||||
|
+ localAzEventIds[e.EventId] = json.dumps(e)
|
||||||
|
|
||||||
|
curState = self.node.getState()
|
||||||
|
clusterEventIDs = self.node.getEventIDs()
|
||||||
|
|
||||||
|
ocf.logger.debug("monitor: curDocVersion has not been handled yet")
|
||||||
|
-
|
||||||
|
+
|
||||||
|
if clusterEventIDs:
|
||||||
|
# there are pending events set, so our state must be STOPPING or IN_EVENT
|
||||||
|
i = 0; touchedEventIDs = False
|
||||||
|
while i < len(clusterEventIDs):
|
||||||
|
# clean up pending events that are already finished according to AZ
|
||||||
|
- if clusterEventIDs[i] not in localAzEventIDs:
|
||||||
|
+ if clusterEventIDs[i] not in localAzEventIds.keys():
|
||||||
|
ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
|
||||||
|
clusterEventIDs.pop(i)
|
||||||
|
touchedEventIDs = True
|
||||||
|
@@ -644,12 +673,12 @@ class raAzEvents:
|
||||||
|
ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
|
||||||
|
else:
|
||||||
|
if curState == AVAILABLE:
|
||||||
|
- if len(localAzEventIDs) > 0:
|
||||||
|
+ if len(localAzEventIds) > 0:
|
||||||
|
if clusterHelper.otherNodesAvailable(self.node):
|
||||||
|
- ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
|
||||||
|
- curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
|
||||||
|
+ ocf.logger.info("monitor: can handle local events %s -> set state STOPPING - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
|
||||||
|
+ curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIds.keys())
|
||||||
|
else:
|
||||||
|
- ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
|
||||||
|
+ ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD - %s" % (str(list(localAzEventIds.keys())), str(list(localAzEventIds.values()))))
|
||||||
|
self.node.setState(ON_HOLD)
|
||||||
|
else:
|
||||||
|
ocf.logger.debug("monitor: no local azEvents to handle")
|
||||||
|
@@ -761,6 +790,24 @@ def main():
|
||||||
|
longdesc="Set to true to enable verbose logging",
|
||||||
|
content_type="boolean",
|
||||||
|
default="false")
|
||||||
|
+ agent.add_parameter(
|
||||||
|
+ "retry_count",
|
||||||
|
+ shortdesc="Azure IMDS webservice retry count",
|
||||||
|
+ longdesc="Set to any number bigger than zero to enable retry count",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="3")
|
||||||
|
+ agent.add_parameter(
|
||||||
|
+ "retry_wait",
|
||||||
|
+ shortdesc="Configure a retry wait time",
|
||||||
|
+ longdesc="Set retry wait time in seconds",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="20")
|
||||||
|
+ agent.add_parameter(
|
||||||
|
+ "request_timeout",
|
||||||
|
+ shortdesc="Configure a request timeout",
|
||||||
|
+ longdesc="Set request timeout in seconds",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="15")
|
||||||
|
agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
|
||||||
|
agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
|
||||||
|
agent.add_action("validate-all", timeout=20, handler=validate_action)
|
||||||
|
diff --git a/heartbeat/ocf.py b/heartbeat/ocf.py
|
||||||
|
index dda2fed4bb..571cd19664 100644
|
||||||
|
--- a/heartbeat/ocf.py
|
||||||
|
+++ b/heartbeat/ocf.py
|
||||||
|
@@ -16,7 +16,7 @@
|
||||||
|
# You should have received a copy of the GNU Lesser General Public
|
||||||
|
# License along with this library; if not, write to the Free Software
|
||||||
|
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
-#
|
||||||
|
+#
|
||||||
|
|
||||||
|
import sys, os, logging, syslog
|
||||||
|
|
||||||
|
@@ -42,19 +42,19 @@
|
||||||
|
# OCF does not include the concept of master/slave resources so we
|
||||||
|
# need to extend it so we can discover a resource's complete state.
|
||||||
|
#
|
||||||
|
-# OCF_RUNNING_MASTER:
|
||||||
|
+# OCF_RUNNING_MASTER:
|
||||||
|
# The resource is in "master" mode and fully operational
|
||||||
|
# OCF_FAILED_MASTER:
|
||||||
|
# The resource is in "master" mode but in a failed state
|
||||||
|
-#
|
||||||
|
+#
|
||||||
|
# The extra two values should only be used during a probe.
|
||||||
|
#
|
||||||
|
# Probes are used to discover resources that were started outside of
|
||||||
|
# the CRM and/or left behind if the LRM fails.
|
||||||
|
-#
|
||||||
|
+#
|
||||||
|
# They can be identified in RA scripts by checking for:
|
||||||
|
# [ "${__OCF_ACTION}" = "monitor" -a "${OCF_RESKEY_CRM_meta_interval}" = "0" ]
|
||||||
|
-#
|
||||||
|
+#
|
||||||
|
# Failed "slaves" should continue to use: OCF_ERR_GENERIC
|
||||||
|
# Fully operational "slaves" should continue to use: OCF_SUCCESS
|
||||||
|
#
|
||||||
|
@@ -451,15 +451,17 @@ def value_for_parameter(param):
|
||||||
|
sys.exit(OCF_ERR_UNIMPLEMENTED)
|
||||||
|
|
||||||
|
|
||||||
|
+
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import unittest
|
||||||
|
+ import logging
|
||||||
|
|
||||||
|
class TestMetadata(unittest.TestCase):
|
||||||
|
def test_noparams_noactions(self):
|
||||||
|
m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
|
||||||
|
self.assertEqual("""<?xml version="1.0"?>
|
||||||
|
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||||||
|
-<resource-agent name="foo">
|
||||||
|
+<resource-agent name="foo" version="1.0">
|
||||||
|
<version>1.0</version>
|
||||||
|
<longdesc lang="en">
|
||||||
|
longdesc
|
||||||
|
@@ -483,4 +485,40 @@ def test_params_actions(self):
|
||||||
|
m.add_action("start")
|
||||||
|
self.assertEqual(str(m.actions[0]), '<action name="start" />\n')
|
||||||
|
|
||||||
|
+ def test_retry_params_actions(self):
|
||||||
|
+ log= logging.getLogger( "test_retry_params_actions" )
|
||||||
|
+
|
||||||
|
+ m = Agent("foo", shortdesc="shortdesc", longdesc="longdesc")
|
||||||
|
+ m.add_parameter(
|
||||||
|
+ "retry_count",
|
||||||
|
+ shortdesc="Azure ims webservice retry count",
|
||||||
|
+ longdesc="Set to any number bigger than zero to enable retry count",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="0")
|
||||||
|
+ m.add_parameter(
|
||||||
|
+ "retry_wait",
|
||||||
|
+ shortdesc="Configure a retry wait time",
|
||||||
|
+ longdesc="Set retry wait time in seconds",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="20")
|
||||||
|
+ m.add_parameter(
|
||||||
|
+ "request_timeout",
|
||||||
|
+ shortdesc="Configure a request timeout",
|
||||||
|
+ longdesc="Set request timeout in seconds",
|
||||||
|
+ content_type="integer",
|
||||||
|
+ default="15")
|
||||||
|
+
|
||||||
|
+ m.add_action("start")
|
||||||
|
+
|
||||||
|
+ log.debug( "actions= %s", str(m.actions[0] ))
|
||||||
|
+ self.assertEqual(str(m.actions[0]), '<action name="start" />\n')
|
||||||
|
+
|
||||||
|
+ log.debug( "parameters= %s", str(m.parameters[0] ))
|
||||||
|
+ log.debug( "parameters= %s", str(m.parameters[1] ))
|
||||||
|
+ log.debug( "parameters= %s", str(m.parameters[2] ))
|
||||||
|
+ self.assertEqual(str(m.parameters[0]), '<parameter name="retry_count">\n<longdesc lang="en">Set to any number bigger than zero to enable retry count</longdesc>\n<shortdesc lang="en">Azure ims webservice retry count</shortdesc>\n<content type="integer" default="0" />\n</parameter>\n')
|
||||||
|
+ self.assertEqual(str(m.parameters[1]), '<parameter name="retry_wait">\n<longdesc lang="en">Set retry wait time in seconds</longdesc>\n<shortdesc lang="en">Configure a retry wait time</shortdesc>\n<content type="integer" default="20" />\n</parameter>\n')
|
||||||
|
+ self.assertEqual(str(m.parameters[2]), '<parameter name="request_timeout">\n<longdesc lang="en">Set request timeout in seconds</longdesc>\n<shortdesc lang="en">Configure a request timeout</shortdesc>\n<content type="integer" default="15" />\n</parameter>\n')
|
||||||
|
+
|
||||||
|
+ logging.basicConfig( stream=sys.stderr )
|
||||||
|
unittest.main()
|
37
RHEL-58632-azure-events-use-node-name-from-cluster.patch
Normal file
37
RHEL-58632-azure-events-use-node-name-from-cluster.patch
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
From c72dc2f2e502486d93aeec26abc12e720b14a0a7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Thu, 10 Oct 2024 16:41:03 +0200
|
||||||
|
Subject: [PATCH] azure-events*: use node name from cluster instead of hostname
|
||||||
|
to avoid failing if they're not the same
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/azure-events-az.in | 2 +-
|
||||||
|
heartbeat/azure-events.in | 2 +-
|
||||||
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
|
||||||
|
index 6d31e5aba..0ed001037 100644
|
||||||
|
--- a/heartbeat/azure-events-az.in
|
||||||
|
+++ b/heartbeat/azure-events-az.in
|
||||||
|
@@ -441,7 +441,7 @@ class Node:
|
||||||
|
self.raOwner = ra
|
||||||
|
self.azInfo = azHelper.getInstanceInfo()
|
||||||
|
self.azName = self.azInfo.name
|
||||||
|
- self.hostName = socket.gethostname()
|
||||||
|
+ self.hostName = clusterHelper._exec("crm_node", "-n")
|
||||||
|
self.setAttr("azName", self.azName)
|
||||||
|
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/azure-events.in b/heartbeat/azure-events.in
|
||||||
|
index 90acaba62..32f71ee26 100644
|
||||||
|
--- a/heartbeat/azure-events.in
|
||||||
|
+++ b/heartbeat/azure-events.in
|
||||||
|
@@ -411,7 +411,7 @@ class Node:
|
||||||
|
self.raOwner = ra
|
||||||
|
self.azInfo = azHelper.getInstanceInfo()
|
||||||
|
self.azName = self.azInfo.name
|
||||||
|
- self.hostName = socket.gethostname()
|
||||||
|
+ self.hostName = clusterHelper._exec("crm_node", "-n")
|
||||||
|
self.setAttr("azName", self.azName)
|
||||||
|
clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
|
||||||
|
|
@ -0,0 +1,48 @@
|
|||||||
|
From 82958dc115c47232ae0468b1ddf64e728ec325e4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
|
||||||
|
Date: Wed, 9 Oct 2024 00:16:44 +0200
|
||||||
|
Subject: [PATCH] ocf-shellfuncs: systemd_drop_in only if needed
|
||||||
|
|
||||||
|
Avoid dbus overload upon many simultaneous "daemon-reload" invocations
|
||||||
|
(when a resource agent using systemd_drop_in() is called multiple times
|
||||||
|
as part of parallel resource operations in Pacemaker) by skipping the
|
||||||
|
file creation and reload if the expected data already exists.
|
||||||
|
|
||||||
|
Whilst at it, align the indentation of the heredoc with the other parts
|
||||||
|
of the function.
|
||||||
|
|
||||||
|
Signed-off-by: Georg Pfuetzenreuter <mail@georg-pfuetzenreuter.net>
|
||||||
|
---
|
||||||
|
heartbeat/ocf-shellfuncs.in | 19 +++++++++++--------
|
||||||
|
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
|
||||||
|
index 9335cbf00..5c4bb3264 100644
|
||||||
|
--- a/heartbeat/ocf-shellfuncs.in
|
||||||
|
+++ b/heartbeat/ocf-shellfuncs.in
|
||||||
|
@@ -662,14 +662,17 @@ systemd_drop_in()
|
||||||
|
systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
|
||||||
|
mkdir -p "$systemdrundir"
|
||||||
|
conf_file="$systemdrundir/$1.conf"
|
||||||
|
- cat >"$conf_file" <<EOF
|
||||||
|
-[Unit]
|
||||||
|
-$2=$3
|
||||||
|
-EOF
|
||||||
|
- # The information is accessible through systemd API and systemd would
|
||||||
|
- # complain about improper permissions.
|
||||||
|
- chmod o+r "$conf_file"
|
||||||
|
- systemctl daemon-reload
|
||||||
|
+ conf_line="$2=$3"
|
||||||
|
+ if ! { [ -f "$conf_file" ] && grep -q "^$conf_line$" "$conf_file" ; } ; then
|
||||||
|
+ cat > "$conf_file" <<-EOF
|
||||||
|
+ [Unit]
|
||||||
|
+ $conf_line
|
||||||
|
+ EOF
|
||||||
|
+ # The information is accessible through systemd API and systemd would
|
||||||
|
+ # complain about improper permissions.
|
||||||
|
+ chmod o+r "$conf_file"
|
||||||
|
+ systemctl daemon-reload
|
||||||
|
+ fi
|
||||||
|
}
|
||||||
|
|
||||||
|
# usage: curl_retry RETRIES SLEEP ARGS URL
|
@ -0,0 +1,132 @@
|
|||||||
|
From 6fab544e702a7601714cd017aecc00193f23ae72 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Fri, 11 Oct 2024 13:13:10 +0200
|
||||||
|
Subject: [PATCH] IPaddr2: improve fail logic and check ip_status after adding
|
||||||
|
IP
|
||||||
|
|
||||||
|
* check that the label got applied
|
||||||
|
* return OCF_ERR_GENERIC to avoid false-positive when IP was manually added before starting the resource
|
||||||
|
* check ip_status after adding IP to fail without having to wait for the first monitor-action
|
||||||
|
|
||||||
|
Co-authored-by: Evan J. Felix <evan.felix@pnnl.gov>
|
||||||
|
---
|
||||||
|
heartbeat/IPaddr2 | 35 ++++++++++++++++++++++++++---------
|
||||||
|
1 file changed, 26 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
|
||||||
|
index e325aa574..27cae2d11 100755
|
||||||
|
--- a/heartbeat/IPaddr2
|
||||||
|
+++ b/heartbeat/IPaddr2
|
||||||
|
@@ -586,7 +586,7 @@ ip_init() {
|
||||||
|
exit $rc
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
SENDARPPIDFILE="$SENDARPPIDDIR/send_arp-$OCF_RESKEY_ip"
|
||||||
|
|
||||||
|
if [ -n "$IFLABEL" ]; then
|
||||||
|
@@ -985,6 +985,7 @@ run_send_ua() {
|
||||||
|
# ok = served (for CIP: + hash bucket)
|
||||||
|
# partial = served and no hash bucket (CIP only)
|
||||||
|
# partial2 = served and no CIP iptables rule
|
||||||
|
+# partial3 = served with no label
|
||||||
|
# no = nothing
|
||||||
|
#
|
||||||
|
ip_served() {
|
||||||
|
@@ -1002,6 +1003,11 @@ ip_served() {
|
||||||
|
|
||||||
|
if [ -z "$IP_CIP" ]; then
|
||||||
|
for i in $cur_nic; do
|
||||||
|
+ # check address label
|
||||||
|
+ if [ -n "$IFLABEL" ] && [ -z "`$IP2UTIL -o -f $FAMILY addr show $nic label $IFLABEL`" ]; then
|
||||||
|
+ echo partial3
|
||||||
|
+ return 0
|
||||||
|
+ fi
|
||||||
|
# only mark as served when on the same interfaces as $NIC
|
||||||
|
[ "$i" = "$NIC" ] || continue
|
||||||
|
echo "ok"
|
||||||
|
@@ -1065,7 +1071,12 @@ ip_start() {
|
||||||
|
if [ "$ip_status" = "ok" ]; then
|
||||||
|
exit $OCF_SUCCESS
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
+ if [ "$ip_status" = "partial3" ]; then
|
||||||
|
+ ocf_exit_reason "IP $OCF_RESKEY_ip available, but label missing"
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
if [ -n "$IP_CIP" ] && ([ $ip_status = "no" ] || [ $ip_status = "partial2" ]); then
|
||||||
|
$MODPROBE ip_conntrack
|
||||||
|
$IPADDR2_CIP_IPTABLES -I INPUT -d $OCF_RESKEY_ip -i $NIC -j CLUSTERIP \
|
||||||
|
@@ -1083,7 +1094,7 @@ ip_start() {
|
||||||
|
if [ -n "$IP_CIP" ] && [ $ip_status = "partial" ]; then
|
||||||
|
echo "+$IP_INC_NO" >$IP_CIP_FILE
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
if [ "$ip_status" = "no" ]; then
|
||||||
|
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
|
||||||
|
for i in `find_interface $OCF_RESKEY_ip 32`; do
|
||||||
|
@@ -1094,7 +1105,7 @@ ip_start() {
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
add_interface "$OCF_RESKEY_ip" "$NETMASK" "${BRDCAST:-none}" "$NIC" "$IFLABEL" "$METRIC"
|
||||||
|
rc=$?
|
||||||
|
|
||||||
|
@@ -1102,6 +1113,12 @@ ip_start() {
|
||||||
|
ocf_exit_reason "Failed to add $OCF_RESKEY_ip"
|
||||||
|
exit $rc
|
||||||
|
fi
|
||||||
|
+
|
||||||
|
+ ip_status=`ip_served`
|
||||||
|
+ if [ "$ip_status" != "ok" ]; then
|
||||||
|
+ ocf_exit_reason "Failed to add $OCF_RESKEY_ip with error $ip_status"
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
+ fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
case $NIC in
|
||||||
|
@@ -1134,7 +1151,7 @@ ip_stop() {
|
||||||
|
ocf_take_lock $CIP_lockfile
|
||||||
|
ocf_release_lock_on_exit $CIP_lockfile
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
if [ -f "$SENDARPPIDFILE" ] ; then
|
||||||
|
kill `cat "$SENDARPPIDFILE"`
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
@@ -1171,17 +1188,17 @@ ip_stop() {
|
||||||
|
i=`expr $i + 1`
|
||||||
|
done
|
||||||
|
else
|
||||||
|
- ip_del_if="no"
|
||||||
|
+ ip_del_if="no"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
if [ "$ip_del_if" = "yes" ]; then
|
||||||
|
delete_interface $OCF_RESKEY_ip $NIC $NETMASK
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]"
|
||||||
|
exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
-
|
||||||
|
+
|
||||||
|
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
|
||||||
|
restore_loopback "$OCF_RESKEY_ip"
|
||||||
|
fi
|
||||||
|
@@ -1200,7 +1217,7 @@ ip_monitor() {
|
||||||
|
run_arp_sender refresh
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
;;
|
||||||
|
- partial|no|partial2)
|
||||||
|
+ no)
|
||||||
|
exit $OCF_NOT_RUNNING
|
||||||
|
;;
|
||||||
|
*)
|
@ -45,7 +45,7 @@
|
|||||||
Name: resource-agents
|
Name: resource-agents
|
||||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||||
Version: 4.10.0
|
Version: 4.10.0
|
||||||
Release: 66%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
Release: 67%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||||
License: GPLv2+ and LGPLv2+
|
License: GPLv2+ and LGPLv2+
|
||||||
URL: https://github.com/ClusterLabs/resource-agents
|
URL: https://github.com/ClusterLabs/resource-agents
|
||||||
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
||||||
@ -138,6 +138,10 @@ Patch85: RHEL-58038-Filesystem-dont-sleep-no-processes-only-send-force-net-fs-af
|
|||||||
Patch86: RHEL-59576-Filesystem-try-umount-first-avoid-arguments-list-too-long.patch
|
Patch86: RHEL-59576-Filesystem-try-umount-first-avoid-arguments-list-too-long.patch
|
||||||
Patch87: RHEL-59172-nfsserver-also-stop-rpc-statd-for-nfsv4_only.patch
|
Patch87: RHEL-59172-nfsserver-also-stop-rpc-statd-for-nfsv4_only.patch
|
||||||
Patch88: RHEL-58008-podman-force-remove-container-if-necessary.patch
|
Patch88: RHEL-58008-podman-force-remove-container-if-necessary.patch
|
||||||
|
Patch89: RHEL-61888-ocf-shellfuncs-only-create-update-reload-systemd-drop-in-if-needed.patch
|
||||||
|
Patch90: RHEL-62200-IPaddr2-improve-fail-logic-check-ip_status-after-adding-IP.patch
|
||||||
|
Patch91: RHEL-40589-azure-events-az-update-API-versions-add-retry-for-metadata.patch
|
||||||
|
Patch92: RHEL-58632-azure-events-use-node-name-from-cluster.patch
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
Patch500: ha-cloud-support-aliyun.patch
|
Patch500: ha-cloud-support-aliyun.patch
|
||||||
@ -349,6 +353,10 @@ exit 1
|
|||||||
%patch -p1 -P 86
|
%patch -p1 -P 86
|
||||||
%patch -p1 -P 87
|
%patch -p1 -P 87
|
||||||
%patch -p1 -P 88
|
%patch -p1 -P 88
|
||||||
|
%patch -p1 -P 89
|
||||||
|
%patch -p1 -P 90
|
||||||
|
%patch -p1 -P 91
|
||||||
|
%patch -p1 -P 92
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
%patch -p1 -P 500
|
%patch -p1 -P 500
|
||||||
@ -669,6 +677,20 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
|||||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Mon Oct 14 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-67
|
||||||
|
- ocf-shellfuncs: only create/update and reload systemd drop-in if
|
||||||
|
needed
|
||||||
|
- IPaddr2: improve fail logic and check ip_status after adding IP
|
||||||
|
- azure-events-az: update API versions, and add retry functionality
|
||||||
|
for metadata requests
|
||||||
|
- azure-events*: use node name from cluster instead of hostname to
|
||||||
|
avoid failing if they're not the same
|
||||||
|
|
||||||
|
Resolves: RHEL-61888
|
||||||
|
Resolves: RHEL-62200
|
||||||
|
Resolves: RHEL-40589
|
||||||
|
Resolves: RHEL-58632
|
||||||
|
|
||||||
* Wed Oct 2 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-66
|
* Wed Oct 2 2024 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-66
|
||||||
- nfsserver: also stop rpc-statd for nfsv4_only to avoid stop failing
|
- nfsserver: also stop rpc-statd for nfsv4_only to avoid stop failing
|
||||||
in some cases
|
in some cases
|
||||||
|
Loading…
Reference in New Issue
Block a user