Auto sync2gitlab import of resource-agents-4.9.0-30.el8.src.rpm
This commit is contained in:
parent
517f406909
commit
3c533db5d0
195
bz1905820-LVM-activate-fix-return-codes.patch
Normal file
195
bz1905820-LVM-activate-fix-return-codes.patch
Normal file
@ -0,0 +1,195 @@
|
|||||||
|
From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
Date: Sun, 13 Dec 2020 14:58:34 -0800
|
||||||
|
Subject: [PATCH] LVM-activate: Fix return codes
|
||||||
|
|
||||||
|
OCF_ERR_ARGS should be used when the configuration isn't valid for the
|
||||||
|
**local** node, and so the resource should not attempt to start again
|
||||||
|
locally until the issue is corrected.
|
||||||
|
|
||||||
|
OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
|
||||||
|
**any** node, and so the resource should not attempt to start again
|
||||||
|
anywhere until the issue is corrected.
|
||||||
|
|
||||||
|
One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
|
||||||
|
running (or improperly not running) be an OCF_ERR_GENERIC or
|
||||||
|
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
|
||||||
|
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
|
||||||
|
user intervention suggests OCF_ERR_ARGS. The approach here is to use
|
||||||
|
GENERIC for all of these. One can make the case that "improperly
|
||||||
|
running" should use ARGS, since a process must be manually stopped to
|
||||||
|
fix the issue, and that "improperly not running" should use GENERIC,
|
||||||
|
since there's a small chance the process died and will be recovered in
|
||||||
|
some way.
|
||||||
|
|
||||||
|
More info about return code meanings:
|
||||||
|
- https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted
|
||||||
|
|
||||||
|
Resolves: RHBZ#1905820
|
||||||
|
|
||||||
|
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
|
||||||
|
---
|
||||||
|
heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
|
||||||
|
1 file changed, 23 insertions(+), 24 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
|
||||||
|
index c86606637..e951a08e9 100755
|
||||||
|
--- a/heartbeat/LVM-activate
|
||||||
|
+++ b/heartbeat/LVM-activate
|
||||||
|
@@ -333,8 +333,7 @@ config_verify()
|
||||||
|
real=$(lvmconfig "$name" | cut -d'=' -f2)
|
||||||
|
if [ "$real" != "$expect" ]; then
|
||||||
|
ocf_exit_reason "config item $name: expect=$expect but real=$real"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
-
|
||||||
|
+ exit $OCF_ERR_ARGS
|
||||||
|
fi
|
||||||
|
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
@@ -366,12 +365,12 @@ lvmlockd_check()
|
||||||
|
fi
|
||||||
|
|
||||||
|
ocf_exit_reason "lvmlockd daemon is not running!"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
if pgrep clvmd >/dev/null 2>&1 ; then
|
||||||
|
ocf_exit_reason "clvmd daemon is running unexpectedly."
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
@@ -402,17 +401,17 @@ clvmd_check()
|
||||||
|
# Good: clvmd is running, and lvmlockd is not running
|
||||||
|
if ! pgrep clvmd >/dev/null 2>&1 ; then
|
||||||
|
ocf_exit_reason "clvmd daemon is not running!"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
if pgrep lvmetad >/dev/null 2>&1 ; then
|
||||||
|
ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
if pgrep lvmlockd >/dev/null 2>&1 ; then
|
||||||
|
ocf_exit_reason "lvmlockd daemon is running unexpectedly."
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
@@ -424,12 +423,12 @@ systemid_check()
|
||||||
|
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
|
||||||
|
if [ "$source" = "" ] || [ "$source" = "none" ]; then
|
||||||
|
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_ARGS
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -z ${SYSTEM_ID} ]; then
|
||||||
|
ocf_exit_reason "local/system_id is not set!"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_ARGS
|
||||||
|
fi
|
||||||
|
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
@@ -441,18 +440,18 @@ tagging_check()
|
||||||
|
# The volume_list must be initialized to something in order to
|
||||||
|
# guarantee our tag will be filtered on startup
|
||||||
|
if ! lvm dumpconfig activation/volume_list; then
|
||||||
|
- ocf_log err "LVM: Improper setup detected"
|
||||||
|
+ ocf_log err "LVM: Improper setup detected"
|
||||||
|
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_ARGS
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Our tag must _NOT_ be in the volume_list. This agent
|
||||||
|
# overrides the volume_list during activation using the
|
||||||
|
# special tag reserved for cluster activation
|
||||||
|
if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
|
||||||
|
- ocf_log err "LVM: Improper setup detected"
|
||||||
|
+ ocf_log err "LVM: Improper setup detected"
|
||||||
|
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_ARGS
|
||||||
|
fi
|
||||||
|
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
@@ -463,13 +462,13 @@ read_parameters()
|
||||||
|
if [ -z "$VG" ]
|
||||||
|
then
|
||||||
|
ocf_exit_reason "You must identify the volume group name!"
|
||||||
|
- exit $OCF_ERR_ARGS
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
|
||||||
|
then
|
||||||
|
ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
|
||||||
|
- exit $OCF_ERR_ARGS
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Convert VG_access_mode from string to index
|
||||||
|
@@ -519,8 +518,10 @@ lvm_validate() {
|
||||||
|
exit $OCF_NOT_RUNNING
|
||||||
|
fi
|
||||||
|
|
||||||
|
+ # Could be a transient error (e.g., iSCSI connection
|
||||||
|
+ # issue) so use OCF_ERR_GENERIC
|
||||||
|
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Inconsistency might be due to missing physical volumes, which doesn't
|
||||||
|
@@ -549,7 +550,7 @@ lvm_validate() {
|
||||||
|
mode=$?
|
||||||
|
if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
|
||||||
|
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
|
||||||
|
- exit $OCF_ERR_ARGS
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Nothing to do if the VG has no logical volume
|
||||||
|
@@ -561,11 +562,11 @@ lvm_validate() {
|
||||||
|
|
||||||
|
# Check if the given $LV is in the $VG
|
||||||
|
if [ -n "$LV" ]; then
|
||||||
|
- OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
|
||||||
|
+ output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
- ocf_log err "lvs: ${OUT}"
|
||||||
|
+ ocf_log err "lvs: ${output}"
|
||||||
|
ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
|
||||||
|
- exit $OCF_ERR_ARGS
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
@@ -580,7 +581,6 @@ lvm_validate() {
|
||||||
|
3)
|
||||||
|
systemid_check
|
||||||
|
;;
|
||||||
|
-
|
||||||
|
4)
|
||||||
|
tagging_check
|
||||||
|
;;
|
||||||
|
@@ -808,10 +808,9 @@ lvm_status() {
|
||||||
|
dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
|
||||||
|
2>&1
|
||||||
|
if [ $? -ne 0 ]; then
|
||||||
|
- return $OCF_NOT_RUNNING
|
||||||
|
- else
|
||||||
|
- return $OCF_SUCCESS
|
||||||
|
+ return $OCF_ERR_GENERIC
|
||||||
|
fi
|
||||||
|
+ return $OCF_SUCCESS
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
|
903
bz1977012-azure-events-az-new-ra.patch
Normal file
903
bz1977012-azure-events-az-new-ra.patch
Normal file
@ -0,0 +1,903 @@
|
|||||||
|
From 5dcd5153f0318e4766f7f4d3e61dfdb4b352c39c Mon Sep 17 00:00:00 2001
|
||||||
|
From: MSSedusch <sedusch@microsoft.com>
|
||||||
|
Date: Mon, 30 May 2022 15:08:10 +0200
|
||||||
|
Subject: [PATCH 1/2] add new Azure Events AZ resource agent
|
||||||
|
|
||||||
|
---
|
||||||
|
.gitignore | 1 +
|
||||||
|
configure.ac | 8 +
|
||||||
|
doc/man/Makefile.am | 4 +
|
||||||
|
heartbeat/Makefile.am | 4 +
|
||||||
|
heartbeat/azure-events-az.in | 782 +++++++++++++++++++++++++++++++++++
|
||||||
|
5 files changed, 799 insertions(+)
|
||||||
|
create mode 100644 heartbeat/azure-events-az.in
|
||||||
|
|
||||||
|
diff --git a/.gitignore b/.gitignore
|
||||||
|
index 0c259b5cf..e2b7c039c 100644
|
||||||
|
--- a/.gitignore
|
||||||
|
+++ b/.gitignore
|
||||||
|
@@ -54,6 +54,7 @@ heartbeat/Squid
|
||||||
|
heartbeat/SysInfo
|
||||||
|
heartbeat/aws-vpc-route53
|
||||||
|
heartbeat/azure-events
|
||||||
|
+heartbeat/azure-events-az
|
||||||
|
heartbeat/clvm
|
||||||
|
heartbeat/conntrackd
|
||||||
|
heartbeat/dnsupdate
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index eeecfad0e..5716a2be2 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -523,6 +523,13 @@ if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
fi
|
||||||
|
AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1)
|
||||||
|
|
||||||
|
+BUILD_AZURE_EVENTS_AZ=1
|
||||||
|
+if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
+ BUILD_AZURE_EVENTS_AZ=0
|
||||||
|
+ AC_MSG_WARN("Not building azure-events-az")
|
||||||
|
+fi
|
||||||
|
+AM_CONDITIONAL(BUILD_AZURE_EVENTS_AZ, test $BUILD_AZURE_EVENTS_AZ -eq 1)
|
||||||
|
+
|
||||||
|
BUILD_GCP_PD_MOVE=1
|
||||||
|
if test -z "$PYTHON" || test "x${HAVE_PYMOD_GOOGLEAPICLIENT}" != xyes || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
BUILD_GCP_PD_MOVE=0
|
||||||
|
@@ -976,6 +983,7 @@ rgmanager/Makefile \
|
||||||
|
|
||||||
|
dnl Files we output that need to be executable
|
||||||
|
AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events])
|
||||||
|
+AC_CONFIG_FILES([heartbeat/azure-events-az], [chmod +x heartbeat/azure-events-az])
|
||||||
|
AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget])
|
||||||
|
AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID])
|
||||||
|
AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE])
|
||||||
|
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
|
||||||
|
index cd8fd16bf..658c700ac 100644
|
||||||
|
--- a/doc/man/Makefile.am
|
||||||
|
+++ b/doc/man/Makefile.am
|
||||||
|
@@ -219,6 +219,10 @@ if BUILD_AZURE_EVENTS
|
||||||
|
man_MANS += ocf_heartbeat_azure-events.7
|
||||||
|
endif
|
||||||
|
|
||||||
|
+if BUILD_AZURE_EVENTS_AZ
|
||||||
|
+man_MANS += ocf_heartbeat_azure-events-az.7
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
if BUILD_GCP_PD_MOVE
|
||||||
|
man_MANS += ocf_heartbeat_gcp-pd-move.7
|
||||||
|
endif
|
||||||
|
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
|
||||||
|
index 20d41e36a..1133dc13e 100644
|
||||||
|
--- a/heartbeat/Makefile.am
|
||||||
|
+++ b/heartbeat/Makefile.am
|
||||||
|
@@ -188,6 +188,10 @@ if BUILD_AZURE_EVENTS
|
||||||
|
ocf_SCRIPTS += azure-events
|
||||||
|
endif
|
||||||
|
|
||||||
|
+if BUILD_AZURE_EVENTS_AZ
|
||||||
|
+ocf_SCRIPTS += azure-events-az
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
if BUILD_GCP_PD_MOVE
|
||||||
|
ocf_SCRIPTS += gcp-pd-move
|
||||||
|
endif
|
||||||
|
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..616fc8d9e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/heartbeat/azure-events-az.in
|
||||||
|
@@ -0,0 +1,782 @@
|
||||||
|
+#!@PYTHON@ -tt
|
||||||
|
+#
|
||||||
|
+# Resource agent for monitoring Azure Scheduled Events
|
||||||
|
+#
|
||||||
|
+# License: GNU General Public License (GPL)
|
||||||
|
+# (c) 2018 Tobias Niekamp, Microsoft Corp.
|
||||||
|
+# and Linux-HA contributors
|
||||||
|
+
|
||||||
|
+import os
|
||||||
|
+import sys
|
||||||
|
+import time
|
||||||
|
+import subprocess
|
||||||
|
+import json
|
||||||
|
+try:
|
||||||
|
+ import urllib2
|
||||||
|
+ from urllib2 import URLError
|
||||||
|
+except ImportError:
|
||||||
|
+ import urllib.request as urllib2
|
||||||
|
+ from urllib.error import URLError
|
||||||
|
+import socket
|
||||||
|
+from collections import defaultdict
|
||||||
|
+
|
||||||
|
+OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT"))
|
||||||
|
+sys.path.append(OCF_FUNCTIONS_DIR)
|
||||||
|
+import ocf
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+VERSION = "0.10"
|
||||||
|
+USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
|
||||||
|
+
|
||||||
|
+attr_globalPullState = "azure-events-az_globalPullState"
|
||||||
|
+attr_lastDocVersion = "azure-events-az_lastDocVersion"
|
||||||
|
+attr_curNodeState = "azure-events-az_curNodeState"
|
||||||
|
+attr_pendingEventIDs = "azure-events-az_pendingEventIDs"
|
||||||
|
+attr_healthstate = "#health-azure"
|
||||||
|
+
|
||||||
|
+default_loglevel = ocf.logging.INFO
|
||||||
|
+default_relevantEventTypes = set(["Reboot", "Redeploy"])
|
||||||
|
+
|
||||||
|
+global_pullMaxAttempts = 3
|
||||||
|
+global_pullDelaySecs = 1
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class attrDict(defaultdict):
|
||||||
|
+ """
|
||||||
|
+ A wrapper for accessing dict keys like an attribute
|
||||||
|
+ """
|
||||||
|
+ def __init__(self, data):
|
||||||
|
+ super(attrDict, self).__init__(attrDict)
|
||||||
|
+ for d in data.keys():
|
||||||
|
+ self.__setattr__(d, data[d])
|
||||||
|
+
|
||||||
|
+ def __getattr__(self, key):
|
||||||
|
+ try:
|
||||||
|
+ return self[key]
|
||||||
|
+ except KeyError:
|
||||||
|
+ raise AttributeError(key)
|
||||||
|
+
|
||||||
|
+ def __setattr__(self, key, value):
|
||||||
|
+ self[key] = value
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class azHelper:
|
||||||
|
+ """
|
||||||
|
+ Helper class for Azure's metadata API (including Scheduled Events)
|
||||||
|
+ """
|
||||||
|
+ metadata_host = "http://169.254.169.254/metadata"
|
||||||
|
+ instance_api = "instance"
|
||||||
|
+ events_api = "scheduledevents"
|
||||||
|
+ api_version = "2019-08-01"
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _sendMetadataRequest(endpoint, postData=None):
|
||||||
|
+ """
|
||||||
|
+ Send a request to the Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
|
||||||
|
+ data = ""
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
|
||||||
|
+
|
||||||
|
+ if postData and type(postData) != bytes:
|
||||||
|
+ postData = postData.encode()
|
||||||
|
+
|
||||||
|
+ req = urllib2.Request(url, postData)
|
||||||
|
+ req.add_header("Metadata", "true")
|
||||||
|
+ req.add_header("User-Agent", USER_AGENT)
|
||||||
|
+ try:
|
||||||
|
+ resp = urllib2.urlopen(req)
|
||||||
|
+ except URLError as e:
|
||||||
|
+ if hasattr(e, 'reason'):
|
||||||
|
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
|
||||||
|
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
+ elif hasattr(e, 'code'):
|
||||||
|
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
|
||||||
|
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
+ else:
|
||||||
|
+ data = resp.read()
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
|
||||||
|
+
|
||||||
|
+ if data:
|
||||||
|
+ data = json.loads(data)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: finished")
|
||||||
|
+ return data
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getInstanceInfo():
|
||||||
|
+ """
|
||||||
|
+ Fetch details about the current VM from the Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: begin")
|
||||||
|
+
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
|
||||||
|
+
|
||||||
|
+ if jsondata:
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
|
||||||
|
+ return attrDict(jsondata["compute"])
|
||||||
|
+ else:
|
||||||
|
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
|
||||||
|
+ sys.exit(ocf.OCF_ERR_GENERIC)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def pullScheduledEvents():
|
||||||
|
+ """
|
||||||
|
+ Retrieve all currently scheduled events via Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: begin")
|
||||||
|
+
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: finished")
|
||||||
|
+ return attrDict(jsondata)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def forceEvents(eventIDs):
|
||||||
|
+ """
|
||||||
|
+ Force a set of events to start immediately
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("forceEvents: begin")
|
||||||
|
+
|
||||||
|
+ events = []
|
||||||
|
+ for e in eventIDs:
|
||||||
|
+ events.append({
|
||||||
|
+ "EventId": e,
|
||||||
|
+ })
|
||||||
|
+ postData = {
|
||||||
|
+ "StartRequests" : events
|
||||||
|
+ }
|
||||||
|
+ ocf.logger.info("forceEvents: postData = %s" % postData)
|
||||||
|
+ resp = azHelper._sendMetadataRequest(azHelper.events_api, postData=json.dumps(postData))
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("forceEvents: finished")
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class clusterHelper:
|
||||||
|
+ """
|
||||||
|
+ Helper functions for Pacemaker control via crm
|
||||||
|
+ """
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _getLocation(node):
|
||||||
|
+ """
|
||||||
|
+ Helper function to build the location arguments for a local (per-node) or global (crm_config) attribute
|
||||||
|
+ """
|
||||||
|
+ if node:
|
||||||
|
+ return ["--node", node]
|
||||||
|
+ else:
|
||||||
|
+ return ["--type", "crm_config"]
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _exec(command, *args):
|
||||||
|
+ """
|
||||||
|
+ Helper function to execute a UNIX command
|
||||||
|
+ """
|
||||||
|
+ args = list(args)
|
||||||
|
+ ocf.logger.debug("_exec: begin; command = %s, args = %s" % (command, str(args)))
|
||||||
|
+
|
||||||
|
+ def flatten(*n):
|
||||||
|
+ return (str(e) for a in n
|
||||||
|
+ for e in (flatten(*a) if isinstance(a, (tuple, list)) else (str(a),)))
|
||||||
|
+ command = list(flatten([command] + args))
|
||||||
|
+ ocf.logger.debug("_exec: cmd = %s" % " ".join(command))
|
||||||
|
+ try:
|
||||||
|
+ ret = subprocess.check_output(command)
|
||||||
|
+ if type(ret) != str:
|
||||||
|
+ ret = ret.decode()
|
||||||
|
+ ocf.logger.debug("_exec: return = %s" % ret)
|
||||||
|
+ return ret.rstrip()
|
||||||
|
+ except Exception as err:
|
||||||
|
+ ocf.logger.exception(err)
|
||||||
|
+ return None
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def setAttr(key, value, node=None):
|
||||||
|
+ """
|
||||||
|
+ Set the value of a specific global/local attribute in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("setAttr: begin; key = %s, value = %s, node = %s" % (key, value, node))
|
||||||
|
+
|
||||||
|
+ if value:
|
||||||
|
+ ret = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--update", value,
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+ else:
|
||||||
|
+ ret = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--delete",
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("setAttr: finished")
|
||||||
|
+ return len(ret) == 0
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getAttr(key, node=None):
|
||||||
|
+ """
|
||||||
|
+ Retrieve a global/local attribute from the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getAttr: begin; key = %s, node = %s" % (key, node))
|
||||||
|
+
|
||||||
|
+ val = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--query", "--quiet",
|
||||||
|
+ "--default", "",
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+ ocf.logger.debug("getAttr: finished")
|
||||||
|
+ if not val:
|
||||||
|
+ return None
|
||||||
|
+ return val if not val.isdigit() else int(val)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getAllNodes():
|
||||||
|
+ """
|
||||||
|
+ Get a list of hostnames for all nodes in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getAllNodes: begin")
|
||||||
|
+
|
||||||
|
+ nodes = []
|
||||||
|
+ nodeList = clusterHelper._exec("crm_node", "--list")
|
||||||
|
+ for n in nodeList.split("\n"):
|
||||||
|
+ nodes.append(n.split()[1])
|
||||||
|
+ ocf.logger.debug("getAllNodes: finished; return %s" % str(nodes))
|
||||||
|
+
|
||||||
|
+ return nodes
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getHostNameFromAzName(azName):
|
||||||
|
+ """
|
||||||
|
+ Helper function to get the actual host name from an Azure node name
|
||||||
|
+ """
|
||||||
|
+ return clusterHelper.getAttr("hostName_%s" % azName)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def removeHoldFromNodes():
|
||||||
|
+ """
|
||||||
|
+ Remove the ON_HOLD state from all nodes in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("removeHoldFromNodes: begin")
|
||||||
|
+
|
||||||
|
+ for n in clusterHelper.getAllNodes():
|
||||||
|
+ if clusterHelper.getAttr(attr_curNodeState, node=n) == "ON_HOLD":
|
||||||
|
+ clusterHelper.setAttr(attr_curNodeState, "AVAILABLE", node=n)
|
||||||
|
+ ocf.logger.info("removeHoldFromNodes: removed ON_HOLD from node %s" % n)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("removeHoldFromNodes: finished")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def otherNodesAvailable(exceptNode):
|
||||||
|
+ """
|
||||||
|
+ Check if there are any nodes (except a given node) in the Pacemaker cluster that have state AVAILABLE
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: begin; exceptNode = %s" % exceptNode)
|
||||||
|
+
|
||||||
|
+ for n in clusterHelper.getAllNodes():
|
||||||
|
+ state = clusterHelper.getAttr(attr_curNodeState, node=n)
|
||||||
|
+ state = stringToNodeState(state) if state else AVAILABLE
|
||||||
|
+ if state == AVAILABLE and n != exceptNode.hostName:
|
||||||
|
+ ocf.logger.info("otherNodesAvailable: at least %s is available" % n)
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: finished")
|
||||||
|
+ return True
|
||||||
|
+ ocf.logger.info("otherNodesAvailable: no other nodes are available")
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: finished")
|
||||||
|
+
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def transitionSummary():
|
||||||
|
+ """
|
||||||
|
+ Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node in standby)
|
||||||
|
+ """
|
||||||
|
+ # <tniek> Is a global crm_simulate "too much"? Or would it be sufficient if there are no planned transitions for a particular node?
|
||||||
|
+ # # crm_simulate -Ls
|
||||||
|
+ # Transition Summary:
|
||||||
|
+ # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
|
||||||
|
+ # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
|
||||||
|
+ # * Move rsc_ip_HN1_HDB03 (Started hsr3-db0 -> hsr3-db1)
|
||||||
|
+ # * Start rsc_nc_HN1_HDB03 (hsr3-db1)
|
||||||
|
+ # # Expected result when there are no pending actions:
|
||||||
|
+ # Transition Summary:
|
||||||
|
+ ocf.logger.debug("transitionSummary: begin")
|
||||||
|
+
|
||||||
|
+ summary = clusterHelper._exec("crm_simulate", "-Ls")
|
||||||
|
+ if not summary:
|
||||||
|
+ ocf.logger.warning("transitionSummary: could not load transition summary")
|
||||||
|
+ return False
|
||||||
|
+ if summary.find("Transition Summary:") < 0:
|
||||||
|
+ ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
|
||||||
|
+ return False
|
||||||
|
+ summary = summary.split("Transition Summary:")[1]
|
||||||
|
+ ret = summary.split("\n").pop(0)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
|
||||||
|
+ return ret
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def listOperationsOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Get a list of all current operations for a given node (used to check if any resources are pending)
|
||||||
|
+ """
|
||||||
|
+ # hsr3-db1:/home/tniek # crm_resource --list-operations -N hsr3-db0
|
||||||
|
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_start_0 (node=hsr3-db0, call=91, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=115ms): complete
|
||||||
|
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_monitor_10000 (node=hsr3-db0, call=93, rc=0, last-rc-change=Fri Jun 8 22:37:47 2018, exec=197ms): complete
|
||||||
|
+ # rsc_SAPHana_HN1_HDB03 (ocf::suse:SAPHana): Master: rsc_SAPHana_HN1_HDB03_start_0 (node=hsr3-db0, call=-1, rc=193, last-rc-change=Fri Jun 8 22:37:46 2018, exec=0ms): pending
|
||||||
|
+ # rsc_SAPHanaTopology_HN1_HDB03 (ocf::suse:SAPHanaTopology): Started: rsc_SAPHanaTopology_HN1_HDB03_start_0 (node=hsr3-db0, call=90, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=3214ms): complete
|
||||||
|
+ ocf.logger.debug("listOperationsOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ resources = clusterHelper._exec("crm_resource", "--list-operations", "-N", node)
|
||||||
|
+ if len(resources) == 0:
|
||||||
|
+ ret = []
|
||||||
|
+ else:
|
||||||
|
+ ret = resources.split("\n")
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("listOperationsOnNode: finished; return = %s" % str(ret))
|
||||||
|
+ return ret
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def noPendingResourcesOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Check that there are no pending resources on a given node
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ for r in clusterHelper.listOperationsOnNode(node):
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: * %s" % r)
|
||||||
|
+ resource = r.split()[-1]
|
||||||
|
+ if resource == "pending":
|
||||||
|
+ ocf.logger.info("noPendingResourcesOnNode: found resource %s that is still pending" % resource)
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+ ocf.logger.info("noPendingResourcesOnNode: no pending resources on node %s" % node)
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = True")
|
||||||
|
+
|
||||||
|
+ return True
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def allResourcesStoppedOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Check that all resources on a given node are stopped
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ if clusterHelper.noPendingResourcesOnNode(node):
|
||||||
|
+ if len(clusterHelper.transitionSummary()) == 0:
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: no pending resources on node %s and empty transition summary" % node)
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = True")
|
||||||
|
+ return True
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: transition summary is not empty")
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: still pending resources on node %s" % node)
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+AVAILABLE = 0 # Node is online and ready to handle events
|
||||||
|
+STOPPING = 1 # Standby has been triggered, but some resources are still running
|
||||||
|
+IN_EVENT = 2 # All resources are stopped, and event has been initiated via Azure Metadata Service
|
||||||
|
+ON_HOLD = 3 # Node has a pending event that cannot be started because there are no other nodes available
|
||||||
|
+
|
||||||
|
+def stringToNodeState(name):
|
||||||
|
+ if type(name) == int: return name
|
||||||
|
+ if name == "STOPPING": return STOPPING
|
||||||
|
+ if name == "IN_EVENT": return IN_EVENT
|
||||||
|
+ if name == "ON_HOLD": return ON_HOLD
|
||||||
|
+ return AVAILABLE
|
||||||
|
+
|
||||||
|
+def nodeStateToString(state):
|
||||||
|
+ if state == STOPPING: return "STOPPING"
|
||||||
|
+ if state == IN_EVENT: return "IN_EVENT"
|
||||||
|
+ if state == ON_HOLD: return "ON_HOLD"
|
||||||
|
+ return "AVAILABLE"
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class Node:
|
||||||
|
+ """
|
||||||
|
+ Core class implementing logic for a cluster node
|
||||||
|
+ """
|
||||||
|
+ def __init__(self, ra):
|
||||||
|
+ self.raOwner = ra
|
||||||
|
+ self.azInfo = azHelper.getInstanceInfo()
|
||||||
|
+ self.azName = self.azInfo.name
|
||||||
|
+ self.hostName = socket.gethostname()
|
||||||
|
+ self.setAttr("azName", self.azName)
|
||||||
|
+ clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
|
||||||
|
+
|
||||||
|
+ def getAttr(self, key):
|
||||||
|
+ """
|
||||||
|
+ Get a local attribute
|
||||||
|
+ """
|
||||||
|
+ return clusterHelper.getAttr(key, node=self.hostName)
|
||||||
|
+
|
||||||
|
+ def setAttr(self, key, value):
|
||||||
|
+ """
|
||||||
|
+ Set a local attribute
|
||||||
|
+ """
|
||||||
|
+ return clusterHelper.setAttr(key, value, node=self.hostName)
|
||||||
|
+
|
||||||
|
+ def selfOrOtherNode(self, node):
|
||||||
|
+ """
|
||||||
|
+ Helper function to distinguish self/other node
|
||||||
|
+ """
|
||||||
|
+ return node if node else self.hostName
|
||||||
|
+
|
||||||
|
+ def setState(self, state, node=None):
|
||||||
|
+ """
|
||||||
|
+ Set the state for a given node (or self)
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("setState: begin; node = %s, state = %s" % (node, nodeStateToString(state)))
|
||||||
|
+
|
||||||
|
+ clusterHelper.setAttr(attr_curNodeState, nodeStateToString(state), node=node)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("setState: finished")
|
||||||
|
+
|
||||||
|
+ def getState(self, node=None):
|
||||||
|
+ """
|
||||||
|
+ Get the state for a given node (or self)
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("getState: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ state = clusterHelper.getAttr(attr_curNodeState, node=node)
|
||||||
|
+ ocf.logger.debug("getState: state = %s" % state)
|
||||||
|
+ ocf.logger.debug("getState: finished")
|
||||||
|
+ if not state:
|
||||||
|
+ return AVAILABLE
|
||||||
|
+ return stringToNodeState(state)
|
||||||
|
+
|
||||||
|
+ def setEventIDs(self, eventIDs, node=None):
|
||||||
|
+ """
|
||||||
|
+ Set pending EventIDs for a given node (or self)
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("setEventIDs: begin; node = %s, eventIDs = %s" % (node, str(eventIDs)))
|
||||||
|
+
|
||||||
|
+ if eventIDs:
|
||||||
|
+ eventIDStr = ",".join(eventIDs)
|
||||||
|
+ else:
|
||||||
|
+ eventIDStr = None
|
||||||
|
+ clusterHelper.setAttr(attr_pendingEventIDs, eventIDStr, node=node)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("setEventIDs: finished")
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
+ def getEventIDs(self, node=None):
|
||||||
|
+ """
|
||||||
|
+ Get pending EventIDs for a given node (or self)
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("getEventIDs: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ eventIDStr = clusterHelper.getAttr(attr_pendingEventIDs, node=node)
|
||||||
|
+ if eventIDStr:
|
||||||
|
+ eventIDs = eventIDStr.split(",")
|
||||||
|
+ else:
|
||||||
|
+ eventIDs = None
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("getEventIDs: finished; eventIDs = %s" % str(eventIDs))
|
||||||
|
+ return eventIDs
|
||||||
|
+
|
||||||
|
+ def updateNodeStateAndEvents(self, state, eventIDs, node=None):
|
||||||
|
+ """
|
||||||
|
+ Set the state and pending EventIDs for a given node (or self)
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("updateNodeStateAndEvents: begin; node = %s, state = %s, eventIDs = %s" % (node, nodeStateToString(state), str(eventIDs)))
|
||||||
|
+
|
||||||
|
+ self.setState(state, node=node)
|
||||||
|
+ self.setEventIDs(eventIDs, node=node)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("updateNodeStateAndEvents: finished")
|
||||||
|
+ return state
|
||||||
|
+
|
||||||
|
+ def putNodeStandby(self, node=None):
|
||||||
|
+ """
|
||||||
|
+ Put self to standby
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("putNodeStandby: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--node", node,
|
||||||
|
+ "--name", attr_healthstate,
|
||||||
|
+ "--update", "-1000000",
|
||||||
|
+ "--lifetime=forever")
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("putNodeStandby: finished")
|
||||||
|
+
|
||||||
|
+ def isNodeInStandby(self, node=None):
|
||||||
|
+ """
|
||||||
|
+ check if node is in standby
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("isNodeInStandby: begin; node = %s" % node)
|
||||||
|
+ isInStandy = False
|
||||||
|
+
|
||||||
|
+ healthAttributeStr = clusterHelper.getAttr(attr_healthstate, node)
|
||||||
|
+ if healthAttributeStr is not None:
|
||||||
|
+ try:
|
||||||
|
+ healthAttribute = int(healthAttributeStr)
|
||||||
|
+ isInStandy = healthAttribute < 0
|
||||||
|
+ except ValueError:
|
||||||
|
+ # Handle the exception
|
||||||
|
+ ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
|
||||||
|
+ return isInStandy
|
||||||
|
+
|
||||||
|
+ def putNodeOnline(self, node=None):
|
||||||
|
+ """
|
||||||
|
+ Put self back online
|
||||||
|
+ """
|
||||||
|
+ node = self.selfOrOtherNode(node)
|
||||||
|
+ ocf.logger.debug("putNodeOnline: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--node", node,
|
||||||
|
+ "--name", "#health-azure",
|
||||||
|
+ "--update", "0",
|
||||||
|
+ "--lifetime=forever")
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("putNodeOnline: finished")
|
||||||
|
+
|
||||||
|
+ def separateEvents(self, events):
|
||||||
|
+ """
|
||||||
|
+ Split own/other nodes' events
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("separateEvents: begin; events = %s" % str(events))
|
||||||
|
+
|
||||||
|
+ localEvents = []
|
||||||
|
+ remoteEvents = []
|
||||||
|
+ for e in events:
|
||||||
|
+ e = attrDict(e)
|
||||||
|
+ if e.EventType not in self.raOwner.relevantEventTypes:
|
||||||
|
+ continue
|
||||||
|
+ if self.azName in e.Resources:
|
||||||
|
+ localEvents.append(e)
|
||||||
|
+ else:
|
||||||
|
+ remoteEvents.append(e)
|
||||||
|
+ ocf.logger.debug("separateEvents: finished; localEvents = %s, remoteEvents = %s" % (str(localEvents), str(remoteEvents)))
|
||||||
|
+ return (localEvents, remoteEvents)
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class raAzEvents:
|
||||||
|
+ """
|
||||||
|
+ Main class for resource agent
|
||||||
|
+ """
|
||||||
|
+ def __init__(self, relevantEventTypes):
|
||||||
|
+ self.node = Node(self)
|
||||||
|
+ self.relevantEventTypes = relevantEventTypes
|
||||||
|
+
|
||||||
|
+ def monitor(self):
|
||||||
|
+ ocf.logger.debug("monitor: begin")
|
||||||
|
+
|
||||||
|
+ events = azHelper.pullScheduledEvents()
|
||||||
|
+
|
||||||
|
+ # get current document version
|
||||||
|
+ curDocVersion = events.DocumentIncarnation
|
||||||
|
+ lastDocVersion = self.node.getAttr(attr_lastDocVersion)
|
||||||
|
+ ocf.logger.debug("monitor: lastDocVersion = %s; curDocVersion = %s" % (lastDocVersion, curDocVersion))
|
||||||
|
+
|
||||||
|
+ # split events local/remote
|
||||||
|
+ (localEvents, remoteEvents) = self.node.separateEvents(events.Events)
|
||||||
|
+
|
||||||
|
+ # ensure local events are only executing once
|
||||||
|
+ if curDocVersion == lastDocVersion:
|
||||||
|
+ ocf.logger.info("monitor: already handled curDocVersion, skip")
|
||||||
|
+ return ocf.OCF_SUCCESS
|
||||||
|
+
|
||||||
|
+ localAzEventIDs = set()
|
||||||
|
+ for e in localEvents:
|
||||||
|
+ localAzEventIDs.add(e.EventId)
|
||||||
|
+
|
||||||
|
+ curState = self.node.getState()
|
||||||
|
+ clusterEventIDs = self.node.getEventIDs()
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("monitor: curDocVersion has not been handled yet")
|
||||||
|
+
|
||||||
|
+ if clusterEventIDs:
|
||||||
|
+ # there are pending events set, so our state must be STOPPING or IN_EVENT
|
||||||
|
+ i = 0; touchedEventIDs = False
|
||||||
|
+ while i < len(clusterEventIDs):
|
||||||
|
+ # clean up pending events that are already finished according to AZ
|
||||||
|
+ if clusterEventIDs[i] not in localAzEventIDs:
|
||||||
|
+ ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
|
||||||
|
+ clusterEventIDs.pop(i)
|
||||||
|
+ touchedEventIDs = True
|
||||||
|
+ else:
|
||||||
|
+ i += 1
|
||||||
|
+ if len(clusterEventIDs) > 0:
|
||||||
|
+ # there are still pending events (either because we're still stopping, or because the event is still in place)
|
||||||
|
+ # either way, we need to wait
|
||||||
|
+ if touchedEventIDs:
|
||||||
|
+ ocf.logger.info("monitor: added new local clusterEvent %s" % str(clusterEventIDs))
|
||||||
|
+ self.node.setEventIDs(clusterEventIDs)
|
||||||
|
+ else:
|
||||||
|
+ ocf.logger.info("monitor: no local clusterEvents were updated")
|
||||||
|
+ else:
|
||||||
|
+ # there are no more pending events left after cleanup
|
||||||
|
+ if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
|
||||||
|
+ # and no pending resources on the node -> set it back online
|
||||||
|
+ ocf.logger.info("monitor: all local events finished -> clean up, put node online and AVAILABLE")
|
||||||
|
+ curState = self.node.updateNodeStateAndEvents(AVAILABLE, None)
|
||||||
|
+ self.node.putNodeOnline()
|
||||||
|
+ clusterHelper.removeHoldFromNodes()
|
||||||
|
+ # If Azure Scheduled Events are not used for 24 hours (e.g. because the cluster was asleep), it will be disabled for a VM.
|
||||||
|
+ # When the cluster wakes up and starts using it again, the DocumentIncarnation is reset.
|
||||||
|
+ # We need to remove it during cleanup, otherwise azure-events-az will not process the event after wakeup
|
||||||
|
+ self.node.setAttr(attr_lastDocVersion, None)
|
||||||
|
+ else:
|
||||||
|
+ ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
|
||||||
|
+ else:
|
||||||
|
+ if curState == AVAILABLE:
|
||||||
|
+ if len(localAzEventIDs) > 0:
|
||||||
|
+ if clusterHelper.otherNodesAvailable(self.node):
|
||||||
|
+ ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
|
||||||
|
+ curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
|
||||||
|
+ else:
|
||||||
|
+ ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
|
||||||
|
+ self.node.setState(ON_HOLD)
|
||||||
|
+ else:
|
||||||
|
+ ocf.logger.debug("monitor: no local azEvents to handle")
|
||||||
|
+
|
||||||
|
+ if curState == STOPPING:
|
||||||
|
+ eventIDsForNode = {}
|
||||||
|
+ if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
|
||||||
|
+ if not self.node.isNodeInStandby():
|
||||||
|
+ ocf.logger.info("monitor: all local resources are started properly -> put node standby and exit")
|
||||||
|
+ self.node.putNodeStandby()
|
||||||
|
+ return ocf.OCF_SUCCESS
|
||||||
|
+
|
||||||
|
+ for e in localEvents:
|
||||||
|
+ ocf.logger.info("monitor: handling remote event %s (%s; nodes = %s)" % (e.EventId, e.EventType, str(e.Resources)))
|
||||||
|
+ # before we can force an event to start, we need to ensure all nodes involved have stopped their resources
|
||||||
|
+ if e.EventStatus == "Scheduled":
|
||||||
|
+ allNodesStopped = True
|
||||||
|
+ for azName in e.Resources:
|
||||||
|
+ hostName = clusterHelper.getHostNameFromAzName(azName)
|
||||||
|
+ state = self.node.getState(node=hostName)
|
||||||
|
+ if state == STOPPING:
|
||||||
|
+ # the only way we can continue is when node state is STOPPING, but all resources have been stopped
|
||||||
|
+ if not clusterHelper.allResourcesStoppedOnNode(hostName):
|
||||||
|
+ ocf.logger.info("monitor: (at least) node %s has still resources running -> wait" % hostName)
|
||||||
|
+ allNodesStopped = False
|
||||||
|
+ break
|
||||||
|
+ elif state in (AVAILABLE, IN_EVENT, ON_HOLD):
|
||||||
|
+ ocf.logger.info("monitor: node %s is still %s -> remote event needs to be picked up locally" % (hostName, nodeStateToString(state)))
|
||||||
|
+ allNodesStopped = False
|
||||||
|
+ break
|
||||||
|
+ if allNodesStopped:
|
||||||
|
+ ocf.logger.info("monitor: nodes %s are stopped -> add remote event %s to force list" % (str(e.Resources), e.EventId))
|
||||||
|
+ for n in e.Resources:
|
||||||
|
+ hostName = clusterHelper.getHostNameFromAzName(n)
|
||||||
|
+ if hostName in eventIDsForNode:
|
||||||
|
+ eventIDsForNode[hostName].append(e.EventId)
|
||||||
|
+ else:
|
||||||
|
+ eventIDsForNode[hostName] = [e.EventId]
|
||||||
|
+ elif e.EventStatus == "Started":
|
||||||
|
+ ocf.logger.info("monitor: remote event already started")
|
||||||
|
+
|
||||||
|
+ # force the start of all events whose nodes are ready (i.e. have no more resources running)
|
||||||
|
+ if len(eventIDsForNode.keys()) > 0:
|
||||||
|
+ eventIDsToForce = set([item for sublist in eventIDsForNode.values() for item in sublist])
|
||||||
|
+ ocf.logger.info("monitor: set nodes %s to IN_EVENT; force remote events %s" % (str(eventIDsForNode.keys()), str(eventIDsToForce)))
|
||||||
|
+ for node, eventId in eventIDsForNode.items():
|
||||||
|
+ self.node.updateNodeStateAndEvents(IN_EVENT, eventId, node=node)
|
||||||
|
+ azHelper.forceEvents(eventIDsToForce)
|
||||||
|
+ self.node.setAttr(attr_lastDocVersion, curDocVersion)
|
||||||
|
+ else:
|
||||||
|
+ ocf.logger.info("monitor: some local resources are not clean yet -> wait")
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("monitor: finished")
|
||||||
|
+ return ocf.OCF_SUCCESS
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+def setLoglevel(verbose):
|
||||||
|
+ # set up writing into syslog
|
||||||
|
+ loglevel = default_loglevel
|
||||||
|
+ if verbose:
|
||||||
|
+ opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1))
|
||||||
|
+ urllib2.install_opener(opener)
|
||||||
|
+ loglevel = ocf.logging.DEBUG
|
||||||
|
+ ocf.log.setLevel(loglevel)
|
||||||
|
+
|
||||||
|
+description = (
|
||||||
|
+ "Microsoft Azure Scheduled Events monitoring agent",
|
||||||
|
+ """This resource agent implements a monitor for scheduled
|
||||||
|
+(maintenance) events for a Microsoft Azure VM.
|
||||||
|
+
|
||||||
|
+If any relevant events are found, it moves all Pacemaker resources
|
||||||
|
+away from the affected node to allow for a graceful shutdown.
|
||||||
|
+
|
||||||
|
+ Usage:
|
||||||
|
+ [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
|
||||||
|
+
|
||||||
|
+ action (required): Supported values: monitor, help, meta-data
|
||||||
|
+ eventTypes (optional): List of event types to be considered
|
||||||
|
+ relevant by the resource agent (comma-separated).
|
||||||
|
+ Supported values: Freeze,Reboot,Redeploy
|
||||||
|
+ Default = Reboot,Redeploy
|
||||||
|
+/ verbose (optional): If set to true, displays debug info.
|
||||||
|
+ Default = false
|
||||||
|
+
|
||||||
|
+ Deployment:
|
||||||
|
+ crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
|
||||||
|
+ op monitor interval=10s
|
||||||
|
+ crm configure clone cln_azure-events-az rsc_azure-events-az
|
||||||
|
+
|
||||||
|
+For further information on Microsoft Azure Scheduled Events, please
|
||||||
|
+refer to the following documentation:
|
||||||
|
+https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events
|
||||||
|
+""")
|
||||||
|
+
|
||||||
|
+def monitor_action(eventTypes):
|
||||||
|
+ relevantEventTypes = set(eventTypes.split(",") if eventTypes else [])
|
||||||
|
+ ra = raAzEvents(relevantEventTypes)
|
||||||
|
+ return ra.monitor()
|
||||||
|
+
|
||||||
|
+def validate_action(eventTypes):
|
||||||
|
+ if eventTypes:
|
||||||
|
+ for event in eventTypes.split(","):
|
||||||
|
+ if event not in ("Freeze", "Reboot", "Redeploy"):
|
||||||
|
+ ocf.ocf_exit_reason("Event type not one of Freeze, Reboot, Redeploy: " + eventTypes)
|
||||||
|
+ return ocf.OCF_ERR_CONFIGURED
|
||||||
|
+ return ocf.OCF_SUCCESS
|
||||||
|
+
|
||||||
|
+def main():
|
||||||
|
+ agent = ocf.Agent("azure-events-az", shortdesc=description[0], longdesc=description[1])
|
||||||
|
+ agent.add_parameter(
|
||||||
|
+ "eventTypes",
|
||||||
|
+ shortdesc="List of resources to be considered",
|
||||||
|
+ longdesc="A comma-separated list of event types that will be handled by this resource agent. (Possible values: Freeze,Reboot,Redeploy)",
|
||||||
|
+ content_type="string",
|
||||||
|
+ default="Reboot,Redeploy")
|
||||||
|
+ agent.add_parameter(
|
||||||
|
+ "verbose",
|
||||||
|
+ shortdesc="Enable verbose agent logging",
|
||||||
|
+ longdesc="Set to true to enable verbose logging",
|
||||||
|
+ content_type="boolean",
|
||||||
|
+ default="false")
|
||||||
|
+ agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
|
||||||
|
+ agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
|
||||||
|
+ agent.add_action("validate-all", timeout=20, handler=validate_action)
|
||||||
|
+ agent.add_action("monitor", timeout=240, interval=10, handler=monitor_action)
|
||||||
|
+ setLoglevel(ocf.is_true(ocf.get_parameter("verbose", "false")))
|
||||||
|
+ agent.run()
|
||||||
|
+
|
||||||
|
+if __name__ == '__main__':
|
||||||
|
+ main()
|
||||||
|
\ No newline at end of file
|
||||||
|
|
||||||
|
From a95337d882c7cc69d604b050159ad50b679f18be Mon Sep 17 00:00:00 2001
|
||||||
|
From: MSSedusch <sedusch@microsoft.com>
|
||||||
|
Date: Thu, 2 Jun 2022 14:10:33 +0200
|
||||||
|
Subject: [PATCH 2/2] Remove developer documentation
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/azure-events-az.in | 11 -----------
|
||||||
|
1 file changed, 11 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
|
||||||
|
index 616fc8d9e..59d095306 100644
|
||||||
|
--- a/heartbeat/azure-events-az.in
|
||||||
|
+++ b/heartbeat/azure-events-az.in
|
||||||
|
@@ -723,17 +723,6 @@ description = (
|
||||||
|
If any relevant events are found, it moves all Pacemaker resources
|
||||||
|
away from the affected node to allow for a graceful shutdown.
|
||||||
|
|
||||||
|
- Usage:
|
||||||
|
- [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
|
||||||
|
-
|
||||||
|
- action (required): Supported values: monitor, help, meta-data
|
||||||
|
- eventTypes (optional): List of event types to be considered
|
||||||
|
- relevant by the resource agent (comma-separated).
|
||||||
|
- Supported values: Freeze,Reboot,Redeploy
|
||||||
|
- Default = Reboot,Redeploy
|
||||||
|
-/ verbose (optional): If set to true, displays debug info.
|
||||||
|
- Default = false
|
||||||
|
-
|
||||||
|
Deployment:
|
||||||
|
crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
|
||||||
|
op monitor interval=10s
|
79
bz2109159-storage_mon-1-exit-after-help.patch
Normal file
79
bz2109159-storage_mon-1-exit-after-help.patch
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
From b3eadb8523b599af800a7c772606aa0e90cf142f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fujii Masao <fujii@postgresql.org>
|
||||||
|
Date: Tue, 19 Jul 2022 17:03:02 +0900
|
||||||
|
Subject: [PATCH 1/2] Make storage_mon -h exit just after printing help
|
||||||
|
messages.
|
||||||
|
|
||||||
|
Previously, when -h or an invalid option was specified, storage_mon
|
||||||
|
printed the help messages, proceeded processing and then could
|
||||||
|
throw an error. This was not the behavior that, e.g., users who want
|
||||||
|
to specify -h option to see the help messages are expecting. To fix
|
||||||
|
this issue, this commit changes storage_mon so that it exits just
|
||||||
|
after printing the help messages when -h or an invalid option is
|
||||||
|
specified.
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 4 +++-
|
||||||
|
1 file changed, 3 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 7b65bb419..1303371f7 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
|
||||||
|
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
|
||||||
|
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
|
||||||
|
fprintf(f, " --verbose emit extra output to stdout\n");
|
||||||
|
- fprintf(f, " --help print this messages\n");
|
||||||
|
+ fprintf(f, " --help print this messages, then exit\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check one device */
|
||||||
|
@@ -178,9 +178,11 @@ int main(int argc, char *argv[])
|
||||||
|
break;
|
||||||
|
case 'h':
|
||||||
|
usage(argv[0], stdout);
|
||||||
|
+ exit(0);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0], stderr);
|
||||||
|
+ exit(-1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
From e62795f02d25a772a239e0a4f9eb9d6470c134ee Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fujii Masao <fujii@postgresql.org>
|
||||||
|
Date: Tue, 19 Jul 2022 17:56:32 +0900
|
||||||
|
Subject: [PATCH 2/2] Fix typo in help message.
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 1303371f7..3c82d5ee8 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
|
||||||
|
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
|
||||||
|
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
|
||||||
|
fprintf(f, " --verbose emit extra output to stdout\n");
|
||||||
|
- fprintf(f, " --help print this messages, then exit\n");
|
||||||
|
+ fprintf(f, " --help print this message\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check one device */
|
||||||
|
@@ -178,11 +178,11 @@ int main(int argc, char *argv[])
|
||||||
|
break;
|
||||||
|
case 'h':
|
||||||
|
usage(argv[0], stdout);
|
||||||
|
- exit(0);
|
||||||
|
+ return 0;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
usage(argv[0], stderr);
|
||||||
|
- exit(-1);
|
||||||
|
+ return -1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
36
bz2109159-storage_mon-2-fix-specified-scores-count.patch
Normal file
36
bz2109159-storage_mon-2-fix-specified-scores-count.patch
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
From a68957e8f1e8169438acf5a4321f47ed7d8ceec1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fujii Masao <fujii@postgresql.org>
|
||||||
|
Date: Tue, 19 Jul 2022 20:28:38 +0900
|
||||||
|
Subject: [PATCH] storage_mon: Fix bug in checking of number of specified
|
||||||
|
scores.
|
||||||
|
|
||||||
|
Previously specifying the maximum allowed number (MAX_DEVICES, currently 25)
|
||||||
|
of devices and scores as arguments could cause storage_mon to fail unexpectedly
|
||||||
|
with the error message "too many scores, max is 25". This issue happened
|
||||||
|
because storage_mon checked whether the number of specified scores
|
||||||
|
exceeded the upper limit by using the local variable "device_count" indicating
|
||||||
|
the number of specified devices (not scores). So after the maximum number
|
||||||
|
of devices arguments were interpreted, the appearance of next score argument
|
||||||
|
caused the error even when the number of interpreted scores arguments had
|
||||||
|
not exceeded the maximum.
|
||||||
|
|
||||||
|
This patch fixes storage_mon so that it uses the local variable "score_count"
|
||||||
|
indicating the number of specified scores, to check whether arguments for
|
||||||
|
scores are specified more than the upper limit.
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 3c82d5ee8..c749076c2 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -154,7 +154,7 @@ int main(int argc, char *argv[])
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 's':
|
||||||
|
- if (device_count < MAX_DEVICES) {
|
||||||
|
+ if (score_count < MAX_DEVICES) {
|
||||||
|
int score = atoi(optarg);
|
||||||
|
if (score < 1 || score > 10) {
|
||||||
|
fprintf(stderr, "Score must be between 1 and 10 inclusive\n");
|
43
bz2109159-storage_mon-3-fix-child-process-exit.patch
Normal file
43
bz2109159-storage_mon-3-fix-child-process-exit.patch
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
From c6ea93fcb499c84c3d8e9aad2ced65065a3f6d51 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Fujii Masao <fujii@postgresql.org>
|
||||||
|
Date: Tue, 19 Jul 2022 22:34:08 +0900
|
||||||
|
Subject: [PATCH] Fix bug in handling of child process exit.
|
||||||
|
|
||||||
|
When storage_mon detects that a child process exits with zero,
|
||||||
|
it resets the test_forks[] entry for the child process to 0, to avoid
|
||||||
|
waitpid() for the process again in the loop. But, previously,
|
||||||
|
storage_mon didn't do that when it detected that a child process
|
||||||
|
exited with non-zero. Which caused waitpid() to be called again
|
||||||
|
for the process already gone and to report an error like
|
||||||
|
"waitpid on XXX failed: No child processes" unexpectedly.
|
||||||
|
In this case, basically storage_mon should wait until all the child
|
||||||
|
processes exit and return the final score, instead.
|
||||||
|
|
||||||
|
This patch fixes this issue by making storage_mon reset test_forks[]
|
||||||
|
entry even when a child process exits with non-zero.
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 8 ++++----
|
||||||
|
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 3c82d5ee8..83a48ca36 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -232,13 +232,13 @@ int main(int argc, char *argv[])
|
||||||
|
|
||||||
|
if (w == test_forks[i]) {
|
||||||
|
if (WIFEXITED(wstatus)) {
|
||||||
|
- if (WEXITSTATUS(wstatus) == 0) {
|
||||||
|
- finished_count++;
|
||||||
|
- test_forks[i] = 0;
|
||||||
|
- } else {
|
||||||
|
+ if (WEXITSTATUS(wstatus) != 0) {
|
||||||
|
syslog(LOG_ERR, "Error reading from device %s", devices[i]);
|
||||||
|
final_score += scores[i];
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ finished_count++;
|
||||||
|
+ test_forks[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
417
bz2109159-storage_mon-4-fix-possible-false-negatives.patch
Normal file
417
bz2109159-storage_mon-4-fix-possible-false-negatives.patch
Normal file
@ -0,0 +1,417 @@
|
|||||||
|
From 0bb52cf9985bda47e13940761b3d8e2eaddf377c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
|
||||||
|
Date: Wed, 10 Aug 2022 17:35:54 +0900
|
||||||
|
Subject: [PATCH 1/4] storage_mon: Use the O_DIRECT flag in open() to eliminate
|
||||||
|
cache effects
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/Makefile.am | 1 +
|
||||||
|
tools/storage_mon.c | 82 +++++++++++++++++++++++++++++++++------------
|
||||||
|
2 files changed, 61 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/Makefile.am b/tools/Makefile.am
|
||||||
|
index 1309223b4..08323fee3 100644
|
||||||
|
--- a/tools/Makefile.am
|
||||||
|
+++ b/tools/Makefile.am
|
||||||
|
@@ -74,6 +74,7 @@ sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl
|
||||||
|
findif_SOURCES = findif.c
|
||||||
|
|
||||||
|
storage_mon_SOURCES = storage_mon.c
|
||||||
|
+storage_mon_CFLAGS = -D_GNU_SOURCE
|
||||||
|
|
||||||
|
if BUILD_TICKLE
|
||||||
|
halib_PROGRAMS += tickle_tcp
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 930ead41c..ba87492fc 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -31,23 +31,27 @@ static void usage(char *name, FILE *f)
|
||||||
|
fprintf(f, " --help print this message\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Check one device */
|
||||||
|
-static void *test_device(const char *device, int verbose, int inject_error_percent)
|
||||||
|
+static int open_device(const char *device, int verbose)
|
||||||
|
{
|
||||||
|
- uint64_t devsize;
|
||||||
|
int device_fd;
|
||||||
|
int res;
|
||||||
|
+ uint64_t devsize;
|
||||||
|
off_t seek_spot;
|
||||||
|
- char buffer[512];
|
||||||
|
|
||||||
|
- if (verbose) {
|
||||||
|
- printf("Testing device %s\n", device);
|
||||||
|
+#if defined(__linux__) || defined(__FreeBSD__)
|
||||||
|
+ device_fd = open(device, O_RDONLY|O_DIRECT);
|
||||||
|
+ if (device_fd >= 0) {
|
||||||
|
+ return device_fd;
|
||||||
|
+ } else if (errno != EINVAL) {
|
||||||
|
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
device_fd = open(device, O_RDONLY);
|
||||||
|
if (device_fd < 0) {
|
||||||
|
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
- exit(-1);
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
|
||||||
|
@@ -57,11 +61,12 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
if (res != 0) {
|
||||||
|
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
close(device_fd);
|
||||||
|
- exit(-1);
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
if (verbose) {
|
||||||
|
fprintf(stderr, "%s: size=%zu\n", device, devsize);
|
||||||
|
}
|
||||||
|
+
|
||||||
|
/* Don't fret about real randomness */
|
||||||
|
srand(time(NULL) + getpid());
|
||||||
|
/* Pick a random place on the device - sector aligned */
|
||||||
|
@@ -70,35 +75,64 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
if (res < 0) {
|
||||||
|
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
|
||||||
|
close(device_fd);
|
||||||
|
- exit(-1);
|
||||||
|
+ return -1;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
if (verbose) {
|
||||||
|
printf("%s: reading from pos %ld\n", device, seek_spot);
|
||||||
|
}
|
||||||
|
+ return device_fd;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Check one device */
|
||||||
|
+static void *test_device(const char *device, int verbose, int inject_error_percent)
|
||||||
|
+{
|
||||||
|
+ int device_fd;
|
||||||
|
+ int sec_size = 0;
|
||||||
|
+ int res;
|
||||||
|
+ void *buffer;
|
||||||
|
+
|
||||||
|
+ if (verbose) {
|
||||||
|
+ printf("Testing device %s\n", device);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ device_fd = open_device(device, verbose);
|
||||||
|
+ if (device_fd < 0) {
|
||||||
|
+ exit(-1);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ioctl(device_fd, BLKSSZGET, &sec_size);
|
||||||
|
+ if (sec_size == 0) {
|
||||||
|
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- res = read(device_fd, buffer, sizeof(buffer));
|
||||||
|
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
|
||||||
|
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ res = read(device_fd, buffer, sec_size);
|
||||||
|
+ free(buffer);
|
||||||
|
if (res < 0) {
|
||||||
|
fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
|
||||||
|
- close(device_fd);
|
||||||
|
- exit(-1);
|
||||||
|
+ goto error;
|
||||||
|
}
|
||||||
|
- if (res < (int)sizeof(buffer)) {
|
||||||
|
- fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
|
||||||
|
- close(device_fd);
|
||||||
|
- exit(-1);
|
||||||
|
+ if (res < sec_size) {
|
||||||
|
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
|
||||||
|
+ goto error;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fake an error */
|
||||||
|
- if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
|
||||||
|
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
|
||||||
|
- close(device_fd);
|
||||||
|
- exit(-1);
|
||||||
|
+ if (inject_error_percent) {
|
||||||
|
+ srand(time(NULL) + getpid());
|
||||||
|
+ if ((rand() % 100) < inject_error_percent) {
|
||||||
|
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
res = close(device_fd);
|
||||||
|
if (res != 0) {
|
||||||
|
fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno));
|
||||||
|
- close(device_fd);
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -106,6 +140,10 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
printf("%s: done\n", device);
|
||||||
|
}
|
||||||
|
exit(0);
|
||||||
|
+
|
||||||
|
+error:
|
||||||
|
+ close(device_fd);
|
||||||
|
+ exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
|
||||||
|
From ce4e632f29ed6b86b82a959eac5844655baed153 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
|
||||||
|
Date: Mon, 15 Aug 2022 19:17:21 +0900
|
||||||
|
Subject: [PATCH 2/4] storage_mon: fix build-related issues
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 6 ++++--
|
||||||
|
1 file changed, 4 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index ba87492fc..e34d1975a 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -38,7 +38,6 @@ static int open_device(const char *device, int verbose)
|
||||||
|
uint64_t devsize;
|
||||||
|
off_t seek_spot;
|
||||||
|
|
||||||
|
-#if defined(__linux__) || defined(__FreeBSD__)
|
||||||
|
device_fd = open(device, O_RDONLY|O_DIRECT);
|
||||||
|
if (device_fd >= 0) {
|
||||||
|
return device_fd;
|
||||||
|
@@ -46,7 +45,6 @@ static int open_device(const char *device, int verbose)
|
||||||
|
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
device_fd = open(device, O_RDONLY);
|
||||||
|
if (device_fd < 0) {
|
||||||
|
@@ -100,7 +98,11 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
+#ifdef __FreeBSD__
|
||||||
|
+ ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
|
||||||
|
+#else
|
||||||
|
ioctl(device_fd, BLKSSZGET, &sec_size);
|
||||||
|
+#endif
|
||||||
|
if (sec_size == 0) {
|
||||||
|
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
From 7a0aaa0dfdebeab3fae9fe9ddc412c3d1f610273 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
|
||||||
|
Date: Wed, 24 Aug 2022 17:36:23 +0900
|
||||||
|
Subject: [PATCH 3/4] storage_mon: do random lseek even with O_DIRECT, etc
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 118 ++++++++++++++++++++++----------------------
|
||||||
|
1 file changed, 58 insertions(+), 60 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index e34d1975a..0bdb48649 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -31,38 +31,43 @@ static void usage(char *name, FILE *f)
|
||||||
|
fprintf(f, " --help print this message\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int open_device(const char *device, int verbose)
|
||||||
|
+/* Check one device */
|
||||||
|
+static void *test_device(const char *device, int verbose, int inject_error_percent)
|
||||||
|
{
|
||||||
|
+ uint64_t devsize;
|
||||||
|
+ int flags = O_RDONLY | O_DIRECT;
|
||||||
|
int device_fd;
|
||||||
|
int res;
|
||||||
|
- uint64_t devsize;
|
||||||
|
off_t seek_spot;
|
||||||
|
|
||||||
|
- device_fd = open(device, O_RDONLY|O_DIRECT);
|
||||||
|
- if (device_fd >= 0) {
|
||||||
|
- return device_fd;
|
||||||
|
- } else if (errno != EINVAL) {
|
||||||
|
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
- return -1;
|
||||||
|
+ if (verbose) {
|
||||||
|
+ printf("Testing device %s\n", device);
|
||||||
|
}
|
||||||
|
|
||||||
|
- device_fd = open(device, O_RDONLY);
|
||||||
|
+ device_fd = open(device, flags);
|
||||||
|
if (device_fd < 0) {
|
||||||
|
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
- return -1;
|
||||||
|
+ if (errno != EINVAL) {
|
||||||
|
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
+ exit(-1);
|
||||||
|
+ }
|
||||||
|
+ flags &= ~O_DIRECT;
|
||||||
|
+ device_fd = open(device, flags);
|
||||||
|
+ if (device_fd < 0) {
|
||||||
|
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
|
||||||
|
+ exit(-1);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
|
||||||
|
#else
|
||||||
|
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
|
||||||
|
#endif
|
||||||
|
- if (res != 0) {
|
||||||
|
+ if (res < 0) {
|
||||||
|
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
- close(device_fd);
|
||||||
|
- return -1;
|
||||||
|
+ goto error;
|
||||||
|
}
|
||||||
|
if (verbose) {
|
||||||
|
- fprintf(stderr, "%s: size=%zu\n", device, devsize);
|
||||||
|
+ printf("%s: opened %s O_DIRECT, size=%zu\n", device, (flags & O_DIRECT)?"with":"without", devsize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Don't fret about real randomness */
|
||||||
|
@@ -72,65 +77,58 @@ static int open_device(const char *device, int verbose)
|
||||||
|
res = lseek(device_fd, seek_spot, SEEK_SET);
|
||||||
|
if (res < 0) {
|
||||||
|
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
|
||||||
|
- close(device_fd);
|
||||||
|
- return -1;
|
||||||
|
+ goto error;
|
||||||
|
}
|
||||||
|
if (verbose) {
|
||||||
|
printf("%s: reading from pos %ld\n", device, seek_spot);
|
||||||
|
}
|
||||||
|
- return device_fd;
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Check one device */
|
||||||
|
-static void *test_device(const char *device, int verbose, int inject_error_percent)
|
||||||
|
-{
|
||||||
|
- int device_fd;
|
||||||
|
- int sec_size = 0;
|
||||||
|
- int res;
|
||||||
|
- void *buffer;
|
||||||
|
-
|
||||||
|
- if (verbose) {
|
||||||
|
- printf("Testing device %s\n", device);
|
||||||
|
- }
|
||||||
|
|
||||||
|
- device_fd = open_device(device, verbose);
|
||||||
|
- if (device_fd < 0) {
|
||||||
|
- exit(-1);
|
||||||
|
- }
|
||||||
|
+ if (flags & O_DIRECT) {
|
||||||
|
+ int sec_size = 0;
|
||||||
|
+ void *buffer;
|
||||||
|
|
||||||
|
#ifdef __FreeBSD__
|
||||||
|
- ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
|
||||||
|
+ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
|
||||||
|
#else
|
||||||
|
- ioctl(device_fd, BLKSSZGET, &sec_size);
|
||||||
|
+ res = ioctl(device_fd, BLKSSZGET, &sec_size);
|
||||||
|
#endif
|
||||||
|
- if (sec_size == 0) {
|
||||||
|
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
- goto error;
|
||||||
|
- }
|
||||||
|
+ if (res < 0) {
|
||||||
|
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
|
||||||
|
- if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
|
||||||
|
- fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
|
||||||
|
- goto error;
|
||||||
|
- }
|
||||||
|
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
|
||||||
|
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+ res = read(device_fd, buffer, sec_size);
|
||||||
|
+ free(buffer);
|
||||||
|
+ if (res < 0) {
|
||||||
|
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+ if (res < sec_size) {
|
||||||
|
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+ } else {
|
||||||
|
+ char buffer[512];
|
||||||
|
|
||||||
|
- res = read(device_fd, buffer, sec_size);
|
||||||
|
- free(buffer);
|
||||||
|
- if (res < 0) {
|
||||||
|
- fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
|
||||||
|
- goto error;
|
||||||
|
- }
|
||||||
|
- if (res < sec_size) {
|
||||||
|
- fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
|
||||||
|
- goto error;
|
||||||
|
+ res = read(device_fd, buffer, sizeof(buffer));
|
||||||
|
+ if (res < 0) {
|
||||||
|
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
+ if (res < (int)sizeof(buffer)) {
|
||||||
|
+ fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
|
||||||
|
+ goto error;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fake an error */
|
||||||
|
- if (inject_error_percent) {
|
||||||
|
- srand(time(NULL) + getpid());
|
||||||
|
- if ((rand() % 100) < inject_error_percent) {
|
||||||
|
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
|
||||||
|
- goto error;
|
||||||
|
- }
|
||||||
|
+ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
|
||||||
|
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
|
||||||
|
+ goto error;
|
||||||
|
}
|
||||||
|
res = close(device_fd);
|
||||||
|
if (res != 0) {
|
||||||
|
|
||||||
|
From db97e055a17526cec056c595844a9d8851e3ee19 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
|
||||||
|
Date: Thu, 25 Aug 2022 16:03:46 +0900
|
||||||
|
Subject: [PATCH 4/4] storage_mon: improve error messages when ioctl() fails
|
||||||
|
|
||||||
|
---
|
||||||
|
tools/storage_mon.c | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
|
||||||
|
index 0bdb48649..f829c5081 100644
|
||||||
|
--- a/tools/storage_mon.c
|
||||||
|
+++ b/tools/storage_mon.c
|
||||||
|
@@ -63,7 +63,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
|
||||||
|
#endif
|
||||||
|
if (res < 0) {
|
||||||
|
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
+ fprintf(stderr, "Failed to get device size for %s: %s\n", device, strerror(errno));
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (verbose) {
|
||||||
|
@@ -93,7 +93,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
|
||||||
|
res = ioctl(device_fd, BLKSSZGET, &sec_size);
|
||||||
|
#endif
|
||||||
|
if (res < 0) {
|
||||||
|
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
|
||||||
|
+ fprintf(stderr, "Failed to get block device sector size for %s: %s\n", device, strerror(errno));
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
|
@ -69,7 +69,7 @@
|
|||||||
Name: resource-agents
|
Name: resource-agents
|
||||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||||
Version: 4.9.0
|
Version: 4.9.0
|
||||||
Release: 29%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
Release: 30%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||||
License: GPLv2+ and LGPLv2+
|
License: GPLv2+ and LGPLv2+
|
||||||
URL: https://github.com/ClusterLabs/resource-agents
|
URL: https://github.com/ClusterLabs/resource-agents
|
||||||
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
|
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
|
||||||
@ -120,6 +120,12 @@ Patch28: bz2103370-ocf-tester-2-remove-deprecated-lrmd-lrmadmin-code.patch
|
|||||||
Patch29: bz1908146-bz1908147-bz1908148-bz1949114-openstack-agents-set-domain-parameters-default.patch
|
Patch29: bz1908146-bz1908147-bz1908148-bz1949114-openstack-agents-set-domain-parameters-default.patch
|
||||||
Patch30: bz2090370-CTDB-move-process-to-root-cgroup-if-rt-enabled.patch
|
Patch30: bz2090370-CTDB-move-process-to-root-cgroup-if-rt-enabled.patch
|
||||||
Patch31: bz2116941-ethmonitor-ovsmonitor-pgsql-fix-attrd_updater-q.patch
|
Patch31: bz2116941-ethmonitor-ovsmonitor-pgsql-fix-attrd_updater-q.patch
|
||||||
|
Patch32: bz2109159-storage_mon-1-exit-after-help.patch
|
||||||
|
Patch33: bz2109159-storage_mon-2-fix-specified-scores-count.patch
|
||||||
|
Patch34: bz2109159-storage_mon-3-fix-child-process-exit.patch
|
||||||
|
Patch35: bz2109159-storage_mon-4-fix-possible-false-negatives.patch
|
||||||
|
Patch36: bz1905820-LVM-activate-fix-return-codes.patch
|
||||||
|
Patch37: bz1977012-azure-events-az-new-ra.patch
|
||||||
|
|
||||||
# bundle patches
|
# bundle patches
|
||||||
Patch1000: 7-gcp-bundled.patch
|
Patch1000: 7-gcp-bundled.patch
|
||||||
@ -331,6 +337,12 @@ exit 1
|
|||||||
%patch29 -p1
|
%patch29 -p1
|
||||||
%patch30 -p1
|
%patch30 -p1
|
||||||
%patch31 -p1
|
%patch31 -p1
|
||||||
|
%patch32 -p1
|
||||||
|
%patch33 -p1
|
||||||
|
%patch34 -p1
|
||||||
|
%patch35 -p1
|
||||||
|
%patch36 -p1
|
||||||
|
%patch37 -p1
|
||||||
|
|
||||||
chmod 755 heartbeat/nova-compute-wait
|
chmod 755 heartbeat/nova-compute-wait
|
||||||
chmod 755 heartbeat/NovaEvacuate
|
chmod 755 heartbeat/NovaEvacuate
|
||||||
@ -350,7 +362,7 @@ tar -xzf %SOURCE1 -C %{bundled_lib_dir}/gcp
|
|||||||
# gcloud support info
|
# gcloud support info
|
||||||
%patch1002 -p1
|
%patch1002 -p1
|
||||||
# configure: skip bundled gcp lib checks
|
# configure: skip bundled gcp lib checks
|
||||||
%patch1003 -p1
|
%patch1003 -p1 -F1
|
||||||
# gcloud remove python 2 detection
|
# gcloud remove python 2 detection
|
||||||
%patch1004 -p1
|
%patch1004 -p1
|
||||||
# rename gcloud
|
# rename gcloud
|
||||||
@ -906,6 +918,15 @@ ccs_update_schema > /dev/null 2>&1 ||:
|
|||||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Sep 8 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-30
|
||||||
|
- storage_mon: fix specified scores count and possible false negatives
|
||||||
|
- LVM-activate: use correct return codes to fix unexpected behaviour
|
||||||
|
- azure-events-az: new resource agent
|
||||||
|
|
||||||
|
Resolves: rhbz#2109159
|
||||||
|
Resolves: rhbz#1905820
|
||||||
|
Resolves: rhbz#1977012
|
||||||
|
|
||||||
* Wed Aug 10 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29
|
* Wed Aug 10 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29
|
||||||
- ethmonitor/pgsql: remove attrd_updater "-q" parameter to solve issue
|
- ethmonitor/pgsql: remove attrd_updater "-q" parameter to solve issue
|
||||||
with Pacemaker 2.1.3+ not ignoring it
|
with Pacemaker 2.1.3+ not ignoring it
|
||||||
|
Loading…
Reference in New Issue
Block a user