- storage_mon: fix specified scores count and possible false negatives
- LVM-activate: use correct return codes to fix unexpected behaviour
- azure-events-az: new resource agent

Resolves: rhbz#2109161
Resolves: rhbz#2102126
Resolves: rhbz#2111147
This commit is contained in:
parent 9b275c46c1
commit 2405be6fe6
195
bz2102126-LVM-activate-fix-return-codes.patch
Normal file
@@ -0,0 +1,195 @@
From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Sun, 13 Dec 2020 14:58:34 -0800
Subject: [PATCH] LVM-activate: Fix return codes

OCF_ERR_ARGS should be used when the configuration isn't valid for the
**local** node, and so the resource should not attempt to start again
locally until the issue is corrected.

OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
**any** node, and so the resource should not attempt to start again
anywhere until the issue is corrected.

One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
running (or improperly not running) be an OCF_ERR_GENERIC or
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
user intervention suggests OCF_ERR_ARGS. The approach here is to use
GENERIC for all of these. One can make the case that "improperly
running" should use ARGS, since a process must be manually stopped to
fix the issue, and that "improperly not running" should use GENERIC,
since there's a small chance the process died and will be recovered in
some way.

More info about return code meanings:
- https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted

Resolves: RHBZ#1905820

Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..e951a08e9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -333,8 +333,7 @@ config_verify()
real=$(lvmconfig "$name" | cut -d'=' -f2)
if [ "$real" != "$expect" ]; then
ocf_exit_reason "config item $name: expect=$expect but real=$real"
- exit $OCF_ERR_CONFIGURED
-
+ exit $OCF_ERR_ARGS
fi

return $OCF_SUCCESS
@@ -366,12 +365,12 @@ lvmlockd_check()
fi

ocf_exit_reason "lvmlockd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

if pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

return $OCF_SUCCESS
@@ -402,17 +401,17 @@ clvmd_check()
# Good: clvmd is running, and lvmlockd is not running
if ! pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

if pgrep lvmetad >/dev/null 2>&1 ; then
ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

if pgrep lvmlockd >/dev/null 2>&1 ; then
ocf_exit_reason "lvmlockd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

return $OCF_SUCCESS
@@ -424,12 +423,12 @@ systemid_check()
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
if [ "$source" = "" ] || [ "$source" = "none" ]; then
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi

if [ -z ${SYSTEM_ID} ]; then
ocf_exit_reason "local/system_id is not set!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi

return $OCF_SUCCESS
@@ -441,18 +440,18 @@ tagging_check()
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
if ! lvm dumpconfig activation/volume_list; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi

# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi

return $OCF_SUCCESS
@@ -463,13 +462,13 @@ read_parameters()
if [ -z "$VG" ]
then
ocf_exit_reason "You must identify the volume group name!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi

if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
then
ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi

# Convert VG_access_mode from string to index
@@ -519,8 +518,10 @@ lvm_validate() {
exit $OCF_NOT_RUNNING
fi

+ # Could be a transient error (e.g., iSCSI connection
+ # issue) so use OCF_ERR_GENERIC
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi

# Inconsistency might be due to missing physical volumes, which doesn't
@@ -549,7 +550,7 @@ lvm_validate() {
mode=$?
if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi

# Nothing to do if the VG has no logical volume
@@ -561,11 +562,11 @@ lvm_validate() {

# Check if the given $LV is in the $VG
if [ -n "$LV" ]; then
- OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
+ output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
if [ $? -ne 0 ]; then
- ocf_log err "lvs: ${OUT}"
+ ocf_log err "lvs: ${output}"
ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
fi

@@ -580,7 +581,6 @@ lvm_validate() {
3)
systemid_check
;;
-
4)
tagging_check
;;
@@ -808,10 +808,9 @@ lvm_status() {
dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
2>&1
if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- else
- return $OCF_SUCCESS
+ return $OCF_ERR_GENERIC
fi
+ return $OCF_SUCCESS
;;
*)
ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"
79
bz2109161-storage_mon-1-exit-after-help.patch
Normal file
@@ -0,0 +1,79 @@
From b3eadb8523b599af800a7c772606aa0e90cf142f Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:03:02 +0900
Subject: [PATCH 1/2] Make storage_mon -h exit just after printing help
messages.

Previously, when -h or an invalid option was specified, storage_mon
printed the help messages, proceeded processing and then could
throw an error. This was not the behavior that, e.g., users who want
to specify -h option to see the help messages are expecting. To fix
this issue, this commit changes storage_mon so that it exits just
after printing the help messages when -h or an invalid option is
specified.
---
tools/storage_mon.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 7b65bb419..1303371f7 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages\n");
+ fprintf(f, " --help print this messages, then exit\n");
}

/* Check one device */
@@ -178,9 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
+ exit(0);
break;
default:
usage(argv[0], stderr);
+ exit(-1);
break;
}


From e62795f02d25a772a239e0a4f9eb9d6470c134ee Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:56:32 +0900
Subject: [PATCH 2/2] Fix typo in help message.

---
tools/storage_mon.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 1303371f7..3c82d5ee8 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages, then exit\n");
+ fprintf(f, " --help print this message\n");
}

/* Check one device */
@@ -178,11 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
- exit(0);
+ return 0;
break;
default:
usage(argv[0], stderr);
- exit(-1);
+ return -1;
break;
}
36
bz2109161-storage_mon-2-fix-specified-scores-count.patch
Normal file
@@ -0,0 +1,36 @@
From a68957e8f1e8169438acf5a4321f47ed7d8ceec1 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 20:28:38 +0900
Subject: [PATCH] storage_mon: Fix bug in checking of number of specified
scores.

Previously specifying the maximum allowed number (MAX_DEVICES, currently 25)
of devices and scores as arguments could cause storage_mon to fail unexpectedly
with the error message "too many scores, max is 25". This issue happened
because storage_mon checked whether the number of specified scores
exceeded the upper limit by using the local variable "device_count" indicating
the number of specified devices (not scores). So after the maximum number
of devices arguments were interpreted, the appearance of next score argument
caused the error even when the number of interpreted scores arguments had
not exceeded the maximum.

This patch fixes storage_mon so that it uses the local variable "score_count"
indicating the number of specified scores, to check whether arguments for
scores are specified more than the upper limit.
---
tools/storage_mon.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..c749076c2 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -154,7 +154,7 @@ int main(int argc, char *argv[])
}
break;
case 's':
- if (device_count < MAX_DEVICES) {
+ if (score_count < MAX_DEVICES) {
int score = atoi(optarg);
if (score < 1 || score > 10) {
fprintf(stderr, "Score must be between 1 and 10 inclusive\n");
43
bz2109161-storage_mon-3-fix-child-process-exit.patch
Normal file
@@ -0,0 +1,43 @@
From c6ea93fcb499c84c3d8e9aad2ced65065a3f6d51 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 22:34:08 +0900
Subject: [PATCH] Fix bug in handling of child process exit.

When storage_mon detects that a child process exits with zero,
it resets the test_forks[] entry for the child process to 0, to avoid
waitpid() for the process again in the loop. But, previously,
storage_mon didn't do that when it detected that a child process
exited with non-zero. Which caused waitpid() to be called again
for the process already gone and to report an error like
"waitpid on XXX failed: No child processes" unexpectedly.
In this case, basically storage_mon should wait until all the child
processes exit and return the final score, instead.

This patch fixes this issue by making storage_mon reset test_works[]
entry even when a child process exits with non-zero.
---
tools/storage_mon.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..83a48ca36 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -232,13 +232,13 @@ int main(int argc, char *argv[])

if (w == test_forks[i]) {
if (WIFEXITED(wstatus)) {
- if (WEXITSTATUS(wstatus) == 0) {
- finished_count++;
- test_forks[i] = 0;
- } else {
+ if (WEXITSTATUS(wstatus) != 0) {
syslog(LOG_ERR, "Error reading from device %s", devices[i]);
final_score += scores[i];
}
+
+ finished_count++;
+ test_forks[i] = 0;
}
}
}
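For readers skimming the patch above, a minimal standalone sketch of the reaping pattern its commit message describes (editor's illustration only, not code from the patch; the test_forks/scores/devices names simply mirror the upstream source, and the sketch clears a child's slot on any termination, not only on WIFEXITED, to stay simple):

/* Illustrative sketch, assuming the storage_mon-style bookkeeping above:
 * each child's slot is cleared whether it reported success or failure,
 * so waitpid() is never retried on a PID that has already been reaped. */
#include <stdio.h>
#include <stdlib.h>
#include <syslog.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#define MAX_DEVICES 25

static pid_t test_forks[MAX_DEVICES];      /* child PID per device; 0 = reaped */
static int scores[MAX_DEVICES];            /* score charged when a device fails */
static const char *devices[MAX_DEVICES];
static int device_count;

static int reap_children(void)
{
	int finished_count = 0;
	int final_score = 0;

	while (finished_count < device_count) {
		for (int i = 0; i < device_count; i++) {
			int wstatus;

			if (test_forks[i] <= 0)
				continue;                /* already reaped */
			if (waitpid(test_forks[i], &wstatus, WNOHANG) != test_forks[i])
				continue;                /* still running */
			if (!(WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)) {
				/* child failed (non-zero exit or killed by a signal) */
				syslog(LOG_ERR, "Error reading from device %s", devices[i]);
				final_score += scores[i];
			}
			finished_count++;                /* clear the slot in every case */
			test_forks[i] = 0;
		}
		usleep(100000);                          /* don't busy-wait between passes */
	}
	return final_score;
}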
417
bz2109161-storage_mon-4-fix-possible-false-negatives.patch
Normal file
@@ -0,0 +1,417 @@
From 0bb52cf9985bda47e13940761b3d8e2eaddf377c Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 10 Aug 2022 17:35:54 +0900
Subject: [PATCH 1/4] storage_mon: Use the O_DIRECT flag in open() to eliminate
cache effects

---
tools/Makefile.am | 1 +
tools/storage_mon.c | 82 +++++++++++++++++++++++++++++++++------------
2 files changed, 61 insertions(+), 22 deletions(-)

diff --git a/tools/Makefile.am b/tools/Makefile.am
index 1309223b4..08323fee3 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -74,6 +74,7 @@ sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl
findif_SOURCES = findif.c

storage_mon_SOURCES = storage_mon.c
+storage_mon_CFLAGS = -D_GNU_SOURCE

if BUILD_TICKLE
halib_PROGRAMS += tickle_tcp
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 930ead41c..ba87492fc 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,23 +31,27 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}

-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
+static int open_device(const char *device, int verbose)
{
- uint64_t devsize;
int device_fd;
int res;
+ uint64_t devsize;
off_t seek_spot;
- char buffer[512];

- if (verbose) {
- printf("Testing device %s\n", device);
+#if defined(__linux__) || defined(__FreeBSD__)
+ device_fd = open(device, O_RDONLY|O_DIRECT);
+ if (device_fd >= 0) {
+ return device_fd;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ return -1;
}
+#endif

device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- exit(-1);
+ return -1;
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
@@ -57,11 +61,12 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res != 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
if (verbose) {
fprintf(stderr, "%s: size=%zu\n", device, devsize);
}
+
/* Don't fret about real randomness */
srand(time(NULL) + getpid());
/* Pick a random place on the device - sector aligned */
@@ -70,35 +75,64 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
-
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
+ return device_fd;
+}
+
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
+{
+ int device_fd;
+ int sec_size = 0;
+ int res;
+ void *buffer;
+
+ if (verbose) {
+ printf("Testing device %s\n", device);
+ }
+
+ device_fd = open_device(device, verbose);
+ if (device_fd < 0) {
+ exit(-1);
+ }
+
+ ioctl(device_fd, BLKSSZGET, &sec_size);
+ if (sec_size == 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }

- res = read(device_fd, buffer, sizeof(buffer));
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
if (res < 0) {
fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- close(device_fd);
- exit(-1);
+ goto error;
}
- if (res < (int)sizeof(buffer)) {
- fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
- close(device_fd);
- exit(-1);
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
}

/* Fake an error */
- if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- close(device_fd);
- exit(-1);
+ if (inject_error_percent) {
+ srand(time(NULL) + getpid());
+ if ((rand() % 100) < inject_error_percent) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
+ }
}
res = close(device_fd);
if (res != 0) {
fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno));
- close(device_fd);
exit(-1);
}

@@ -106,6 +140,10 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
printf("%s: done\n", device);
}
exit(0);
+
+error:
+ close(device_fd);
+ exit(-1);
}

int main(int argc, char *argv[])

From ce4e632f29ed6b86b82a959eac5844655baed153 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Mon, 15 Aug 2022 19:17:21 +0900
Subject: [PATCH 2/4] storage_mon: fix build-related issues

---
tools/storage_mon.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index ba87492fc..e34d1975a 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -38,7 +38,6 @@ static int open_device(const char *device, int verbose)
uint64_t devsize;
off_t seek_spot;

-#if defined(__linux__) || defined(__FreeBSD__)
device_fd = open(device, O_RDONLY|O_DIRECT);
if (device_fd >= 0) {
return device_fd;
@@ -46,7 +45,6 @@ static int open_device(const char *device, int verbose)
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
return -1;
}
-#endif

device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
@@ -100,7 +98,11 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
exit(-1);
}

+#ifdef __FreeBSD__
+ ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+#else
ioctl(device_fd, BLKSSZGET, &sec_size);
+#endif
if (sec_size == 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
goto error;

From 7a0aaa0dfdebeab3fae9fe9ddc412c3d1f610273 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 24 Aug 2022 17:36:23 +0900
Subject: [PATCH 3/4] storage_mon: do random lseek even with O_DIRECT, etc

---
tools/storage_mon.c | 118 ++++++++++++++++++++++----------------------
1 file changed, 58 insertions(+), 60 deletions(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index e34d1975a..0bdb48649 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,38 +31,43 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}

-static int open_device(const char *device, int verbose)
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
{
+ uint64_t devsize;
+ int flags = O_RDONLY | O_DIRECT;
int device_fd;
int res;
- uint64_t devsize;
off_t seek_spot;

- device_fd = open(device, O_RDONLY|O_DIRECT);
- if (device_fd >= 0) {
- return device_fd;
- } else if (errno != EINVAL) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (verbose) {
+ printf("Testing device %s\n", device);
}

- device_fd = open(device, O_RDONLY);
+ device_fd = open(device, flags);
if (device_fd < 0) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
+ flags &= ~O_DIRECT;
+ device_fd = open(device, flags);
+ if (device_fd < 0) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
#else
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
- if (res != 0) {
+ if (res < 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
- fprintf(stderr, "%s: size=%zu\n", device, devsize);
+ printf("%s: opened %s O_DIRECT, size=%zu\n", device, (flags & O_DIRECT)?"with":"without", devsize);
}

/* Don't fret about real randomness */
@@ -72,65 +77,58 @@ static int open_device(const char *device, int verbose)
res = lseek(device_fd, seek_spot, SEEK_SET);
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
- return device_fd;
-}
-
-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
-{
- int device_fd;
- int sec_size = 0;
- int res;
- void *buffer;
-
- if (verbose) {
- printf("Testing device %s\n", device);
- }

- device_fd = open_device(device, verbose);
- if (device_fd < 0) {
- exit(-1);
- }
+ if (flags & O_DIRECT) {
+ int sec_size = 0;
+ void *buffer;

#ifdef __FreeBSD__
- ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
#else
- ioctl(device_fd, BLKSSZGET, &sec_size);
+ res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
- if (sec_size == 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- goto error;
- }
+ if (res < 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }

- if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
- fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
- goto error;
- }
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
+ }
+ } else {
+ char buffer[512];

- res = read(device_fd, buffer, sec_size);
- free(buffer);
- if (res < 0) {
- fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- goto error;
- }
- if (res < sec_size) {
- fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
- goto error;
+ res = read(device_fd, buffer, sizeof(buffer));
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < (int)sizeof(buffer)) {
+ fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
+ goto error;
+ }
}

/* Fake an error */
- if (inject_error_percent) {
- srand(time(NULL) + getpid());
- if ((rand() % 100) < inject_error_percent) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- goto error;
- }
+ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
}
res = close(device_fd);
if (res != 0) {

From db97e055a17526cec056c595844a9d8851e3ee19 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Thu, 25 Aug 2022 16:03:46 +0900
Subject: [PATCH 4/4] storage_mon: improve error messages when ioctl() fails

---
tools/storage_mon.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 0bdb48649..f829c5081 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -63,7 +63,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get device size for %s: %s\n", device, strerror(errno));
goto error;
}
if (verbose) {
@@ -93,7 +93,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get block device sector size for %s: %s\n", device, strerror(errno));
goto error;
}
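For context, a compact standalone sketch of the read path the four commits above converge on (editor's illustration only, not code from the patches; Linux-only, error handling trimmed): try O_DIRECT first to bypass the page cache, fall back to a buffered open when the device or filesystem rejects it with EINVAL, and size the aligned read buffer from the device's logical sector size (BLKSSZGET).

/* Minimal sketch, assuming a Linux block device path such as /dev/sdX. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>

int probe_device(const char *device)
{
	int flags = O_RDONLY | O_DIRECT;
	int fd = open(device, flags);

	if (fd < 0 && errno == EINVAL) {       /* O_DIRECT not supported here */
		flags &= ~O_DIRECT;
		fd = open(device, flags);
	}
	if (fd < 0) {
		fprintf(stderr, "open %s: %s\n", device, strerror(errno));
		return -1;
	}

	if (flags & O_DIRECT) {
		int sec_size = 0;
		void *buf;

		if (ioctl(fd, BLKSSZGET, &sec_size) < 0 || sec_size <= 0) {
			close(fd);
			return -1;
		}
		/* O_DIRECT needs an aligned buffer and an aligned read length */
		if (posix_memalign(&buf, sysconf(_SC_PAGESIZE), sec_size) != 0) {
			close(fd);
			return -1;
		}
		ssize_t n = read(fd, buf, sec_size);
		free(buf);
		close(fd);
		return (n == sec_size) ? 0 : -1;
	}

	char buf[512];                          /* buffered fallback path */
	ssize_t n = read(fd, buf, sizeof(buf));
	close(fd);
	return (n == (ssize_t)sizeof(buf)) ? 0 : -1;
}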
903
bz2111147-azure-events-az-new-ra.patch
Normal file
@ -0,0 +1,903 @@
|
|||||||
|
From 5dcd5153f0318e4766f7f4d3e61dfdb4b352c39c Mon Sep 17 00:00:00 2001
|
||||||
|
From: MSSedusch <sedusch@microsoft.com>
|
||||||
|
Date: Mon, 30 May 2022 15:08:10 +0200
|
||||||
|
Subject: [PATCH 1/2] add new Azure Events AZ resource agent
|
||||||
|
|
||||||
|
---
|
||||||
|
.gitignore | 1 +
|
||||||
|
configure.ac | 8 +
|
||||||
|
doc/man/Makefile.am | 4 +
|
||||||
|
heartbeat/Makefile.am | 4 +
|
||||||
|
heartbeat/azure-events-az.in | 782 +++++++++++++++++++++++++++++++++++
|
||||||
|
5 files changed, 799 insertions(+)
|
||||||
|
create mode 100644 heartbeat/azure-events-az.in
|
||||||
|
|
||||||
|
diff --git a/.gitignore b/.gitignore
|
||||||
|
index 0c259b5cf..e2b7c039c 100644
|
||||||
|
--- a/.gitignore
|
||||||
|
+++ b/.gitignore
|
||||||
|
@@ -54,6 +54,7 @@ heartbeat/Squid
|
||||||
|
heartbeat/SysInfo
|
||||||
|
heartbeat/aws-vpc-route53
|
||||||
|
heartbeat/azure-events
|
||||||
|
+heartbeat/azure-events-az
|
||||||
|
heartbeat/clvm
|
||||||
|
heartbeat/conntrackd
|
||||||
|
heartbeat/dnsupdate
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index eeecfad0e..5716a2be2 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -523,6 +523,13 @@ if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
fi
|
||||||
|
AM_CONDITIONAL(BUILD_AZURE_EVENTS, test $BUILD_AZURE_EVENTS -eq 1)
|
||||||
|
|
||||||
|
+BUILD_AZURE_EVENTS_AZ=1
|
||||||
|
+if test -z "$PYTHON" || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
+ BUILD_AZURE_EVENTS_AZ=0
|
||||||
|
+ AC_MSG_WARN("Not building azure-events-az")
|
||||||
|
+fi
|
||||||
|
+AM_CONDITIONAL(BUILD_AZURE_EVENTS_AZ, test $BUILD_AZURE_EVENTS_AZ -eq 1)
|
||||||
|
+
|
||||||
|
BUILD_GCP_PD_MOVE=1
|
||||||
|
if test -z "$PYTHON" || test "x${HAVE_PYMOD_GOOGLEAPICLIENT}" != xyes || test $BUILD_OCF_PY -eq 0; then
|
||||||
|
BUILD_GCP_PD_MOVE=0
|
||||||
|
@@ -976,6 +983,7 @@ rgmanager/Makefile \
|
||||||
|
|
||||||
|
dnl Files we output that need to be executable
|
||||||
|
AC_CONFIG_FILES([heartbeat/azure-events], [chmod +x heartbeat/azure-events])
|
||||||
|
+AC_CONFIG_FILES([heartbeat/azure-events-az], [chmod +x heartbeat/azure-events-az])
|
||||||
|
AC_CONFIG_FILES([heartbeat/AoEtarget], [chmod +x heartbeat/AoEtarget])
|
||||||
|
AC_CONFIG_FILES([heartbeat/ManageRAID], [chmod +x heartbeat/ManageRAID])
|
||||||
|
AC_CONFIG_FILES([heartbeat/ManageVE], [chmod +x heartbeat/ManageVE])
|
||||||
|
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
|
||||||
|
index cd8fd16bf..658c700ac 100644
|
||||||
|
--- a/doc/man/Makefile.am
|
||||||
|
+++ b/doc/man/Makefile.am
|
||||||
|
@@ -219,6 +219,10 @@ if BUILD_AZURE_EVENTS
|
||||||
|
man_MANS += ocf_heartbeat_azure-events.7
|
||||||
|
endif
|
||||||
|
|
||||||
|
+if BUILD_AZURE_EVENTS_AZ
|
||||||
|
+man_MANS += ocf_heartbeat_azure-events-az.7
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
if BUILD_GCP_PD_MOVE
|
||||||
|
man_MANS += ocf_heartbeat_gcp-pd-move.7
|
||||||
|
endif
|
||||||
|
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
|
||||||
|
index 20d41e36a..1133dc13e 100644
|
||||||
|
--- a/heartbeat/Makefile.am
|
||||||
|
+++ b/heartbeat/Makefile.am
|
||||||
|
@@ -188,6 +188,10 @@ if BUILD_AZURE_EVENTS
|
||||||
|
ocf_SCRIPTS += azure-events
|
||||||
|
endif
|
||||||
|
|
||||||
|
+if BUILD_AZURE_EVENTS_AZ
|
||||||
|
+ocf_SCRIPTS += azure-events-az
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
if BUILD_GCP_PD_MOVE
|
||||||
|
ocf_SCRIPTS += gcp-pd-move
|
||||||
|
endif
|
||||||
|
diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..616fc8d9e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/heartbeat/azure-events-az.in
|
||||||
|
@@ -0,0 +1,782 @@
|
||||||
|
+#!@PYTHON@ -tt
|
||||||
|
+#
|
||||||
|
+# Resource agent for monitoring Azure Scheduled Events
|
||||||
|
+#
|
||||||
|
+# License: GNU General Public License (GPL)
|
||||||
|
+# (c) 2018 Tobias Niekamp, Microsoft Corp.
|
||||||
|
+# and Linux-HA contributors
|
||||||
|
+
|
||||||
|
+import os
|
||||||
|
+import sys
|
||||||
|
+import time
|
||||||
|
+import subprocess
|
||||||
|
+import json
|
||||||
|
+try:
|
||||||
|
+ import urllib2
|
||||||
|
+ from urllib2 import URLError
|
||||||
|
+except ImportError:
|
||||||
|
+ import urllib.request as urllib2
|
||||||
|
+ from urllib.error import URLError
|
||||||
|
+import socket
|
||||||
|
+from collections import defaultdict
|
||||||
|
+
|
||||||
|
+OCF_FUNCTIONS_DIR = os.environ.get("OCF_FUNCTIONS_DIR", "%s/lib/heartbeat" % os.environ.get("OCF_ROOT"))
|
||||||
|
+sys.path.append(OCF_FUNCTIONS_DIR)
|
||||||
|
+import ocf
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+VERSION = "0.10"
|
||||||
|
+USER_AGENT = "Pacemaker-ResourceAgent/%s %s" % (VERSION, ocf.distro())
|
||||||
|
+
|
||||||
|
+attr_globalPullState = "azure-events-az_globalPullState"
|
||||||
|
+attr_lastDocVersion = "azure-events-az_lastDocVersion"
|
||||||
|
+attr_curNodeState = "azure-events-az_curNodeState"
|
||||||
|
+attr_pendingEventIDs = "azure-events-az_pendingEventIDs"
|
||||||
|
+attr_healthstate = "#health-azure"
|
||||||
|
+
|
||||||
|
+default_loglevel = ocf.logging.INFO
|
||||||
|
+default_relevantEventTypes = set(["Reboot", "Redeploy"])
|
||||||
|
+
|
||||||
|
+global_pullMaxAttempts = 3
|
||||||
|
+global_pullDelaySecs = 1
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class attrDict(defaultdict):
|
||||||
|
+ """
|
||||||
|
+ A wrapper for accessing dict keys like an attribute
|
||||||
|
+ """
|
||||||
|
+ def __init__(self, data):
|
||||||
|
+ super(attrDict, self).__init__(attrDict)
|
||||||
|
+ for d in data.keys():
|
||||||
|
+ self.__setattr__(d, data[d])
|
||||||
|
+
|
||||||
|
+ def __getattr__(self, key):
|
||||||
|
+ try:
|
||||||
|
+ return self[key]
|
||||||
|
+ except KeyError:
|
||||||
|
+ raise AttributeError(key)
|
||||||
|
+
|
||||||
|
+ def __setattr__(self, key, value):
|
||||||
|
+ self[key] = value
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class azHelper:
|
||||||
|
+ """
|
||||||
|
+ Helper class for Azure's metadata API (including Scheduled Events)
|
||||||
|
+ """
|
||||||
|
+ metadata_host = "http://169.254.169.254/metadata"
|
||||||
|
+ instance_api = "instance"
|
||||||
|
+ events_api = "scheduledevents"
|
||||||
|
+ api_version = "2019-08-01"
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _sendMetadataRequest(endpoint, postData=None):
|
||||||
|
+ """
|
||||||
|
+ Send a request to Azure's Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ url = "%s/%s?api-version=%s" % (azHelper.metadata_host, endpoint, azHelper.api_version)
|
||||||
|
+ data = ""
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: begin; endpoint = %s, postData = %s" % (endpoint, postData))
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: url = %s" % url)
|
||||||
|
+
|
||||||
|
+ if postData and type(postData) != bytes:
|
||||||
|
+ postData = postData.encode()
|
||||||
|
+
|
||||||
|
+ req = urllib2.Request(url, postData)
|
||||||
|
+ req.add_header("Metadata", "true")
|
||||||
|
+ req.add_header("User-Agent", USER_AGENT)
|
||||||
|
+ try:
|
||||||
|
+ resp = urllib2.urlopen(req)
|
||||||
|
+ except URLError as e:
|
||||||
|
+ if hasattr(e, 'reason'):
|
||||||
|
+ ocf.logger.warning("Failed to reach the server: %s" % e.reason)
|
||||||
|
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
+ elif hasattr(e, 'code'):
|
||||||
|
+ ocf.logger.warning("The server couldn\'t fulfill the request. Error code: %s" % e.code)
|
||||||
|
+ clusterHelper.setAttr(attr_globalPullState, "IDLE")
|
||||||
|
+ else:
|
||||||
|
+ data = resp.read()
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: response = %s" % data)
|
||||||
|
+
|
||||||
|
+ if data:
|
||||||
|
+ data = json.loads(data)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("_sendMetadataRequest: finished")
|
||||||
|
+ return data
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getInstanceInfo():
|
||||||
|
+ """
|
||||||
|
+ Fetch details about the current VM from Azure's Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: begin")
|
||||||
|
+
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.instance_api)
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: json = %s" % jsondata)
|
||||||
|
+
|
||||||
|
+ if jsondata:
|
||||||
|
+ ocf.logger.debug("getInstanceInfo: finished, returning {}".format(jsondata["compute"]))
|
||||||
|
+ return attrDict(jsondata["compute"])
|
||||||
|
+ else:
|
||||||
|
+ ocf.ocf_exit_reason("getInstanceInfo: Unable to get instance info")
|
||||||
|
+ sys.exit(ocf.OCF_ERR_GENERIC)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def pullScheduledEvents():
|
||||||
|
+ """
|
||||||
|
+ Retrieve all currently scheduled events via Azure Metadata Service API
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: begin")
|
||||||
|
+
|
||||||
|
+ jsondata = azHelper._sendMetadataRequest(azHelper.events_api)
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: json = %s" % jsondata)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("pullScheduledEvents: finished")
|
||||||
|
+ return attrDict(jsondata)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def forceEvents(eventIDs):
|
||||||
|
+ """
|
||||||
|
+ Force a set of events to start immediately
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("forceEvents: begin")
|
||||||
|
+
|
||||||
|
+ events = []
|
||||||
|
+ for e in eventIDs:
|
||||||
|
+ events.append({
|
||||||
|
+ "EventId": e,
|
||||||
|
+ })
|
||||||
|
+ postData = {
|
||||||
|
+ "StartRequests" : events
|
||||||
|
+ }
|
||||||
|
+ ocf.logger.info("forceEvents: postData = %s" % postData)
|
||||||
|
+ resp = azHelper._sendMetadataRequest(azHelper.events_api, postData=json.dumps(postData))
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("forceEvents: finished")
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+class clusterHelper:
|
||||||
|
+ """
|
||||||
|
+ Helper functions for Pacemaker control via crm
|
||||||
|
+ """
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _getLocation(node):
|
||||||
|
+ """
|
||||||
|
+ Helper function to retrieve local/global attributes
|
||||||
|
+ """
|
||||||
|
+ if node:
|
||||||
|
+ return ["--node", node]
|
||||||
|
+ else:
|
||||||
|
+ return ["--type", "crm_config"]
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def _exec(command, *args):
|
||||||
|
+ """
|
||||||
|
+ Helper function to execute a UNIX command
|
||||||
|
+ """
|
||||||
|
+ args = list(args)
|
||||||
|
+ ocf.logger.debug("_exec: begin; command = %s, args = %s" % (command, str(args)))
|
||||||
|
+
|
||||||
|
+ def flatten(*n):
|
||||||
|
+ return (str(e) for a in n
|
||||||
|
+ for e in (flatten(*a) if isinstance(a, (tuple, list)) else (str(a),)))
|
||||||
|
+ command = list(flatten([command] + args))
|
||||||
|
+ ocf.logger.debug("_exec: cmd = %s" % " ".join(command))
|
||||||
|
+ try:
|
||||||
|
+ ret = subprocess.check_output(command)
|
||||||
|
+ if type(ret) != str:
|
||||||
|
+ ret = ret.decode()
|
||||||
|
+ ocf.logger.debug("_exec: return = %s" % ret)
|
||||||
|
+ return ret.rstrip()
|
||||||
|
+ except Exception as err:
|
||||||
|
+ ocf.logger.exception(err)
|
||||||
|
+ return None
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def setAttr(key, value, node=None):
|
||||||
|
+ """
|
||||||
|
+ Set the value of a specific global/local attribute in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("setAttr: begin; key = %s, value = %s, node = %s" % (key, value, node))
|
||||||
|
+
|
||||||
|
+ if value:
|
||||||
|
+ ret = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--update", value,
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+ else:
|
||||||
|
+ ret = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--delete",
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("setAttr: finished")
|
||||||
|
+ return len(ret) == 0
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getAttr(key, node=None):
|
||||||
|
+ """
|
||||||
|
+ Retrieve a global/local attribute from the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getAttr: begin; key = %s, node = %s" % (key, node))
|
||||||
|
+
|
||||||
|
+ val = clusterHelper._exec("crm_attribute",
|
||||||
|
+ "--name", key,
|
||||||
|
+ "--query", "--quiet",
|
||||||
|
+ "--default", "",
|
||||||
|
+ clusterHelper._getLocation(node))
|
||||||
|
+ ocf.logger.debug("getAttr: finished")
|
||||||
|
+ if not val:
|
||||||
|
+ return None
|
||||||
|
+ return val if not val.isdigit() else int(val)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getAllNodes():
|
||||||
|
+ """
|
||||||
|
+ Get a list of hostnames for all nodes in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("getAllNodes: begin")
|
||||||
|
+
|
||||||
|
+ nodes = []
|
||||||
|
+ nodeList = clusterHelper._exec("crm_node", "--list")
|
||||||
|
+ for n in nodeList.split("\n"):
|
||||||
|
+ nodes.append(n.split()[1])
|
||||||
|
+ ocf.logger.debug("getAllNodes: finished; return %s" % str(nodes))
|
||||||
|
+
|
||||||
|
+ return nodes
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def getHostNameFromAzName(azName):
|
||||||
|
+ """
|
||||||
|
+ Helper function to get the actual host name from an Azure node name
|
||||||
|
+ """
|
||||||
|
+ return clusterHelper.getAttr("hostName_%s" % azName)
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def removeHoldFromNodes():
|
||||||
|
+ """
|
||||||
|
+ Remove the ON_HOLD state from all nodes in the Pacemaker cluster
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("removeHoldFromNodes: begin")
|
||||||
|
+
|
||||||
|
+ for n in clusterHelper.getAllNodes():
|
||||||
|
+ if clusterHelper.getAttr(attr_curNodeState, node=n) == "ON_HOLD":
|
||||||
|
+ clusterHelper.setAttr(attr_curNodeState, "AVAILABLE", node=n)
|
||||||
|
+ ocf.logger.info("removeHoldFromNodes: removed ON_HOLD from node %s" % n)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("removeHoldFromNodes: finished")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def otherNodesAvailable(exceptNode):
|
||||||
|
+ """
|
||||||
|
+ Check if there are any nodes (except a given node) in the Pacemaker cluster that have state AVAILABLE
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: begin; exceptNode = %s" % exceptNode)
|
||||||
|
+
|
||||||
|
+ for n in clusterHelper.getAllNodes():
|
||||||
|
+ state = clusterHelper.getAttr(attr_curNodeState, node=n)
|
||||||
|
+ state = stringToNodeState(state) if state else AVAILABLE
|
||||||
|
+ if state == AVAILABLE and n != exceptNode.hostName:
|
||||||
|
+ ocf.logger.info("otherNodesAvailable: at least %s is available" % n)
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: finished")
|
||||||
|
+ return True
|
||||||
|
+ ocf.logger.info("otherNodesAvailable: no other nodes are available")
|
||||||
|
+ ocf.logger.debug("otherNodesAvailable: finished")
|
||||||
|
+
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def transitionSummary():
|
||||||
|
+ """
|
||||||
|
+ Get the current Pacemaker transition summary (used to check if all resources are stopped when putting a node standby)
|
||||||
|
+ """
|
||||||
|
+ # <tniek> Is a global crm_simulate "too much"? Or would it be sufficient it there are no planned transitions for a particular node?
|
||||||
|
+ # # crm_simulate -Ls
|
||||||
|
+ # Transition Summary:
|
||||||
|
+ # * Promote rsc_SAPHana_HN1_HDB03:0 (Slave -> Master hsr3-db1)
|
||||||
|
+ # * Stop rsc_SAPHana_HN1_HDB03:1 (hsr3-db0)
|
||||||
|
+ # * Move rsc_ip_HN1_HDB03 (Started hsr3-db0 -> hsr3-db1)
|
||||||
|
+ # * Start rsc_nc_HN1_HDB03 (hsr3-db1)
|
||||||
|
+ # # Excepted result when there are no pending actions:
|
||||||
|
+ # Transition Summary:
|
||||||
|
+ ocf.logger.debug("transitionSummary: begin")
|
||||||
|
+
|
||||||
|
+ summary = clusterHelper._exec("crm_simulate", "-Ls")
|
||||||
|
+ if not summary:
|
||||||
|
+ ocf.logger.warning("transitionSummary: could not load transition summary")
|
||||||
|
+ return False
|
||||||
|
+ if summary.find("Transition Summary:") < 0:
|
||||||
|
+ ocf.logger.warning("transitionSummary: received unexpected transition summary: %s" % summary)
|
||||||
|
+ return False
|
||||||
|
+ summary = summary.split("Transition Summary:")[1]
|
||||||
|
+ ret = summary.split("\n").pop(0)
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("transitionSummary: finished; return = %s" % str(ret))
|
||||||
|
+ return ret
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def listOperationsOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Get a list of all current operations for a given node (used to check if any resources are pending)
|
||||||
|
+ """
|
||||||
|
+ # hsr3-db1:/home/tniek # crm_resource --list-operations -N hsr3-db0
|
||||||
|
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_start_0 (node=hsr3-db0, call=91, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=115ms): complete
|
||||||
|
+ # rsc_azure-events-az (ocf::heartbeat:azure-events-az): Started: rsc_azure-events-az_monitor_10000 (node=hsr3-db0, call=93, rc=0, last-rc-change=Fri Jun 8 22:37:47 2018, exec=197ms): complete
|
||||||
|
+ # rsc_SAPHana_HN1_HDB03 (ocf::suse:SAPHana): Master: rsc_SAPHana_HN1_HDB03_start_0 (node=hsr3-db0, call=-1, rc=193, last-rc-change=Fri Jun 8 22:37:46 2018, exec=0ms): pending
|
||||||
|
+ # rsc_SAPHanaTopology_HN1_HDB03 (ocf::suse:SAPHanaTopology): Started: rsc_SAPHanaTopology_HN1_HDB03_start_0 (node=hsr3-db0, call=90, rc=0, last-rc-change=Fri Jun 8 22:37:46 2018, exec=3214ms): complete
|
||||||
|
+ ocf.logger.debug("listOperationsOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ resources = clusterHelper._exec("crm_resource", "--list-operations", "-N", node)
|
||||||
|
+ if len(resources) == 0:
|
||||||
|
+ ret = []
|
||||||
|
+ else:
|
||||||
|
+ ret = resources.split("\n")
|
||||||
|
+
|
||||||
|
+ ocf.logger.debug("listOperationsOnNode: finished; return = %s" % str(ret))
|
||||||
|
+ return ret
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def noPendingResourcesOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Check that there are no pending resources on a given node
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ for r in clusterHelper.listOperationsOnNode(node):
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: * %s" % r)
|
||||||
|
+ resource = r.split()[-1]
|
||||||
|
+ if resource == "pending":
|
||||||
|
+ ocf.logger.info("noPendingResourcesOnNode: found resource %s that is still pending" % resource)
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+ ocf.logger.info("noPendingResourcesOnNode: no pending resources on node %s" % node)
|
||||||
|
+ ocf.logger.debug("noPendingResourcesOnNode: finished; return = True")
|
||||||
|
+
|
||||||
|
+ return True
|
||||||
|
+
|
||||||
|
+ @staticmethod
|
||||||
|
+ def allResourcesStoppedOnNode(node):
|
||||||
|
+ """
|
||||||
|
+ Check that all resources on a given node are stopped
|
||||||
|
+ """
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: begin; node = %s" % node)
|
||||||
|
+
|
||||||
|
+ if clusterHelper.noPendingResourcesOnNode(node):
|
||||||
|
+ if len(clusterHelper.transitionSummary()) == 0:
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: no pending resources on node %s and empty transition summary" % node)
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = True")
|
||||||
|
+ return True
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: transition summary is not empty")
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+ ocf.logger.info("allResourcesStoppedOnNode: still pending resources on node %s" % node)
|
||||||
|
+ ocf.logger.debug("allResourcesStoppedOnNode: finished; return = False")
|
||||||
|
+ return False
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
|
||||||
|
+AVAILABLE = 0 # Node is online and ready to handle events
|
||||||
|
+STOPPING = 1 # Standby has been triggered, but some resources are still running
|
||||||
|
+IN_EVENT = 2 # All resources are stopped, and event has been initiated via Azure Metadata Service
|
||||||
|
+ON_HOLD = 3 # Node has a pending event that cannot be started there are no other nodes available
|
||||||
|
+
|
||||||
|
+def stringToNodeState(name):
|
||||||
|
+ if type(name) == int: return name
|
||||||
|
+ if name == "STOPPING": return STOPPING
|
||||||
|
+ if name == "IN_EVENT": return IN_EVENT
|
||||||
|
+ if name == "ON_HOLD": return ON_HOLD
|
||||||
|
+ return AVAILABLE
|
||||||
|
+
|
||||||
|
+def nodeStateToString(state):
|
||||||
|
+ if state == STOPPING: return "STOPPING"
|
||||||
|
+ if state == IN_EVENT: return "IN_EVENT"
|
||||||
|
+ if state == ON_HOLD: return "ON_HOLD"
|
||||||
|
+ return "AVAILABLE"
|
||||||
|
+
|
||||||
|
+##############################################################################
|
||||||
|
+
+class Node:
+    """
+    Core class implementing logic for a cluster node
+    """
+    def __init__(self, ra):
+        self.raOwner = ra
+        self.azInfo = azHelper.getInstanceInfo()
+        self.azName = self.azInfo.name
+        self.hostName = socket.gethostname()
+        self.setAttr("azName", self.azName)
+        clusterHelper.setAttr("hostName_%s" % self.azName, self.hostName)
+
+    def getAttr(self, key):
+        """
+        Get a local attribute
+        """
+        return clusterHelper.getAttr(key, node=self.hostName)
+
+    def setAttr(self, key, value):
+        """
+        Set a local attribute
+        """
+        return clusterHelper.setAttr(key, value, node=self.hostName)
+
+    def selfOrOtherNode(self, node):
+        """
+        Helper function to distinguish self/other node
+        """
+        return node if node else self.hostName
+
+    def setState(self, state, node=None):
+        """
+        Set the state for a given node (or self)
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("setState: begin; node = %s, state = %s" % (node, nodeStateToString(state)))
+
+        clusterHelper.setAttr(attr_curNodeState, nodeStateToString(state), node=node)
+
+        ocf.logger.debug("setState: finished")
+
+    def getState(self, node=None):
+        """
+        Get the state for a given node (or self)
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("getState: begin; node = %s" % node)
+
+        state = clusterHelper.getAttr(attr_curNodeState, node=node)
+        ocf.logger.debug("getState: state = %s" % state)
+        ocf.logger.debug("getState: finished")
+        if not state:
+            return AVAILABLE
+        return stringToNodeState(state)
+
+    def setEventIDs(self, eventIDs, node=None):
+        """
+        Set pending EventIDs for a given node (or self)
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("setEventIDs: begin; node = %s, eventIDs = %s" % (node, str(eventIDs)))
+
+        if eventIDs:
+            eventIDStr = ",".join(eventIDs)
+        else:
+            eventIDStr = None
+        clusterHelper.setAttr(attr_pendingEventIDs, eventIDStr, node=node)
+
+        ocf.logger.debug("setEventIDs: finished")
+        return
+
+    def getEventIDs(self, node=None):
+        """
+        Get pending EventIDs for a given node (or self)
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("getEventIDs: begin; node = %s" % node)
+
+        eventIDStr = clusterHelper.getAttr(attr_pendingEventIDs, node=node)
+        if eventIDStr:
+            eventIDs = eventIDStr.split(",")
+        else:
+            eventIDs = None
+
+        ocf.logger.debug("getEventIDs: finished; eventIDs = %s" % str(eventIDs))
+        return eventIDs
+
+    def updateNodeStateAndEvents(self, state, eventIDs, node=None):
+        """
+        Set the state and pending EventIDs for a given node (or self)
+        """
+        ocf.logger.debug("updateNodeStateAndEvents: begin; node = %s, state = %s, eventIDs = %s" % (node, nodeStateToString(state), str(eventIDs)))
+
+        self.setState(state, node=node)
+        self.setEventIDs(eventIDs, node=node)
+
+        ocf.logger.debug("updateNodeStateAndEvents: finished")
+        return state
+
+    def putNodeStandby(self, node=None):
+        """
+        Put self to standby
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("putNodeStandby: begin; node = %s" % node)
+
+        clusterHelper._exec("crm_attribute",
+                            "--node", node,
+                            "--name", attr_healthstate,
+                            "--update", "-1000000",
+                            "--lifetime=forever")
+
+        ocf.logger.debug("putNodeStandby: finished")
+
+    def isNodeInStandby(self, node=None):
+        """
+        check if node is in standby
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("isNodeInStandby: begin; node = %s" % node)
+        isInStandy = False
+
+        healthAttributeStr = clusterHelper.getAttr(attr_healthstate, node)
+        if healthAttributeStr is not None:
+            try:
+                healthAttribute = int(healthAttributeStr)
+                isInStandy = healthAttribute < 0
+            except ValueError:
+                # Handle the exception
+                ocf.logger.warn("Health attribute %s on node %s cannot be converted to an integer value" % (healthAttributeStr, node))
+
+        ocf.logger.debug("isNodeInStandby: finished - result %s" % isInStandy)
+        return isInStandy
+
+    def putNodeOnline(self, node=None):
+        """
+        Put self back online
+        """
+        node = self.selfOrOtherNode(node)
+        ocf.logger.debug("putNodeOnline: begin; node = %s" % node)
+
+        clusterHelper._exec("crm_attribute",
+                            "--node", node,
+                            "--name", "#health-azure",
+                            "--update", "0",
+                            "--lifetime=forever")
+
+        ocf.logger.debug("putNodeOnline: finished")
+
+    def separateEvents(self, events):
+        """
+        Split own/other nodes' events
+        """
+        ocf.logger.debug("separateEvents: begin; events = %s" % str(events))
+
+        localEvents = []
+        remoteEvents = []
+        for e in events:
+            e = attrDict(e)
+            if e.EventType not in self.raOwner.relevantEventTypes:
+                continue
+            if self.azName in e.Resources:
+                localEvents.append(e)
+            else:
+                remoteEvents.append(e)
+        ocf.logger.debug("separateEvents: finished; localEvents = %s, remoteEvents = %s" % (str(localEvents), str(remoteEvents)))
+        return (localEvents, remoteEvents)
+
+##############################################################################
+
+class raAzEvents:
+    """
+    Main class for resource agent
+    """
+    def __init__(self, relevantEventTypes):
+        self.node = Node(self)
+        self.relevantEventTypes = relevantEventTypes
+
+    def monitor(self):
+        ocf.logger.debug("monitor: begin")
+
+        events = azHelper.pullScheduledEvents()
+
+        # get current document version
+        curDocVersion = events.DocumentIncarnation
+        lastDocVersion = self.node.getAttr(attr_lastDocVersion)
+        ocf.logger.debug("monitor: lastDocVersion = %s; curDocVersion = %s" % (lastDocVersion, curDocVersion))
+
+        # split events local/remote
+        (localEvents, remoteEvents) = self.node.separateEvents(events.Events)
+
+        # ensure local events are only executing once
+        if curDocVersion == lastDocVersion:
+            ocf.logger.info("monitor: already handled curDocVersion, skip")
+            return ocf.OCF_SUCCESS
+
+        localAzEventIDs = set()
+        for e in localEvents:
+            localAzEventIDs.add(e.EventId)
+
+        curState = self.node.getState()
+        clusterEventIDs = self.node.getEventIDs()
+
+        ocf.logger.debug("monitor: curDocVersion has not been handled yet")
+
+        if clusterEventIDs:
+            # there are pending events set, so our state must be STOPPING or IN_EVENT
+            i = 0; touchedEventIDs = False
+            while i < len(clusterEventIDs):
+                # clean up pending events that are already finished according to AZ
+                if clusterEventIDs[i] not in localAzEventIDs:
+                    ocf.logger.info("monitor: remove finished local clusterEvent %s" % (clusterEventIDs[i]))
+                    clusterEventIDs.pop(i)
+                    touchedEventIDs = True
+                else:
+                    i += 1
+            if len(clusterEventIDs) > 0:
+                # there are still pending events (either because we're still stopping, or because the event is still in place)
+                # either way, we need to wait
+                if touchedEventIDs:
+                    ocf.logger.info("monitor: added new local clusterEvent %s" % str(clusterEventIDs))
+                    self.node.setEventIDs(clusterEventIDs)
+                else:
+                    ocf.logger.info("monitor: no local clusterEvents were updated")
+            else:
+                # there are no more pending events left after cleanup
+                if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
+                    # and no pending resources on the node -> set it back online
+                    ocf.logger.info("monitor: all local events finished -> clean up, put node online and AVAILABLE")
+                    curState = self.node.updateNodeStateAndEvents(AVAILABLE, None)
+                    self.node.putNodeOnline()
+                    clusterHelper.removeHoldFromNodes()
+                    # If Azure Scheduled Events are not used for 24 hours (e.g. because the cluster was asleep), it will be disabled for a VM.
+                    # When the cluster wakes up and starts using it again, the DocumentIncarnation is reset.
+                    # We need to remove it during cleanup, otherwise azure-events-az will not process the event after wakeup
+                    self.node.setAttr(attr_lastDocVersion, None)
+                else:
+                    ocf.logger.info("monitor: all local events finished, but some resources have not completed startup yet -> wait")
+        else:
+            if curState == AVAILABLE:
+                if len(localAzEventIDs) > 0:
+                    if clusterHelper.otherNodesAvailable(self.node):
+                        ocf.logger.info("monitor: can handle local events %s -> set state STOPPING" % (str(localAzEventIDs)))
+                        curState = self.node.updateNodeStateAndEvents(STOPPING, localAzEventIDs)
+                    else:
+                        ocf.logger.info("monitor: cannot handle azEvents %s (only node available) -> set state ON_HOLD" % str(localAzEventIDs))
+                        self.node.setState(ON_HOLD)
+                else:
+                    ocf.logger.debug("monitor: no local azEvents to handle")
+
+        if curState == STOPPING:
+            eventIDsForNode = {}
+            if clusterHelper.noPendingResourcesOnNode(self.node.hostName):
+                if not self.node.isNodeInStandby():
+                    ocf.logger.info("monitor: all local resources are started properly -> put node standby and exit")
+                    self.node.putNodeStandby()
+                    return ocf.OCF_SUCCESS
+
+                for e in localEvents:
+                    ocf.logger.info("monitor: handling remote event %s (%s; nodes = %s)" % (e.EventId, e.EventType, str(e.Resources)))
+                    # before we can force an event to start, we need to ensure all nodes involved have stopped their resources
+                    if e.EventStatus == "Scheduled":
+                        allNodesStopped = True
+                        for azName in e.Resources:
+                            hostName = clusterHelper.getHostNameFromAzName(azName)
+                            state = self.node.getState(node=hostName)
+                            if state == STOPPING:
+                                # the only way we can continue is when node state is STOPPING, but all resources have been stopped
+                                if not clusterHelper.allResourcesStoppedOnNode(hostName):
+                                    ocf.logger.info("monitor: (at least) node %s has still resources running -> wait" % hostName)
+                                    allNodesStopped = False
+                                    break
+                            elif state in (AVAILABLE, IN_EVENT, ON_HOLD):
+                                ocf.logger.info("monitor: node %s is still %s -> remote event needs to be picked up locally" % (hostName, nodeStateToString(state)))
+                                allNodesStopped = False
+                                break
+                        if allNodesStopped:
+                            ocf.logger.info("monitor: nodes %s are stopped -> add remote event %s to force list" % (str(e.Resources), e.EventId))
+                            for n in e.Resources:
+                                hostName = clusterHelper.getHostNameFromAzName(n)
+                                if hostName in eventIDsForNode:
+                                    eventIDsForNode[hostName].append(e.EventId)
+                                else:
+                                    eventIDsForNode[hostName] = [e.EventId]
+                    elif e.EventStatus == "Started":
+                        ocf.logger.info("monitor: remote event already started")
+
+                # force the start of all events whose nodes are ready (i.e. have no more resources running)
+                if len(eventIDsForNode.keys()) > 0:
+                    eventIDsToForce = set([item for sublist in eventIDsForNode.values() for item in sublist])
+                    ocf.logger.info("monitor: set nodes %s to IN_EVENT; force remote events %s" % (str(eventIDsForNode.keys()), str(eventIDsToForce)))
+                    for node, eventId in eventIDsForNode.items():
+                        self.node.updateNodeStateAndEvents(IN_EVENT, eventId, node=node)
+                    azHelper.forceEvents(eventIDsToForce)
+                    self.node.setAttr(attr_lastDocVersion, curDocVersion)
+            else:
+                ocf.logger.info("monitor: some local resources are not clean yet -> wait")
+
+        ocf.logger.debug("monitor: finished")
+        return ocf.OCF_SUCCESS
+
+##############################################################################
+
+def setLoglevel(verbose):
+    # set up writing into syslog
+    loglevel = default_loglevel
+    if verbose:
+        opener = urllib2.build_opener(urllib2.HTTPHandler(debuglevel=1))
+        urllib2.install_opener(opener)
+        loglevel = ocf.logging.DEBUG
+    ocf.log.setLevel(loglevel)
+
+description = (
+    "Microsoft Azure Scheduled Events monitoring agent",
+    """This resource agent implements a monitor for scheduled
+(maintenance) events for a Microsoft Azure VM.
+
+If any relevant events are found, it moves all Pacemaker resources
+away from the affected node to allow for a graceful shutdown.
+
+ Usage:
+ [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
+
+ action (required): Supported values: monitor, help, meta-data
+ eventTypes (optional): List of event types to be considered
+ relevant by the resource agent (comma-separated).
+ Supported values: Freeze,Reboot,Redeploy
+ Default = Reboot,Redeploy
+/ verbose (optional): If set to true, displays debug info.
+ Default = false
+
+ Deployment:
+ crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
+ op monitor interval=10s
+ crm configure clone cln_azure-events-az rsc_azure-events-az
+
+For further information on Microsoft Azure Scheduled Events, please
+refer to the following documentation:
+https://docs.microsoft.com/en-us/azure/virtual-machines/linux/scheduled-events
+""")
+
+def monitor_action(eventTypes):
+    relevantEventTypes = set(eventTypes.split(",") if eventTypes else [])
+    ra = raAzEvents(relevantEventTypes)
+    return ra.monitor()
+
+def validate_action(eventTypes):
+    if eventTypes:
+        for event in eventTypes.split(","):
+            if event not in ("Freeze", "Reboot", "Redeploy"):
+                ocf.ocf_exit_reason("Event type not one of Freeze, Reboot, Redeploy: " + eventTypes)
+                return ocf.OCF_ERR_CONFIGURED
+    return ocf.OCF_SUCCESS
+
+def main():
+    agent = ocf.Agent("azure-events-az", shortdesc=description[0], longdesc=description[1])
+    agent.add_parameter(
+        "eventTypes",
+        shortdesc="List of resources to be considered",
+        longdesc="A comma-separated list of event types that will be handled by this resource agent. (Possible values: Freeze,Reboot,Redeploy)",
+        content_type="string",
+        default="Reboot,Redeploy")
+    agent.add_parameter(
+        "verbose",
+        shortdesc="Enable verbose agent logging",
+        longdesc="Set to true to enable verbose logging",
+        content_type="boolean",
+        default="false")
+    agent.add_action("start", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
+    agent.add_action("stop", timeout=10, handler=lambda: ocf.OCF_SUCCESS)
+    agent.add_action("validate-all", timeout=20, handler=validate_action)
+    agent.add_action("monitor", timeout=240, interval=10, handler=monitor_action)
+    setLoglevel(ocf.is_true(ocf.get_parameter("verbose", "false")))
+    agent.run()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file

From a95337d882c7cc69d604b050159ad50b679f18be Mon Sep 17 00:00:00 2001
From: MSSedusch <sedusch@microsoft.com>
Date: Thu, 2 Jun 2022 14:10:33 +0200
Subject: [PATCH 2/2] Remove developer documentation

---
 heartbeat/azure-events-az.in | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/heartbeat/azure-events-az.in b/heartbeat/azure-events-az.in
index 616fc8d9e..59d095306 100644
--- a/heartbeat/azure-events-az.in
+++ b/heartbeat/azure-events-az.in
@@ -723,17 +723,6 @@ description = (
 If any relevant events are found, it moves all Pacemaker resources
 away from the affected node to allow for a graceful shutdown.

- Usage:
- [OCF_RESKEY_eventTypes=VAL] [OCF_RESKEY_verbose=VAL] azure-events-az ACTION
-
- action (required): Supported values: monitor, help, meta-data
- eventTypes (optional): List of event types to be considered
- relevant by the resource agent (comma-separated).
- Supported values: Freeze,Reboot,Redeploy
- Default = Reboot,Redeploy
-/ verbose (optional): If set to true, displays debug info.
- Default = false
-
 Deployment:
 crm configure primitive rsc_azure-events-az ocf:heartbeat:azure-events-az \
 op monitor interval=10s
@@ -45,7 +45,7 @@
 Name: resource-agents
 Summary: Open Source HA Reusable Cluster Resource Scripts
 Version: 4.10.0
-Release: 23%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release: 24%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
 License: GPLv2+ and LGPLv2+
 URL: https://github.com/ClusterLabs/resource-agents
 Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@@ -78,6 +78,12 @@ Patch25: bz2083081-bz2083086-bz2083090-bz2083092-openstack-agents-set-domain-par
 Patch26: bz2103374-ocf-tester-1-update.patch
 Patch27: bz2103374-ocf-tester-2-remove-deprecated-lrmd-lrmadmin-code.patch
 Patch28: bz2110452-ethmonitor-ovsmonitor-pgsql-fix-attrd_updater-q.patch
+Patch29: bz2109161-storage_mon-1-exit-after-help.patch
+Patch30: bz2109161-storage_mon-2-fix-specified-scores-count.patch
+Patch31: bz2109161-storage_mon-3-fix-child-process-exit.patch
+Patch32: bz2109161-storage_mon-4-fix-possible-false-negatives.patch
+Patch33: bz2102126-LVM-activate-fix-return-codes.patch
+Patch34: bz2111147-azure-events-az-new-ra.patch

 # bundled ha-cloud-support libs
 Patch500: ha-cloud-support-aws.patch
@@ -230,6 +236,12 @@ exit 1
 %patch26 -p1
 %patch27 -p1
 %patch28 -p1
+%patch29 -p1
+%patch30 -p1
+%patch31 -p1
+%patch32 -p1
+%patch33 -p1
+%patch34 -p1

 # bundled ha-cloud-support libs
 %patch500 -p1
@@ -551,6 +563,15 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
 %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm

 %changelog
+* Thu Sep 8 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-24
+- storage_mon: fix specified scores count and possible false negatives
+- LVM-activate: use correct return codes to fix unexpected behaviour
+- azure-events-az: new resource agent
+
+  Resolves: rhbz#2109161
+  Resolves: rhbz#2102126
+  Resolves: rhbz#2111147
+
 * Tue Jul 26 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-23
 - ethmonitor/pgsql: remove attrd_updater "-q" parameter to solve issue
   with Pacemaker 2.1.3+ not ignoring it
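
For context on the data the new azure-events-az agent consumes: monitor() above reads DocumentIncarnation and the Events list (EventId, EventType, EventStatus, Resources) via azHelper.pullScheduledEvents(), which wraps the Azure Instance Metadata Service (IMDS) Scheduled Events endpoint. Below is a minimal standalone sketch of that query, not part of the commit; the endpoint and the required Metadata header are standard IMDS usage, while the api-version shown is only an assumed example (the agent pins its own version elsewhere in the patch).

#!/usr/bin/env python3
# Minimal sketch: query the IMDS Scheduled Events endpoint the agent polls.
# Assumption: api-version 2020-07-01; the agent's azHelper defines its own.
import json
import urllib.request

IMDS_URL = "http://169.254.169.254/metadata/scheduledevents?api-version=2020-07-01"

def pull_scheduled_events():
    # IMDS requires the "Metadata: true" header and is only reachable from the VM itself
    req = urllib.request.Request(IMDS_URL, headers={"Metadata": "true"})
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.load(resp)

if __name__ == "__main__":
    doc = pull_scheduled_events()
    # DocumentIncarnation is the version monitor() stores in attr_lastDocVersion
    print("DocumentIncarnation:", doc.get("DocumentIncarnation"))
    for event in doc.get("Events", []):
        # Fields used by monitor(): EventId, EventType, EventStatus, Resources
        print(event.get("EventId"), event.get("EventType"),
              event.get("EventStatus"), event.get("Resources"))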