import resource-agents-4.9.0-40.el8

This commit is contained in:
CentOS Sources 2023-05-16 06:06:37 +00:00 committed by Stepan Oksanichenko
parent 64a054ec5d
commit 2b4f9c7a1b
17 changed files with 1792 additions and 10 deletions

View File

@ -0,0 +1,195 @@
From 640c2b57f0f3e7256d587ddd5960341cb38b1982 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Sun, 13 Dec 2020 14:58:34 -0800
Subject: [PATCH] LVM-activate: Fix return codes
OCF_ERR_ARGS should be used when the configuration isn't valid for the
**local** node, and so the resource should not attempt to start again
locally until the issue is corrected.
OCF_ERR_CONFIGURED should be used when the configuration isn't valid on
**any** node, and so the resource should not attempt to start again
anywhere until the issue is corrected.
One remaining gray area: Should lvmlockd/lvmetad/clvmd improperly
running (or improperly not running) be an OCF_ERR_GENERIC or
OCF_ERR_ARGS? The fact that it's a state issue rather than a config
issue suggests OCF_ERR_GENERIC. The fact that it won't be fixed without
user intervention suggests OCF_ERR_ARGS. The approach here is to use
GENERIC for all of these. One can make the case that "improperly
running" should use ARGS, since a process must be manually stopped to
fix the issue, and that "improperly not running" should use GENERIC,
since there's a small chance the process died and will be recovered in
some way.
More info about return code meanings:
- https://clusterlabs.org/pacemaker/doc/2.1/Pacemaker_Administration/html/agents.html#how-are-ocf-return-codes-interpreted
Resolves: RHBZ#1905820
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/LVM-activate | 47 +++++++++++++++++++++---------------------
1 file changed, 23 insertions(+), 24 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index c86606637..e951a08e9 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -333,8 +333,7 @@ config_verify()
real=$(lvmconfig "$name" | cut -d'=' -f2)
if [ "$real" != "$expect" ]; then
ocf_exit_reason "config item $name: expect=$expect but real=$real"
- exit $OCF_ERR_CONFIGURED
-
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -366,12 +365,12 @@ lvmlockd_check()
fi
ocf_exit_reason "lvmlockd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -402,17 +401,17 @@ clvmd_check()
# Good: clvmd is running, and lvmlockd is not running
if ! pgrep clvmd >/dev/null 2>&1 ; then
ocf_exit_reason "clvmd daemon is not running!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmetad >/dev/null 2>&1 ; then
ocf_exit_reason "Please stop lvmetad daemon when clvmd is running."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
if pgrep lvmlockd >/dev/null 2>&1 ; then
ocf_exit_reason "lvmlockd daemon is running unexpectedly."
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
return $OCF_SUCCESS
@@ -424,12 +423,12 @@ systemid_check()
source=$(lvmconfig 'global/system_id_source' 2>/dev/null | cut -d"=" -f2)
if [ "$source" = "" ] || [ "$source" = "none" ]; then
ocf_exit_reason "system_id_source in lvm.conf is not set correctly!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
if [ -z ${SYSTEM_ID} ]; then
ocf_exit_reason "local/system_id is not set!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -441,18 +440,18 @@ tagging_check()
# The volume_list must be initialized to something in order to
# guarantee our tag will be filtered on startup
if ! lvm dumpconfig activation/volume_list; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list filter must be initialized in lvm.conf for exclusive activation without clvmd"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
# Our tag must _NOT_ be in the volume_list. This agent
# overrides the volume_list during activation using the
# special tag reserved for cluster activation
if lvm dumpconfig activation/volume_list | grep -e "\"@${OUR_TAG}\"" -e "\"${VG}\""; then
- ocf_log err "LVM: Improper setup detected"
+ ocf_log err "LVM: Improper setup detected"
ocf_exit_reason "The volume_list in lvm.conf must not contain the cluster tag, \"${OUR_TAG}\", or volume group, ${VG}"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_ARGS
fi
return $OCF_SUCCESS
@@ -463,13 +462,13 @@ read_parameters()
if [ -z "$VG" ]
then
ocf_exit_reason "You must identify the volume group name!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
if [ "$LV_activation_mode" != "shared" ] && [ "$LV_activation_mode" != "exclusive" ]
then
ocf_exit_reason "Invalid value for activation_mode: $LV_activation_mode"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Convert VG_access_mode from string to index
@@ -519,8 +518,10 @@ lvm_validate() {
exit $OCF_NOT_RUNNING
fi
+ # Could be a transient error (e.g., iSCSI connection
+ # issue) so use OCF_ERR_GENERIC
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
- exit $OCF_ERR_CONFIGURED
+ exit $OCF_ERR_GENERIC
fi
# Inconsistency might be due to missing physical volumes, which doesn't
@@ -549,7 +550,7 @@ lvm_validate() {
mode=$?
if [ $VG_access_mode_num -ne 4 ] && [ $mode -ne $VG_access_mode_num ]; then
ocf_exit_reason "The specified vg_access_mode doesn't match the lock_type on VG metadata!"
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
# Nothing to do if the VG has no logical volume
@@ -561,11 +562,11 @@ lvm_validate() {
# Check if the given $LV is in the $VG
if [ -n "$LV" ]; then
- OUT=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
+ output=$(lvs --foreign --noheadings ${VG}/${LV} 2>&1)
if [ $? -ne 0 ]; then
- ocf_log err "lvs: ${OUT}"
+ ocf_log err "lvs: ${output}"
ocf_exit_reason "LV ($LV) is not in the given VG ($VG)."
- exit $OCF_ERR_ARGS
+ exit $OCF_ERR_CONFIGURED
fi
fi
@@ -580,7 +581,6 @@ lvm_validate() {
3)
systemid_check
;;
-
4)
tagging_check
;;
@@ -808,10 +808,9 @@ lvm_status() {
dd if=${dm_name} of=/dev/null bs=1 count=1 >/dev/null \
2>&1
if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- else
- return $OCF_SUCCESS
+ return $OCF_ERR_GENERIC
fi
+ return $OCF_SUCCESS
;;
*)
ocf_exit_reason "unsupported monitor level $OCF_CHECK_LEVEL"

View File

@ -0,0 +1,175 @@
From cab190c737fdf58268aa5c009f6089b754862b22 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 16:32:50 -0800
Subject: [PATCH 1/3] Filesystem: Fix OpenBSD check in fstype_supported()
fstype_supported() is supposed to skip the /proc/filesystems check if
the OS is OpenBSD. Instead, it skips the check if the OS is **not**
OpenBSD. That means the function has been a no-op for all other distros.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 010c1dcfc..8b4792152 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -440,7 +440,7 @@ fstype_supported()
local support="$FSTYPE"
local rc
- if [ "X${HOSTOS}" != "XOpenBSD" ];then
+ if [ "X${HOSTOS}" = "XOpenBSD" ];then
# skip checking /proc/filesystems for obsd
return $OCF_SUCCESS
fi
From 5d38b87daa9cfffa89a193df131d6ebd87cd05aa Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 18:26:32 -0800
Subject: [PATCH 2/3] Filesystem: Improve fstype_supported logs for fuse
Make it more clear when we have to use a different name to check for
support of a particular filesystem. Currently only used for fuse-type
filesystems.
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 8b4792152..4d84846c1 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -455,6 +455,10 @@ fstype_supported()
fuse.*|glusterfs|rozofs) support="fuse";;
esac
+ if [ "$support" != "$FSTYPE" ]; then
+ ocf_log info "Checking support for $FSTYPE as \"$support\""
+ fi
+
grep -w "$support"'$' /proc/filesystems >/dev/null
if [ $? -eq 0 ]; then
# found the fs type
@@ -465,7 +469,7 @@ fstype_supported()
# check the if the filesystem support exists again.
$MODPROBE $support >/dev/null
if [ $? -ne 0 ]; then
- ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
+ ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems and failed to load kernel module"
return $OCF_ERR_INSTALLED
fi
@@ -478,11 +482,11 @@ fstype_supported()
# yes. found the filesystem after doing the modprobe
return $OCF_SUCCESS
fi
- ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
+ ocf_log debug "Unable to find support for $support in /proc/filesystems after modprobe, trying again"
sleep 1
done
- ocf_exit_reason "Couldn't find filesystem $FSTYPE in /proc/filesystems"
+ ocf_exit_reason "Couldn't find filesystem $support in /proc/filesystems"
return $OCF_ERR_INSTALLED
}
@@ -837,6 +841,9 @@ Filesystem_monitor()
# VALIDATE_ALL: Are the instance parameters valid?
# FIXME!! The only part that's useful is the return code.
# This code always returns $OCF_SUCCESS (!)
+# FIXME!! Needs some tuning to match fstype_supported() (e.g., for
+# fuse). Can we just call fstype_supported() with a flag like
+# "no_modprobe" instead?
#
Filesystem_validate_all()
{
From e2174244067b02d798e0f12437f0f499c80f91fe Mon Sep 17 00:00:00 2001
From: Reid Wahl <nrwahl@protonmail.com>
Date: Tue, 1 Feb 2022 18:55:47 -0800
Subject: [PATCH 3/3] Filesystem: Add support for Amazon EFS mount helper
mount.efs, the mount helper for Amazon Elastic File System (EFS)
provided by amazon-efs-utils [1], is a wrapper for mount.nfs4. It offers
a number of AWS-specific mount options and some security improvements
like encryption of data in transit.
This commit adds support by treating an fstype=efs like fstype=nfs4 for
the most part.
Resolves: RHBZ#2049319
[1] https://docs.aws.amazon.com/efs/latest/ug/efs-mount-helper.html
Signed-off-by: Reid Wahl <nrwahl@protonmail.com>
---
heartbeat/Filesystem | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 4d84846c1..1a90d6a42 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -341,7 +341,7 @@ determine_blockdevice() {
# Get the current real device name, if possible.
# (specified devname could be -L or -U...)
case "$FSTYPE" in
- nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre)
+ nfs4|nfs|efs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none|lustre)
: ;;
*)
match_string="${TAB}${CANONICALIZED_MOUNTPOINT}${TAB}"
@@ -423,7 +423,7 @@ is_fsck_needed() {
no) false;;
""|auto)
case "$FSTYPE" in
- ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs)
+ ext4|ext4dev|ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs4|nfs|efs|cifs|smbfs|ocfs2|gfs2|none|lustre|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs)
false;;
*)
true;;
@@ -450,9 +450,11 @@ fstype_supported()
return $OCF_SUCCESS
fi
- # support fuse-filesystems (e.g. GlusterFS)
+ # support fuse-filesystems (e.g. GlusterFS) and Amazon Elastic File
+ # System (EFS)
case "$FSTYPE" in
fuse.*|glusterfs|rozofs) support="fuse";;
+ efs) support="nfs4";;
esac
if [ "$support" != "$FSTYPE" ]; then
@@ -701,7 +703,7 @@ Filesystem_stop()
# For networked filesystems, there's merit in trying -f:
case "$FSTYPE" in
- nfs4|nfs|cifs|smbfs) umount_force="-f" ;;
+ nfs4|nfs|efs|cifs|smbfs) umount_force="-f" ;;
esac
# Umount all sub-filesystems mounted under $MOUNTPOINT/ too.
@@ -892,7 +894,7 @@ set_blockdevice_var() {
# these are definitely not block devices
case "$FSTYPE" in
- nfs4|nfs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;;
+ nfs4|nfs|efs|smbfs|cifs|none|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|lustre) return;;
esac
if $(is_option "loop"); then
@@ -1013,7 +1015,7 @@ is_option "ro" &&
CLUSTERSAFE=2
case "$FSTYPE" in
-nfs4|nfs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
+nfs4|nfs|efs|smbfs|cifs|none|gfs2|glusterfs|ceph|ocfs2|overlay|overlayfs|tmpfs|cvfs|lustre)
CLUSTERSAFE=1 # this is kind of safe too
;;
# add here CLUSTERSAFE=0 for all filesystems which are not

View File

@ -0,0 +1,79 @@
From b3eadb8523b599af800a7c772606aa0e90cf142f Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:03:02 +0900
Subject: [PATCH 1/2] Make storage_mon -h exit just after printing help
messages.
Previously, when -h or an invalid option was specified, storage_mon
printed the help messages, proceeded processing and then could
throw an error. This was not the behavior that, e.g., users who want
to specify -h option to see the help messages are expecting. To fix
this issue, this commit changes storage_mon so that it exits just
after printing the help messages when -h or an invalid option is
specified.
---
tools/storage_mon.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 7b65bb419..1303371f7 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages\n");
+ fprintf(f, " --help print this messages, then exit\n");
}
/* Check one device */
@@ -178,9 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
+ exit(0);
break;
default:
usage(argv[0], stderr);
+ exit(-1);
break;
}
From e62795f02d25a772a239e0a4f9eb9d6470c134ee Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 17:56:32 +0900
Subject: [PATCH 2/2] Fix typo in help message.
---
tools/storage_mon.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 1303371f7..3c82d5ee8 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -28,7 +28,7 @@ static void usage(char *name, FILE *f)
fprintf(f, " --timeout <n> max time to wait for a device test to come back. in seconds (default %d)\n", DEFAULT_TIMEOUT);
fprintf(f, " --inject-errors-percent <n> Generate EIO errors <n>%% of the time (for testing only)\n");
fprintf(f, " --verbose emit extra output to stdout\n");
- fprintf(f, " --help print this messages, then exit\n");
+ fprintf(f, " --help print this message\n");
}
/* Check one device */
@@ -178,11 +178,11 @@ int main(int argc, char *argv[])
break;
case 'h':
usage(argv[0], stdout);
- exit(0);
+ return 0;
break;
default:
usage(argv[0], stderr);
- exit(-1);
+ return -1;
break;
}

View File

@ -0,0 +1,36 @@
From a68957e8f1e8169438acf5a4321f47ed7d8ceec1 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 20:28:38 +0900
Subject: [PATCH] storage_mon: Fix bug in checking of number of specified
scores.
Previously specifying the maximum allowed number (MAX_DEVICES, currently 25)
of devices and scores as arguments could cause storage_mon to fail unexpectedly
with the error message "too many scores, max is 25". This issue happened
because storage_mon checked whether the number of specified scores
exceeded the upper limit by using the local variable "device_count" indicating
the number of specified devices (not scores). So after the maximum number
of devices arguments were interpreted, the appearance of next score argument
caused the error even when the number of interpreted scores arguments had
not exceeded the maximum.
This patch fixes storage_mon so that it uses the local variable "score_count"
indicating the number of specified scores, to check whether arguments for
scores are specified more than the upper limit.
---
tools/storage_mon.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..c749076c2 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -154,7 +154,7 @@ int main(int argc, char *argv[])
}
break;
case 's':
- if (device_count < MAX_DEVICES) {
+ if (score_count < MAX_DEVICES) {
int score = atoi(optarg);
if (score < 1 || score > 10) {
fprintf(stderr, "Score must be between 1 and 10 inclusive\n");

View File

@ -0,0 +1,43 @@
From c6ea93fcb499c84c3d8e9aad2ced65065a3f6d51 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Tue, 19 Jul 2022 22:34:08 +0900
Subject: [PATCH] Fix bug in handling of child process exit.
When storage_mon detects that a child process exits with zero,
it resets the test_forks[] entry for the child process to 0, to avoid
waitpid() for the process again in the loop. But, previously,
storage_mon didn't do that when it detected that a child process
exited with non-zero. Which caused waitpid() to be called again
for the process already gone and to report an error like
"waitpid on XXX failed: No child processes" unexpectedly.
In this case, basically storage_mon should wait until all the child
processes exit and return the final score, instead.
This patch fixes this issue by making storage_mon reset test_works[]
entry even when a child process exits with non-zero.
---
tools/storage_mon.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 3c82d5ee8..83a48ca36 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -232,13 +232,13 @@ int main(int argc, char *argv[])
if (w == test_forks[i]) {
if (WIFEXITED(wstatus)) {
- if (WEXITSTATUS(wstatus) == 0) {
- finished_count++;
- test_forks[i] = 0;
- } else {
+ if (WEXITSTATUS(wstatus) != 0) {
syslog(LOG_ERR, "Error reading from device %s", devices[i]);
final_score += scores[i];
}
+
+ finished_count++;
+ test_forks[i] = 0;
}
}
}

View File

@ -0,0 +1,417 @@
From 0bb52cf9985bda47e13940761b3d8e2eaddf377c Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 10 Aug 2022 17:35:54 +0900
Subject: [PATCH 1/4] storage_mon: Use the O_DIRECT flag in open() to eliminate
cache effects
---
tools/Makefile.am | 1 +
tools/storage_mon.c | 82 +++++++++++++++++++++++++++++++++------------
2 files changed, 61 insertions(+), 22 deletions(-)
diff --git a/tools/Makefile.am b/tools/Makefile.am
index 1309223b4..08323fee3 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -74,6 +74,7 @@ sfex_stat_LDADD = $(GLIBLIB) -lplumb -lplumbgpl
findif_SOURCES = findif.c
storage_mon_SOURCES = storage_mon.c
+storage_mon_CFLAGS = -D_GNU_SOURCE
if BUILD_TICKLE
halib_PROGRAMS += tickle_tcp
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 930ead41c..ba87492fc 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,23 +31,27 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}
-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
+static int open_device(const char *device, int verbose)
{
- uint64_t devsize;
int device_fd;
int res;
+ uint64_t devsize;
off_t seek_spot;
- char buffer[512];
- if (verbose) {
- printf("Testing device %s\n", device);
+#if defined(__linux__) || defined(__FreeBSD__)
+ device_fd = open(device, O_RDONLY|O_DIRECT);
+ if (device_fd >= 0) {
+ return device_fd;
+ } else if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ return -1;
}
+#endif
device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- exit(-1);
+ return -1;
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
@@ -57,11 +61,12 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res != 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
if (verbose) {
fprintf(stderr, "%s: size=%zu\n", device, devsize);
}
+
/* Don't fret about real randomness */
srand(time(NULL) + getpid());
/* Pick a random place on the device - sector aligned */
@@ -70,35 +75,64 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
close(device_fd);
- exit(-1);
+ return -1;
}
-
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
+ return device_fd;
+}
+
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
+{
+ int device_fd;
+ int sec_size = 0;
+ int res;
+ void *buffer;
+
+ if (verbose) {
+ printf("Testing device %s\n", device);
+ }
+
+ device_fd = open_device(device, verbose);
+ if (device_fd < 0) {
+ exit(-1);
+ }
+
+ ioctl(device_fd, BLKSSZGET, &sec_size);
+ if (sec_size == 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }
- res = read(device_fd, buffer, sizeof(buffer));
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
if (res < 0) {
fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- close(device_fd);
- exit(-1);
+ goto error;
}
- if (res < (int)sizeof(buffer)) {
- fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
- close(device_fd);
- exit(-1);
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
}
/* Fake an error */
- if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- close(device_fd);
- exit(-1);
+ if (inject_error_percent) {
+ srand(time(NULL) + getpid());
+ if ((rand() % 100) < inject_error_percent) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
+ }
}
res = close(device_fd);
if (res != 0) {
fprintf(stderr, "Failed to close %s: %s\n", device, strerror(errno));
- close(device_fd);
exit(-1);
}
@@ -106,6 +140,10 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
printf("%s: done\n", device);
}
exit(0);
+
+error:
+ close(device_fd);
+ exit(-1);
}
int main(int argc, char *argv[])
From ce4e632f29ed6b86b82a959eac5844655baed153 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Mon, 15 Aug 2022 19:17:21 +0900
Subject: [PATCH 2/4] storage_mon: fix build-related issues
---
tools/storage_mon.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index ba87492fc..e34d1975a 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -38,7 +38,6 @@ static int open_device(const char *device, int verbose)
uint64_t devsize;
off_t seek_spot;
-#if defined(__linux__) || defined(__FreeBSD__)
device_fd = open(device, O_RDONLY|O_DIRECT);
if (device_fd >= 0) {
return device_fd;
@@ -46,7 +45,6 @@ static int open_device(const char *device, int verbose)
fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
return -1;
}
-#endif
device_fd = open(device, O_RDONLY);
if (device_fd < 0) {
@@ -100,7 +98,11 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
exit(-1);
}
+#ifdef __FreeBSD__
+ ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+#else
ioctl(device_fd, BLKSSZGET, &sec_size);
+#endif
if (sec_size == 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
goto error;
From 7a0aaa0dfdebeab3fae9fe9ddc412c3d1f610273 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Wed, 24 Aug 2022 17:36:23 +0900
Subject: [PATCH 3/4] storage_mon: do random lseek even with O_DIRECT, etc
---
tools/storage_mon.c | 118 ++++++++++++++++++++++----------------------
1 file changed, 58 insertions(+), 60 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index e34d1975a..0bdb48649 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -31,38 +31,43 @@ static void usage(char *name, FILE *f)
fprintf(f, " --help print this message\n");
}
-static int open_device(const char *device, int verbose)
+/* Check one device */
+static void *test_device(const char *device, int verbose, int inject_error_percent)
{
+ uint64_t devsize;
+ int flags = O_RDONLY | O_DIRECT;
int device_fd;
int res;
- uint64_t devsize;
off_t seek_spot;
- device_fd = open(device, O_RDONLY|O_DIRECT);
- if (device_fd >= 0) {
- return device_fd;
- } else if (errno != EINVAL) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (verbose) {
+ printf("Testing device %s\n", device);
}
- device_fd = open(device, O_RDONLY);
+ device_fd = open(device, flags);
if (device_fd < 0) {
- fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
- return -1;
+ if (errno != EINVAL) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
+ flags &= ~O_DIRECT;
+ device_fd = open(device, flags);
+ if (device_fd < 0) {
+ fprintf(stderr, "Failed to open %s: %s\n", device, strerror(errno));
+ exit(-1);
+ }
}
#ifdef __FreeBSD__
res = ioctl(device_fd, DIOCGMEDIASIZE, &devsize);
#else
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
- if (res != 0) {
+ if (res < 0) {
fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
- fprintf(stderr, "%s: size=%zu\n", device, devsize);
+ printf("%s: opened %s O_DIRECT, size=%zu\n", device, (flags & O_DIRECT)?"with":"without", devsize);
}
/* Don't fret about real randomness */
@@ -72,65 +77,58 @@ static int open_device(const char *device, int verbose)
res = lseek(device_fd, seek_spot, SEEK_SET);
if (res < 0) {
fprintf(stderr, "Failed to seek %s: %s\n", device, strerror(errno));
- close(device_fd);
- return -1;
+ goto error;
}
if (verbose) {
printf("%s: reading from pos %ld\n", device, seek_spot);
}
- return device_fd;
-}
-
-/* Check one device */
-static void *test_device(const char *device, int verbose, int inject_error_percent)
-{
- int device_fd;
- int sec_size = 0;
- int res;
- void *buffer;
-
- if (verbose) {
- printf("Testing device %s\n", device);
- }
- device_fd = open_device(device, verbose);
- if (device_fd < 0) {
- exit(-1);
- }
+ if (flags & O_DIRECT) {
+ int sec_size = 0;
+ void *buffer;
#ifdef __FreeBSD__
- ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
+ res = ioctl(device_fd, DIOCGSECTORSIZE, &sec_size);
#else
- ioctl(device_fd, BLKSSZGET, &sec_size);
+ res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
- if (sec_size == 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
- goto error;
- }
+ if (res < 0) {
+ fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ goto error;
+ }
- if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
- fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
- goto error;
- }
+ if (posix_memalign(&buffer, sysconf(_SC_PAGESIZE), sec_size) != 0) {
+ fprintf(stderr, "Failed to allocate aligned memory: %s\n", strerror(errno));
+ goto error;
+ }
+ res = read(device_fd, buffer, sec_size);
+ free(buffer);
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < sec_size) {
+ fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
+ goto error;
+ }
+ } else {
+ char buffer[512];
- res = read(device_fd, buffer, sec_size);
- free(buffer);
- if (res < 0) {
- fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
- goto error;
- }
- if (res < sec_size) {
- fprintf(stderr, "Failed to read %d bytes from %s, got %d\n", sec_size, device, res);
- goto error;
+ res = read(device_fd, buffer, sizeof(buffer));
+ if (res < 0) {
+ fprintf(stderr, "Failed to read %s: %s\n", device, strerror(errno));
+ goto error;
+ }
+ if (res < (int)sizeof(buffer)) {
+ fprintf(stderr, "Failed to read %ld bytes from %s, got %d\n", sizeof(buffer), device, res);
+ goto error;
+ }
}
/* Fake an error */
- if (inject_error_percent) {
- srand(time(NULL) + getpid());
- if ((rand() % 100) < inject_error_percent) {
- fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
- goto error;
- }
+ if (inject_error_percent && ((rand() % 100) < inject_error_percent)) {
+ fprintf(stderr, "People, please fasten your seatbelts, injecting errors!\n");
+ goto error;
}
res = close(device_fd);
if (res != 0) {
From db97e055a17526cec056c595844a9d8851e3ee19 Mon Sep 17 00:00:00 2001
From: Kazunori INOUE <kazunori_inoue@newson.co.jp>
Date: Thu, 25 Aug 2022 16:03:46 +0900
Subject: [PATCH 4/4] storage_mon: improve error messages when ioctl() fails
---
tools/storage_mon.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/storage_mon.c b/tools/storage_mon.c
index 0bdb48649..f829c5081 100644
--- a/tools/storage_mon.c
+++ b/tools/storage_mon.c
@@ -63,7 +63,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKGETSIZE64, &devsize);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get device size for %s: %s\n", device, strerror(errno));
goto error;
}
if (verbose) {
@@ -93,7 +93,7 @@ static void *test_device(const char *device, int verbose, int inject_error_perce
res = ioctl(device_fd, BLKSSZGET, &sec_size);
#endif
if (res < 0) {
- fprintf(stderr, "Failed to stat %s: %s\n", device, strerror(errno));
+ fprintf(stderr, "Failed to get block device sector size for %s: %s\n", device, strerror(errno));
goto error;
}

View File

@ -0,0 +1,298 @@
From 764757380af19d3a21d40f3c9624e4135ff074e1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 2 Nov 2022 10:26:31 +0100
Subject: [PATCH] nfsserver: add nfsv4_only parameter to make it run without
rpc-statd/rpcbind services
---
heartbeat/nfsserver | 200 +++++++++++++++++++++++++-------------------
1 file changed, 114 insertions(+), 86 deletions(-)
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 9bbd603e5..cb2d43ab1 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -79,6 +79,16 @@ Init script for nfsserver
<content type="string" default="auto detected" />
</parameter>
+<parameter name="nfsv4_only" unique="0" required="0">
+<longdesc lang="en">
+Run in NFSv4 only mode (rpc-statd and rpcbind services masked).
+</longdesc>
+<shortdesc lang="en">
+NFSv4 only mode.
+</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+
<parameter name="nfs_no_notify" unique="0" required="0">
<longdesc lang="en">
Do not send reboot notifications to NFSv3 clients during server startup.
@@ -332,7 +342,7 @@ v3locking_exec()
if [ $EXEC_MODE -eq 2 ]; then
nfs_exec $cmd nfs-lock.service
elif [ $EXEC_MODE -eq 3 ]; then
- nfs_exec $cmd rpc-statd.service
+ nfs_exec $cmd rpc-statd.service
else
case $cmd in
start) locking_start;;
@@ -348,20 +358,22 @@ nfsserver_systemd_monitor()
local rc
local fn
- ocf_log debug "Status: rpcbind"
- rpcinfo > /dev/null 2>&1
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpcbind is not running"
- return $OCF_NOT_RUNNING
- fi
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ ocf_log debug "Status: rpcbind"
+ rpcinfo > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpcbind is not running"
+ return $OCF_NOT_RUNNING
+ fi
- ocf_log debug "Status: nfs-mountd"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "nfs-mountd is not running"
- return $OCF_NOT_RUNNING
+ ocf_log debug "Status: nfs-mountd"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "nfs-mountd is not running"
+ return $OCF_NOT_RUNNING
+ fi
fi
ocf_log debug "Status: nfs-idmapd"
@@ -375,12 +387,14 @@ nfsserver_systemd_monitor()
return $OCF_NOT_RUNNING
fi
- ocf_log debug "Status: rpc-statd"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpc-statd is not running"
- return $OCF_NOT_RUNNING
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ ocf_log debug "Status: rpc-statd"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpc-statd is not running"
+ return $OCF_NOT_RUNNING
+ fi
fi
nfs_exec is-active nfs-server
@@ -424,7 +438,7 @@ nfsserver_monitor ()
if [ $rc -eq 0 ]; then
# don't report success if nfs servers are up
# without locking daemons.
- v3locking_exec "status"
+ ocf_is_true "$OCF_RESKEY_nfsv4_only" || v3locking_exec "status"
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "NFS server is up, but the locking daemons are down"
@@ -786,48 +800,54 @@ nfsserver_start ()
# systemd
case $EXEC_MODE in
- [23]) nfs_exec start rpcbind
- local i=1
- while : ; do
- ocf_log info "Start: rpcbind i: $i"
- rpcinfo > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ [23]) if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start rpcbind
+ local i=1
+ while : ; do
+ ocf_log info "Start: rpcbind i: $i"
+ rpcinfo > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
;;
esac
- # check to see if we need to start rpc.statd
- v3locking_exec "status"
- if [ $? -ne $OCF_SUCCESS ]; then
- v3locking_exec "start"
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_exit_reason "Failed to start NFS server locking daemons"
- return $rc
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ # check to see if we need to start rpc.statd
+ v3locking_exec "status"
+ if [ $? -ne $OCF_SUCCESS ]; then
+ v3locking_exec "start"
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "Failed to start NFS server locking daemons"
+ return $rc
+ fi
+ else
+ ocf_log info "rpc.statd already up"
fi
- else
- ocf_log info "rpc.statd already up"
fi
# systemd
case $EXEC_MODE in
- [23]) nfs_exec start nfs-mountd
- local i=1
- while : ; do
- ocf_log info "Start: nfs-mountd i: $i"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ [23]) if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start nfs-mountd
+ local i=1
+ while : ; do
+ ocf_log info "Start: nfs-mountd i: $i"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
nfs_exec start nfs-idmapd
local i=1
@@ -839,24 +859,26 @@ nfsserver_start ()
ocf_log debug "$(cat $fn)"
rm -f $fn
if [ "$rc" -eq "0" ]; then
- break;
+ break
fi
sleep 1
i=$((i + 1))
done
- nfs_exec start rpc-statd
- local i=1
- while : ; do
- ocf_log info "Start: rpc-statd i: $i"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- break;
- fi
- sleep 1
- i=$((i + 1))
- done
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec start rpc-statd
+ local i=1
+ while : ; do
+ ocf_log info "Start: rpc-statd i: $i"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break
+ fi
+ sleep 1
+ i=$((i + 1))
+ done
+ fi
esac
@@ -914,13 +936,15 @@ nfsserver_stop ()
sleep 1
done
- nfs_exec stop rpc-statd > /dev/null 2>&1
- ocf_log info "Stop: rpc-statd"
- rpcinfo -t localhost 100024 > /dev/null 2>&1
- rc=$?
- if [ "$rc" -eq "0" ]; then
- ocf_exit_reason "Failed to stop rpc-statd"
- return $OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec stop rpc-statd > /dev/null 2>&1
+ ocf_log info "Stop: rpc-statd"
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop rpc-statd"
+ return $OCF_ERR_GENERIC
+ fi
fi
nfs_exec stop nfs-idmapd > /dev/null 2>&1
@@ -935,13 +959,15 @@ nfsserver_stop ()
return $OCF_ERR_GENERIC
fi
- nfs_exec stop nfs-mountd > /dev/null 2>&1
- ocf_log info "Stop: nfs-mountd"
- ps axww | grep -q "[r]pc.mountd"
- rc=$?
- if [ "$rc" -eq "0" ]; then
- ocf_exit_reason "Failed to stop nfs-mountd"
- return $OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ nfs_exec stop nfs-mountd > /dev/null 2>&1
+ ocf_log info "Stop: nfs-mountd"
+ ps axww | grep -q "[r]pc.mountd"
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop nfs-mountd"
+ return $OCF_ERR_GENERIC
+ fi
fi
if systemctl --no-legend list-unit-files "nfsdcld*" | grep -q nfsdcld; then
@@ -960,10 +986,12 @@ nfsserver_stop ()
esac
- v3locking_exec "stop"
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Failed to stop NFS locking daemons"
- rc=$OCF_ERR_GENERIC
+ if ! ocf_is_true "$OCF_RESKEY_nfsv4_only"; then
+ v3locking_exec "stop"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to stop NFS locking daemons"
+ rc=$OCF_ERR_GENERIC
+ fi
fi
# systemd

View File

@ -0,0 +1,25 @@
From 97a05e0e662ed922c9ecd016b39ab90ee233d5c9 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 24 Nov 2022 10:36:56 +0100
Subject: [PATCH] mysql-common: return error in stop-action if kill fails to
stop the process, so the node can get fenced
---
heartbeat/mysql-common.sh | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index 34e1c6748..8104019b0 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -318,6 +318,10 @@ mysql_common_stop()
if [ $? != $OCF_NOT_RUNNING ]; then
ocf_log info "MySQL failed to stop after ${shutdown_timeout}s using SIGTERM. Trying SIGKILL..."
/bin/kill -KILL $pid > /dev/null
+ mysql_common_status info $pid
+ if [ $? != $OCF_NOT_RUNNING ]; then
+ return $OCF_ERR_GENERIC
+ fi
fi
ocf_log info "MySQL stopped";

View File

@ -0,0 +1,137 @@
From bf89ad06d5da5c05533c80a37a37c8dbbcd123aa Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 8 Dec 2022 15:40:07 +0100
Subject: [PATCH] galera/mpathpersist/sg_persist/IPsrcaddr: only check notify
and promotable when OCF_CHECK_LEVEL=10
Pacemaker has started running validate-all action before creating the
resource. It doesnt provide notify/promotable settings while doing so,
so this patch moves these checks to OCF_CHECK_LEVEL 10 and runs the
validate action at OCF_CHECK_LEVEL 10 in the start-action.
---
heartbeat/IPsrcaddr | 13 ++++++++-----
heartbeat/galera.in | 9 ++++++---
heartbeat/mpathpersist.in | 13 +++++++++----
heartbeat/sg_persist.in | 13 +++++++++----
4 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 1bd41a930..66e2ad8cd 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -510,11 +510,13 @@ srca_validate_all() {
fi
# We should serve this IP address of course
- if ip_status "$ipaddress"; then
- :
- else
- ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
- return $OCF_ERR_INSTALLED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if ip_status "$ipaddress"; then
+ :
+ else
+ ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
+ return $OCF_ERR_INSTALLED
+ fi
fi
return $OCF_SUCCESS
}
@@ -540,6 +542,7 @@ esac
ipaddress="$OCF_RESKEY_ipaddress"
+[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10
srca_validate_all
rc=$?
if [ $rc -ne $OCF_SUCCESS ]; then
diff --git a/heartbeat/galera.in b/heartbeat/galera.in
index cd2fee7c0..6aed3e4b6 100755
--- a/heartbeat/galera.in
+++ b/heartbeat/galera.in
@@ -1015,9 +1015,11 @@ galera_stop()
galera_validate()
{
- if ! ocf_is_ms; then
- ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
- return $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if ! ocf_is_ms; then
+ ocf_exit_reason "Galera must be configured as a multistate Master/Slave resource."
+ return $OCF_ERR_CONFIGURED
+ fi
fi
if [ -z "$OCF_RESKEY_wsrep_cluster_address" ]; then
@@ -1035,6 +1037,7 @@ case "$1" in
exit $OCF_SUCCESS;;
esac
+[ "$__OCF_ACTION" = "start" ] && OCF_CHECK_LEVEL=10
galera_validate
rc=$?
LSB_STATUS_STOPPED=3
diff --git a/heartbeat/mpathpersist.in b/heartbeat/mpathpersist.in
index 0e2c2a4a0..8a46b9930 100644
--- a/heartbeat/mpathpersist.in
+++ b/heartbeat/mpathpersist.in
@@ -630,10 +630,11 @@ mpathpersist_action_notify() {
}
mpathpersist_action_validate_all () {
-
- if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
- ocf_log err "Master options misconfigured."
- exit $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
+ ocf_log err "Master options misconfigured."
+ exit $OCF_ERR_CONFIGURED
+ fi
fi
return $OCF_SUCCESS
@@ -659,6 +660,10 @@ case $ACTION in
start|promote|monitor|stop|demote)
ocf_log debug "$RESOURCE: starting action \"$ACTION\""
mpathpersist_init
+ if [ "$__OCF_ACTION" = "start" ]; then
+ OCF_CHECK_LEVEL=10
+ mpathpersist_action_validate_all
+ fi
mpathpersist_action_$ACTION
exit $?
;;
diff --git a/heartbeat/sg_persist.in b/heartbeat/sg_persist.in
index 16048ea6f..620c02f4a 100644
--- a/heartbeat/sg_persist.in
+++ b/heartbeat/sg_persist.in
@@ -643,10 +643,11 @@ sg_persist_action_notify() {
}
sg_persist_action_validate_all () {
-
- if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
- ocf_log err "Master options misconfigured."
- exit $OCF_ERR_CONFIGURED
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ if [ "$OCF_RESKEY_CRM_meta_master_max" != "1" ] && [ "$RESERVATION_TYPE" != "7" ] && [ "$RESERVATION_TYPE" != "8" ]; then
+ ocf_log err "Master options misconfigured."
+ exit $OCF_ERR_CONFIGURED
+ fi
fi
return $OCF_SUCCESS
@@ -672,6 +673,10 @@ case $ACTION in
start|promote|monitor|stop|demote)
ocf_log debug "$RESOURCE: starting action \"$ACTION\""
sg_persist_init
+ if [ "$__OCF_ACTION" = "start" ]; then
+ OCF_CHECK_LEVEL=10
+ sg_persist_action_validate_all
+ fi
sg_persist_action_$ACTION
exit $?
;;

View File

@ -0,0 +1,49 @@
From 21666c5c842b8a6028699ee78db75a1d7134fad0 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 4 Jan 2023 10:39:16 +0100
Subject: [PATCH 1/2] Filesystem: remove validate-all mountpoint warning as it
is auto-created during start-action if it doesnt exist
---
heartbeat/Filesystem | 4 ----
1 file changed, 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 44270ad98..65088029e 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -851,10 +851,6 @@ Filesystem_monitor()
#
Filesystem_validate_all()
{
- if [ -n "$MOUNTPOINT" ] && [ ! -d "$MOUNTPOINT" ]; then
- ocf_log warn "Mountpoint $MOUNTPOINT does not exist"
- fi
-
# Check if the $FSTYPE is workable
# NOTE: Without inserting the $FSTYPE module, this step may be imprecise
# TODO: This is Linux specific crap.
From 8a7f40b6ab93d8d39230d864ab06a57ff48d6f1f Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 5 Jan 2023 13:09:48 +0100
Subject: [PATCH 2/2] CTDB: change public_addresses validate-all warning to
info
---
heartbeat/CTDB.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 46f56cfac..b4af66bc1 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -940,7 +940,7 @@ ctdb_validate() {
fi
if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
- ocf_log warn "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"
+ ocf_log info "CTDB file '${OCF_RESKEY_ctdb_config_dir}/public_addresses' exists - CTDB will try to manage IP failover!"
fi
if [ ! -f "$OCF_RESKEY_ctdb_config_dir/nodes" ]; then

View File

@ -0,0 +1,68 @@
--- a/heartbeat/pgsqlms 2023-01-04 14:42:36.093258702 +0100
+++ b/heartbeat/pgsqlms 2023-01-04 14:40:52.403994545 +0100
@@ -66,6 +66,7 @@
my $maxlag = $ENV{'OCF_RESKEY_maxlag'} || $maxlag_default;
my $recovery_tpl = $ENV{'OCF_RESKEY_recovery_template'}
|| "$pgdata/recovery.conf.pcmk";
+my $ocf_check_level = $ENV{'OCF_CHECK_LEVEL'} || 0;
# PostgreSQL commands path
@@ -1304,26 +1305,28 @@
return $OCF_ERR_INSTALLED;
}
- # check notify=true
- $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
- --meta --get-parameter notify 2>/dev/null };
- chomp $ans;
- unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
- ocf_exit_reason(
- 'You must set meta parameter notify=true for your master resource'
- );
- return $OCF_ERR_INSTALLED;
- }
+ if ( $ocf_check_level == 10 ) {
+ # check notify=true
+ $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\
+ --meta --get-parameter notify 2>/dev/null };
+ chomp $ans;
+ unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter notify=true for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
- # check master-max=1
- unless (
- defined $ENV{'OCF_RESKEY_CRM_meta_master_max'}
- and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
- ) {
- ocf_exit_reason(
- 'You must set meta parameter master-max=1 for your master resource'
- );
- return $OCF_ERR_INSTALLED;
+ # check master-max=1
+ unless (
+ defined $ENV{'OCF_RESKEY_CRM_meta_master_max'}
+ and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1'
+ ) {
+ ocf_exit_reason(
+ 'You must set meta parameter master-max=1 for your "master" resource'
+ );
+ return $OCF_ERR_INSTALLED;
+ }
}
if ( $PGVERNUM >= $PGVER_12 ) {
@@ -2242,6 +2245,9 @@
# Set current node name.
$nodename = ocf_local_nodename();
+if ( $__OCF_ACTION ne 'validate-all' ) {
+ $ocf_check_level = 10;
+}
$exit_code = pgsql_validate_all();
exit $exit_code if $exit_code != $OCF_SUCCESS or $__OCF_ACTION eq 'validate-all';

View File

@ -0,0 +1,187 @@
From 81f9e1a04dfd2274ccb906310b4f191485e342ab Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 11 Jan 2023 13:22:24 +0100
Subject: [PATCH 1/2] exportfs: move testdir() to start-action to avoid failing
during resource creation (validate-all) and make it create the directory if
it doesnt exist
---
heartbeat/exportfs | 27 +++++++++++++++------------
1 file changed, 15 insertions(+), 12 deletions(-)
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index c10777fa9..2307a9e67 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -301,6 +301,16 @@ exportfs_monitor ()
fi
}
+testdir() {
+ if [ ! -d $1 ]; then
+ mkdir -p "$1"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Unable to create directory $1"
+ return 1
+ fi
+ fi
+ return 0
+}
export_one() {
local dir=$1
local opts sep
@@ -331,6 +341,10 @@ export_one() {
}
exportfs_start ()
{
+ if ! forall testdir; then
+ return $OCF_ERR_INSTALLED
+ fi
+
if exportfs_monitor; then
ocf_log debug "already exported"
return $OCF_SUCCESS
@@ -428,14 +442,6 @@ exportfs_stop ()
fi
}
-testdir() {
- if [ ! -d $1 ]; then
- ocf_is_probe ||
- ocf_log err "$1 does not exist or is not a directory"
- return 1
- fi
- return 0
-}
exportfs_validate_all ()
{
if echo "$OCF_RESKEY_fsid" | grep -q -F ','; then
@@ -447,9 +453,6 @@ exportfs_validate_all ()
ocf_exit_reason "use integer fsid when exporting multiple directories"
return $OCF_ERR_CONFIGURED
fi
- if ! forall testdir; then
- return $OCF_ERR_INSTALLED
- fi
}
for dir in $OCF_RESKEY_directory; do
@@ -466,7 +469,7 @@ for dir in $OCF_RESKEY_directory; do
fi
else
case "$__OCF_ACTION" in
- stop|monitor)
+ stop|monitor|validate-all)
canonicalized_dir="$dir"
ocf_log debug "$dir does not exist"
;;
From 8ee41af82cda35149f8e0cfede6a8ddef3e221e1 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 11 Jan 2023 13:25:57 +0100
Subject: [PATCH 2/2] pgsql: dont run promotable and file checks that could be
on shared storage during validate-all action
---
heartbeat/pgsql | 53 +++++++++++++++++++++++++++++--------------------
1 file changed, 32 insertions(+), 21 deletions(-)
diff --git a/heartbeat/pgsql b/heartbeat/pgsql
index aa8a13a84..532063ac5 100755
--- a/heartbeat/pgsql
+++ b/heartbeat/pgsql
@@ -1835,7 +1835,7 @@ check_config() {
if [ ! -f "$1" ]; then
if ocf_is_probe; then
- ocf_log info "Configuration file is $1 not readable during probe."
+ ocf_log info "Unable to read $1 during probe."
rc=1
else
ocf_exit_reason "Configuration file $1 doesn't exist"
@@ -1846,8 +1846,7 @@ check_config() {
return $rc
}
-# Validate most critical parameters
-pgsql_validate_all() {
+validate_ocf_check_level_10() {
local version
local check_config_rc
local rep_mode_string
@@ -1883,12 +1882,6 @@ pgsql_validate_all() {
fi
fi
- getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
- if [ ! $? -eq 0 ]; then
- ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
- return $OCF_ERR_INSTALLED;
- fi
-
if ocf_is_probe; then
ocf_log info "Don't check $OCF_RESKEY_pgdata during probe"
else
@@ -1898,18 +1891,6 @@ pgsql_validate_all() {
fi
fi
- if [ -n "$OCF_RESKEY_monitor_user" -a ! -n "$OCF_RESKEY_monitor_password" ]
- then
- ocf_exit_reason "monitor password can't be empty"
- return $OCF_ERR_CONFIGURED
- fi
-
- if [ ! -n "$OCF_RESKEY_monitor_user" -a -n "$OCF_RESKEY_monitor_password" ]
- then
- ocf_exit_reason "monitor_user has to be set if monitor_password is set"
- return $OCF_ERR_CONFIGURED
- fi
-
if is_replication || [ "$OCF_RESKEY_rep_mode" = "slave" ]; then
if [ `printf "$version\n9.1" | sort -n | head -1` != "9.1" ]; then
ocf_exit_reason "Replication mode needs PostgreSQL 9.1 or higher."
@@ -2027,6 +2008,35 @@ pgsql_validate_all() {
return $OCF_SUCCESS
}
+# Validate most critical parameters
+pgsql_validate_all() {
+ local rc
+
+ getent passwd $OCF_RESKEY_pgdba >/dev/null 2>&1
+ if [ ! $? -eq 0 ]; then
+ ocf_exit_reason "User $OCF_RESKEY_pgdba doesn't exist";
+ return $OCF_ERR_INSTALLED;
+ fi
+
+ if [ -n "$OCF_RESKEY_monitor_user" ] && [ -z "$OCF_RESKEY_monitor_password" ]; then
+ ocf_exit_reason "monitor password can't be empty"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ if [ -z "$OCF_RESKEY_monitor_user" ] && [ -n "$OCF_RESKEY_monitor_password" ]; then
+ ocf_exit_reason "monitor_user has to be set if monitor_password is set"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ if [ "$OCF_CHECK_LEVEL" -eq 10 ]; then
+ validate_ocf_check_level_10
+ rc=$?
+ [ $rc -ne "$OCF_SUCCESS" ] && exit $rc
+ fi
+
+ return $OCF_SUCCESS
+}
+
#
# Check if we need to create a log file
@@ -2163,6 +2173,7 @@ case "$1" in
exit $OCF_SUCCESS;;
esac
+[ "$__OCF_ACTION" != "validate-all" ] && OCF_CHECK_LEVEL=10
pgsql_validate_all
rc=$?

View File

@ -0,0 +1,23 @@
--- ClusterLabs-resource-agents-fd0720f7/heartbeat/pgsqlms 2023-01-16 10:54:30.897188238 +0100
+++ pgsqlms 2023-01-10 14:21:19.281286242 +0100
@@ -1351,12 +1351,14 @@
return $OCF_ERR_ARGS;
}
- $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts};
- unless ($guc =~ /\bapplication_name='?$nodename'?\b/) {
- ocf_exit_reason(
- q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }.
- q{It is currently set to '%s'}, $nodename, $guc );
- return $OCF_ERR_ARGS;
+ if ( $ocf_check_level == 10 ) {
+ $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts};
+ unless ($guc =~ /\bapplication_name='?$nodename'?\b/) {
+ ocf_exit_reason(
+ q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }.
+ q{It is currently set to '%s'}, $nodename, $guc );
+ return $OCF_ERR_ARGS;
+ }
}
}
else {

View File

@ -69,7 +69,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.9.0
Release: 29%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}.3
Release: 40%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
@ -120,9 +120,22 @@ Patch28: bz2103370-ocf-tester-2-remove-deprecated-lrmd-lrmadmin-code.patch
Patch29: bz1908146-bz1908147-bz1908148-bz1949114-openstack-agents-set-domain-parameters-default.patch
Patch30: bz2090370-CTDB-move-process-to-root-cgroup-if-rt-enabled.patch
Patch31: bz2116941-ethmonitor-ovsmonitor-pgsql-fix-attrd_updater-q.patch
Patch32: bz2130986-azure-events-az-new-ra.patch
Patch33: bz2134536-IPsrcaddr-proto-metric-scope-default-route-fixes.patch
Patch34: bz2144866-vdo-vol-dont-fail-probe-action.patch
Patch32: bz2109159-storage_mon-1-exit-after-help.patch
Patch33: bz2109159-storage_mon-2-fix-specified-scores-count.patch
Patch34: bz2109159-storage_mon-3-fix-child-process-exit.patch
Patch35: bz2109159-storage_mon-4-fix-possible-false-negatives.patch
Patch36: bz1905820-LVM-activate-fix-return-codes.patch
Patch37: bz1977012-azure-events-az-new-ra.patch
Patch38: bz2133682-IPsrcaddr-proto-metric-scope-default-route-fixes.patch
Patch39: bz2141836-vdo-vol-dont-fail-probe-action.patch
Patch40: bz2049319-Filesystem-add-support-for-Amazon-EFS.patch
Patch41: bz2127117-nfsserver-nfsv4_only-parameter.patch
Patch42: bz2139131-mysql-common-return-error-if-kill-fails.patch
Patch43: bz2157873-1-all-ras-validate-all-OCF_CHECK_LEVEL-10.patch
Patch44: bz2157873-2-Filesystem-CTDB-validate-all-improvements.patch
Patch45: bz2157873-3-pgsqlms-validate-all-OCF_CHECK_LEVEL-10.patch
Patch46: bz2157873-4-exportfs-pgsql-validate-all-fixes.patch
Patch47: bz2157873-5-pgsqlms-alidate-all-OCF_CHECK_LEVEL-10.patch
# bundle patches
Patch1000: 7-gcp-bundled.patch
@ -337,6 +350,19 @@ exit 1
%patch32 -p1
%patch33 -p1
%patch34 -p1
%patch35 -p1
%patch36 -p1
%patch37 -p1
%patch38 -p1
%patch39 -p1
%patch40 -p1
%patch41 -p1
%patch42 -p1
%patch43 -p1
%patch44 -p1
%patch45 -p1
%patch46 -p1
%patch47 -p1
chmod 755 heartbeat/nova-compute-wait
chmod 755 heartbeat/NovaEvacuate
@ -912,21 +938,45 @@ ccs_update_schema > /dev/null 2>&1 ||:
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog
* Tue Nov 22 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29.3
* Tue Jan 17 2023 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-40
- all agents: dont check notify/promotable settings during
validate-action
Resolves: rhbz#2157873
* Thu Nov 24 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-35
- mysql-common: return error in stop-action if kill fails to stop
the process, so the node can get fenced
Resolves: rhbz#2139131
* Tue Nov 22 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-34
- nfsserver: add nfsv4_only parameter to make it run without
rpc-statd/rpcbind services
Resolves: rhbz#2127117
* Mon Nov 14 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-33
- Filesystem: add support for Amazon EFS (Elastic File System)
- vdo-vol: dont fail probe action when the underlying device doesnt
exist
Resolves: rhbz#2144866
Resolves: rhbz#2049319
Resolves: rhbz#2141836
* Fri Oct 14 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29.2
* Fri Oct 14 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-31
- IPsrcaddr: proto, metric, scope and default route fixes
Resolves: rhbz#2134536
Resolves: rhbz#2133682
* Mon Oct 3 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29.1
* Thu Sep 8 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-30
- storage_mon: fix specified scores count and possible false negatives
- LVM-activate: use correct return codes to fix unexpected behaviour
- azure-events-az: new resource agent
Resolves: rhbz#2130986
Resolves: rhbz#2109159
Resolves: rhbz#1905820
Resolves: rhbz#1977012
* Wed Aug 10 2022 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.9.0-29
- ethmonitor/pgsql: remove attrd_updater "-q" parameter to solve issue