import resource-agents-4.1.1-39.el8

This commit is contained in:
CentOS Sources 2020-01-21 14:47:07 -05:00 committed by Stepan Oksanichenko
parent 4f27a0a17b
commit ccd9eb7a2e
49 changed files with 3606 additions and 11 deletions

View File

@ -0,0 +1,24 @@
From 0d53e80957a00016418080967892337b1b13f99d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 30 Jul 2019 11:23:07 +0200
Subject: [PATCH] iSCSILogicalUnit: only create acls if it doesnt exist
---
heartbeat/iSCSILogicalUnit.in | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/iSCSILogicalUnit.in b/heartbeat/iSCSILogicalUnit.in
index 0fe85b593..02045d754 100644
--- a/heartbeat/iSCSILogicalUnit.in
+++ b/heartbeat/iSCSILogicalUnit.in
@@ -420,8 +420,8 @@ iSCSILogicalUnit_start() {
if [ -n "${OCF_RESKEY_allowed_initiators}" ]; then
for initiator in ${OCF_RESKEY_allowed_initiators}; do
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC
- ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
+ [ -d "/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls" ] || ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls create ${initiator} add_mapped_luns=False || exit $OCF_ERR_GENERIC
+ [ -d "/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/acls/${initiator}" ] || ocf_run targetcli /iscsi/${OCF_RESKEY_target_iqn}/tpg1/acls/${initiator} create ${OCF_RESKEY_lun} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
done
fi

View File

@ -0,0 +1,93 @@
From db6d12f4b7b10e214526512abe35307270f81c03 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 8 Aug 2019 14:48:13 +0200
Subject: [PATCH] mysql/mariadb/galera: use runuser/su to avoid using SELinux
DAC_OVERRIDE
---
heartbeat/galera | 11 ++++++-----
heartbeat/mysql-common.sh | 16 ++++++++++++----
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/heartbeat/galera b/heartbeat/galera
index 9b9fe5569..056281fb8 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -624,8 +624,7 @@ detect_last_commit()
local recover_args="--defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
- --datadir=$OCF_RESKEY_datadir \
- --user=$OCF_RESKEY_user"
+ --datadir=$OCF_RESKEY_datadir"
local recovery_file_regex='s/.*WSREP\:.*position\s*recovery.*--log_error='\''\([^'\'']*\)'\''.*/\1/p'
local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
@@ -654,7 +653,8 @@ detect_last_commit()
ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
- ${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} $recover_args --wsrep-recover --log-error=$tmp 2>/dev/null"
last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)"
if [ -z "$last_commit" ]; then
@@ -670,8 +670,9 @@ detect_last_commit()
# we can only rollback the transaction, but that's OK
# since the DB will get resynchronized anyway
ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
- ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
- --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+ --tc-heuristic-recover=rollback --log-error=$tmp 2>/dev/null"
last_commit="$(cat $tmp | sed -n $recovered_position_regex | tail -1)"
if [ ! -z "$last_commit" ]; then
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index d5ac972cd..65db9bf85 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -2,6 +2,13 @@
#######################################################################
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
# Attempt to detect a default binary
OCF_RESKEY_binary_default=$(which mysqld_safe 2> /dev/null)
if [ "$OCF_RESKEY_binary_default" = "" ]; then
@@ -207,7 +214,7 @@ mysql_common_prepare_dirs()
# already existed, check whether it is writable by the configured
# user
for dir in $pid_dir $socket_dir; do
- if ! su -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
+ if ! $SU -s /bin/sh - $OCF_RESKEY_user -c "test -w $dir"; then
ocf_exit_reason "Directory $dir is not writable by $OCF_RESKEY_user"
exit $OCF_ERR_PERM;
fi
@@ -219,14 +226,15 @@ mysql_common_start()
local mysql_extra_params="$1"
local pid
- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
+ $SU - $OCF_RESKEY_user -s /bin/sh -c \
+ "${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
--pid-file=$OCF_RESKEY_pid \
--socket=$OCF_RESKEY_socket \
--datadir=$OCF_RESKEY_datadir \
--log-error=$OCF_RESKEY_log \
- --user=$OCF_RESKEY_user $OCF_RESKEY_additional_parameters \
+ $OCF_RESKEY_additional_parameters \
$mysql_extra_params >/dev/null 2>&1 &
- pid=$!
+ pid=$!"
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required.

View File

@ -0,0 +1,82 @@
From 4ee9a7026d7ed15b0b5cd26f06a21d04fc05d14e Mon Sep 17 00:00:00 2001
From: Roger Zhou <zzhou@suse.com>
Date: Mon, 1 Apr 2019 22:57:26 +0800
Subject: [PATCH 1/2] LVM-activate: return OCF_NOT_RUNNING on initial probe
In the use case of LVM on top of cluster md/raid, when the fenced node
rejoins the cluster, Pacemaker will run the monitor action for the
probe operation. At that time, the LVM PV and VG won't exist until the cluster
md/raid gets assembled, so the probe should return $OCF_NOT_RUNNING
instead of $OCF_ERR_CONFIGURED.
Signed-off-by: Roger Zhou <zzhou@suse.com>
---
heartbeat/LVM-activate | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 3c462c75c..91ac05c34 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -329,6 +329,7 @@ lvmlockd_check()
# Good: lvmlockd is running, and clvmd is not running
if ! pgrep lvmlockd >/dev/null 2>&1 ; then
if ocf_is_probe; then
+ ocf_log info "initial probe: lvmlockd is not running yet."
exit $OCF_NOT_RUNNING
fi
@@ -481,6 +482,11 @@ lvm_validate() {
exit $OCF_SUCCESS
fi
+ if ocf_is_probe; then
+ ocf_log info "initial probe: VG [${VG}] is not found on any block device yet."
+ exit $OCF_NOT_RUNNING
+ fi
+
ocf_exit_reason "Volume group[${VG}] doesn't exist, or not visible on this node!"
exit $OCF_ERR_CONFIGURED
fi
From df2f58c400b1f6f239f9e1c1fdf6ce0875639b43 Mon Sep 17 00:00:00 2001
From: Roger Zhou <zzhou@suse.com>
Date: Mon, 1 Apr 2019 23:02:54 +0800
Subject: [PATCH 2/2] LVM-activate: align dmsetup report command to standard
Namely, change 'vgname/lvname' to 'vg_name/lv_name'. The dmsetup
report command follows the lvm2 selection criteria field name standard.
- dmsetup v1.02.86 (lvm2 v2_02_107) - 23rd June 2014
"Add dmsetup -S/--select to define selection criteria"
- dmsetup info -c -S help
Signed-off-by: Roger Zhou <zzhou@suse.com>
---
heartbeat/LVM-activate | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 91ac05c34..730d9a09d 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -707,7 +707,7 @@ tagging_deactivate() {
# method:
#
# lv_count=$(vgs --foreign -o lv_count --noheadings ${VG} 2>/dev/null | tr -d '[:blank:]')
-# dm_count=$(dmsetup --noheadings info -c -S "vgname=${VG}" 2>/dev/null | grep -c "${VG}-")
+# dm_count=$(dmsetup --noheadings info -c -S "vg_name=${VG}" 2>/dev/null | grep -c "${VG}-")
# test $lv_count -eq $dm_count
#
# It works, but we cannot afford to use LVM command in lvm_status. LVM command is expensive
@@ -730,9 +730,9 @@ lvm_status() {
if [ -n "${LV}" ]; then
# dmsetup ls? It cannot accept device name. It's
# too heavy to list all DM devices.
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG} && lvname=${LV}" | grep -c -v '^No devices found')
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG} && lv_name=${LV}" | grep -c -v '^No devices found')
else
- dm_count=$(dmsetup info --noheadings --noflush -c -S "vgname=${VG}" | grep -c -v '^No devices found')
+ dm_count=$(dmsetup info --noheadings --noflush -c -S "vg_name=${VG}" | grep -c -v '^No devices found')
fi
if [ $dm_count -eq 0 ]; then

View File

@ -0,0 +1,46 @@
From d8400a30604229d349f36855c30a6a438204023b Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 11:29:17 +0200
Subject: [PATCH] Avoid double call to podman inspect in podman_simple_status()
Right now podman_simple_status() does the following:
- It calls container_exists() which then calls "podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1"
- Then it calls "podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null"
This duplication is unnecessary and we can rely on the second podman inspect
call. We need to do this because podman inspect calls are very expensive as
soon as moderate I/O kicks in.
Tested as follows:
1) Injected the change on an existing bundle-based cluster
2) Observed that monitoring operations kept working okay
3) Verified by adding set -x that only a single podman inspect per monitor
operation was called (as opposed to two before)
4) Restarted a bundle with an OCF resource inside correctly
5) Did a podman stop of a bundle and correctly observed that:
5.a) It was detected as non running:
* haproxy-bundle-podman-1_monitor_60000 on controller-0 'not running' (7): call=192, status=complete, exitreason='',
last-rc-change='Wed Jun 12 09:22:18 2019', queued=0ms, exec=0ms
5.b) It was correctly started afterwards
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 5 -----
1 file changed, 5 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 34e11da6b..b2b3081f9 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -238,11 +238,6 @@ podman_simple_status()
{
local val
- container_exists
- if [ $? -ne 0 ]; then
- return $OCF_NOT_RUNNING
- fi
-
# retrieve the 'Running' attribute for the container
val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then

View File

@ -0,0 +1,63 @@
From 9685e8e6bf2896377a9cf0e07a85de5dd5fcf2df Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 12:00:31 +0200
Subject: [PATCH] Simplify podman_monitor()
Before this change podman_monitor() does two things:
\-> podman_simple_status()
\-> podman inspect {{.State.Running}}
\-> if podman_simple_status == 0 then monitor_cmd_exec()
\-> if [ -z "$OCF_RESKEY_monitor_cmd" ]; then # so if OCF_RESKEY_monitor_cmd is empty we just return SUCCESS
return $rc
fi
# if OCF_RESKEY_monitor_cmd is set to something we execute it
podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd
Let's actually only rely on podman exec as invoked inside monitor_cmd_exec
when $OCF_RESKEY_monitor_cmd is non empty (which is the default as it is set to "/bin/true").
When there is no monitor_cmd command defined, it makes sense to rely on the podman inspect
call contained in podman_simple_status().
Tested as follows:
1) Injected the change on an existing bundle-based cluster
2) Observed that monitoring operations kept working okay
3) Restarted rabbitmq-bundle and galera-bundle successfully
4) Killed a container and we correctly detected the monitor failure
Jun 12 09:52:12 controller-0 pacemaker-controld[25747]: notice: controller-0-haproxy-bundle-podman-1_monitor_60000:230 [ ocf-exit-reason:monitor cmd failed (rc=125), output: cannot exec into container that is not running\n ]
5) Container correctly got restarted after the monitor failure:
haproxy-bundle-podman-1 (ocf::heartbeat:podman): Started controller-0
6) Stopped and removed a container and pcmk detected it correctly:
Jun 12 09:55:15 controller-0 podman(haproxy-bundle-podman-1)[841411]: ERROR: monitor cmd failed (rc=125), output: unable to exec into haproxy-bundle-podman-1: no container with name or ID haproxy-bundle-podman-1 found: no such container
Jun 12 09:55:15 controller-0 pacemaker-execd[25744]: notice: haproxy-bundle-podman-1_monitor_60000:841411:stderr [ ocf-exit-reason:monitor cmd failed (rc=125), output: unable to exec into haproxy-bundle-podman-1: no container with name or ID haproxy-bundle-podman-1 found: no such container ]
7) pcmk was able to start the container that was stopped and removed:
Jun 12 09:55:16 controller-0 pacemaker-controld[25747]: notice: Result of start operation for haproxy-bundle-podman-1 on controller-0: 0 (ok)
8) Added 'set -x' to the RA and correctly observed that no 'podman inspect' has been invoked during monitoring operations
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index b2b3081f9..a9bd57dea 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -255,15 +255,10 @@ podman_simple_status()
podman_monitor()
{
- local rc=0
-
- podman_simple_status
- rc=$?
-
- if [ $rc -ne 0 ]; then
- return $rc
+ if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
+ podman_simple_status
+ return $?
fi
-
monitor_cmd_exec
}

View File

@ -0,0 +1,34 @@
From 69c5d35a7a5421d4728db824558007bbb91a9d4a Mon Sep 17 00:00:00 2001
From: Michele Baldessari <michele@acksyn.org>
Date: Wed, 12 Jun 2019 12:02:06 +0200
Subject: [PATCH] Remove unneeded podman exec --help call
There are no podman releases that do not have the exec argument, so
let's just drop this remnant that came from the docker RA.
Signed-off-by: Michele Baldessari <michele@acksyn.org>
---
heartbeat/podman | 10 ++--------
1 file changed, 2 insertions(+), 8 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index a9bd57dea..858023555 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -190,14 +190,8 @@ monitor_cmd_exec()
return $rc
fi
- if podman exec --help >/dev/null 2>&1; then
- out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
- rc=$?
- else
- out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(podman inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
- rc=$?
- fi
-
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
if [ $rc -eq 127 ]; then
ocf_log err "monitor cmd failed (rc=$rc), output: $out"
ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."

View File

@ -0,0 +1,161 @@
From 6016283dfdcb45bf750f96715fc653a4c0904bca Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Fri, 28 Jun 2019 13:34:40 +0200
Subject: [PATCH] podman: only use exec to manage container's lifecycle
Under heavy IO load, podman may be impacted and take a long time
to execute some actions. If that takes more than the default
20s container monitoring timeout, containers will restart unexpectedly.
Replace all IO-sensitive podman calls (inspect, exists...) by
equivalent "podman exec" calls, because the latter command seems
less prone to performance degradation under IO load.
With this commit, the resource agent now requires podman 1.0.2+,
because it relies on two different patches [1,2] that improve
IO performance and make it possible to distinguish between the
"container stopped" and "container doesn't exist" error codes.
Tested on an OpenStack environment with podman 1.0.2, with the
following scenario:
. regular start/stop/monitor operations
. probe operations (pcs resource cleanup/refresh)
. unmanage/manage operations
. reboot
[1] https://github.com/containers/libpod/commit/90b835db69d589de559462d988cb3fae5cf1ef49
[2] https://github.com/containers/libpod/commit/a19975f96d2ee7efe186d9aa0be42285cfafa3f4
---
heartbeat/podman | 75 ++++++++++++++++++++++++------------------------
1 file changed, 37 insertions(+), 38 deletions(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 51f6ba883..8fc2c4695 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -129,9 +129,6 @@ the health of the container. This command must return 0 to indicate that
the container is healthy. A non-zero return code will indicate that the
container has failed and should be recovered.
-If 'podman exec' is supported, it is used to execute the command. If not,
-nsenter is used.
-
Note: Using this method for monitoring processes inside a container
is not recommended, as containerd tries to track processes running
inside the container and does not deal well with many short-lived
@@ -192,17 +189,13 @@ monitor_cmd_exec()
local rc=$OCF_SUCCESS
local out
- if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
- return $rc
- fi
-
out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
rc=$?
- if [ $rc -eq 127 ]; then
- ocf_log err "monitor cmd failed (rc=$rc), output: $out"
- ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
- # there is no recovering from this, exit immediately
- exit $OCF_ERR_ARGS
+ # 125: no container with name or ID ${CONTAINER} found
+ # 126: container state improper (not running)
+ # 127: any other error
+ if [ $rc -eq 125 ] || [ $rc -eq 126 ]; then
+ rc=$OCF_NOT_RUNNING
elif [ $rc -ne 0 ]; then
ocf_exit_reason "monitor cmd failed (rc=$rc), output: $out"
rc=$OCF_ERR_GENERIC
@@ -215,7 +208,16 @@ monitor_cmd_exec()
container_exists()
{
- podman inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
+ local rc
+ local out
+
+ out=$(podman exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
+ rc=$?
+ # 125: no container with name or ID ${CONTAINER} found
+ if [ $rc -ne 125 ]; then
+ return 0
+ fi
+ return 1
}
remove_container()
@@ -236,30 +238,30 @@ remove_container()
podman_simple_status()
{
- local val
-
- # retrieve the 'Running' attribute for the container
- val=$(podman inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
- if [ $? -ne 0 ]; then
- #not running as a result of container not being found
- return $OCF_NOT_RUNNING
- fi
+ local rc
- if ocf_is_true "$val"; then
- # container exists and is running
- return $OCF_SUCCESS
+ # simple status is implemented via podman exec
+ # everything besides success is considered "not running"
+ monitor_cmd_exec
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ rc=$OCF_NOT_RUNNING;
fi
-
- return $OCF_NOT_RUNNING
+ return $rc
}
podman_monitor()
{
- if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
- podman_simple_status
- return $?
- fi
+ # We rely on running podman exec to monitor the container
+ # state because that command seems to be less prone to
+ # performance issue under IO load.
+ #
+ # For probes to work, we expect cmd_exec to be able to report
+ # when a container is not running. Here, we're not interested
+ # in distinguishing whether it's stopped or non existing
+ # (there's function container_exists for that)
monitor_cmd_exec
+ return $?
}
podman_create_mounts() {
@@ -416,14 +418,6 @@ podman_validate()
exit $OCF_ERR_CONFIGURED
fi
- if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
- podman exec --help >/dev/null 2>&1
- if [ ! $? ]; then
- ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
- check_binary nsenter
- fi
- fi
-
image_exists
if [ $? -ne 0 ]; then
ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
@@ -457,6 +451,11 @@ fi
CONTAINER=$OCF_RESKEY_name
+# Note: we currently monitor podman containers by with the "podman exec"
+# command, so make sure that invocation is always valid by enforcing the
+# exec command to be non-empty
+: ${OCF_RESKEY_monitor_cmd:=/bin/true}
+
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;

View File

@ -0,0 +1,22 @@
From ef37f8a2461b5763f4510d51e08d27d8b1f76937 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 23 Jul 2019 15:47:17 +0200
Subject: [PATCH] LVM-activate: fix monitor might hang due to lvm_validate
which was added by accident
---
heartbeat/LVM-activate | 1 -
1 file changed, 1 deletion(-)
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 9c7c721bf..3df40c894 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -852,7 +852,6 @@ stop)
lvm_stop
;;
monitor)
- lvm_validate
lvm_status
;;
validate-all)

View File

@ -0,0 +1,39 @@
From 1ff4ce7cbe58b5309f00ac1bbe124c562b6dcaf6 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 27 Jul 2018 16:02:26 +0200
Subject: [PATCH] CTDB: explicitly use bash shell
Upcoming recovery lock substring processing is bash specific.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
configure.ac | 1 +
heartbeat/{CTDB => CTDB.in} | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
rename heartbeat/{CTDB => CTDB.in} (99%)
diff --git a/configure.ac b/configure.ac
index 039b4942c..10f5314da 100644
--- a/configure.ac
+++ b/configure.ac
@@ -978,6 +978,7 @@ AC_CONFIG_FILES([heartbeat/slapd], [chmod +x heartbeat/slapd])
AC_CONFIG_FILES([heartbeat/sybaseASE], [chmod +x heartbeat/sybaseASE])
AC_CONFIG_FILES([heartbeat/syslog-ng], [chmod +x heartbeat/syslog-ng])
AC_CONFIG_FILES([heartbeat/vsftpd], [chmod +x heartbeat/vsftpd])
+AC_CONFIG_FILES([heartbeat/CTDB], [chmod +x heartbeat/CTDB])
AC_CONFIG_FILES([rgmanager/src/resources/ASEHAagent.sh], [chmod +x rgmanager/src/resources/ASEHAagent.sh])
AC_CONFIG_FILES([rgmanager/src/resources/apache.sh], [chmod +x rgmanager/src/resources/apache.sh])
AC_CONFIG_FILES([rgmanager/src/resources/bind-mount.sh], [chmod +x rgmanager/src/resources/bind-mount.sh])
diff --git a/heartbeat/CTDB b/heartbeat/CTDB.in
similarity index 99%
rename from heartbeat/CTDB
rename to heartbeat/CTDB.in
index 28e58cea0..7d87a4ef7 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB.in
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!@BASH_SHELL@
#
# OCF Resource Agent for managing CTDB
#

View File

@ -0,0 +1,40 @@
From 61f7cb5954d1727f58fab6d642a124ef342c8641 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 20 Feb 2019 11:24:28 +0100
Subject: [PATCH] CTDB: add ctdb_max_open_files parameter
---
heartbeat/CTDB.in | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 0d58c850a..bbf8ef627 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -288,6 +288,14 @@ What debug level to run at (0-10). Higher means more verbose.
<content type="integer" default="2" />
</parameter>
+<parameter name="ctdb_max_open_files" required="0">
+<longdesc lang="en">
+Maximum number of open files (for ulimit -n)
+</longdesc>
+<shortdesc lang="en">Max open files</shortdesc>
+<content type="integer" default="" />
+</parameter>
+
<parameter name="smb_conf" unique="0" required="0">
<longdesc lang="en">
Path to default samba config file. Only necessary if CTDB
@@ -611,6 +619,11 @@ ctdb_start() {
start_as_disabled="--start-as-disabled"
ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
+ # set nofile ulimit for ctdbd process
+ if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then
+ ulimit -n "$OCF_RESKEY_ctdb_max_open_files"
+ fi
+
# Start her up
"$OCF_RESKEY_ctdbd_binary" \
--reclock="$OCF_RESKEY_ctdb_recovery_lock" \

View File

@ -0,0 +1,131 @@
From 8c61f2019d11781b737251b5cf839437b25fc53f Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 25 Jul 2018 23:15:10 +0200
Subject: [PATCH 1/3] CTDB: fix incorrect db corruption reports (bsc#1101668)
If a database was disconnected during an active transaction, then
tdbdump may fail with e.g.:
> /usr/bin/tdbdump /var/lib/ctdb/persistent/secrets.tdb.1
Failed to open /var/lib/ctdb/persistent/secrets.tdb.1
tdb(/var/lib/ctdb/persistent/secrets.tdb.1): FATAL:
tdb_transaction_recover: attempt to recover read only database
This does *not* indicate corruption, only that tdbdump, which opens the
database readonly, isn't able to perform recovery.
Using tdbtool check, instead of tdbdump, passes:
> tdbtool /var/lib/ctdb/persistent/secrets.tdb.1 check
tdb_transaction_recover: recovered 2146304 byte database
Database integrity is OK and has 2 records.
Drop the tdbdump checks, and instead rely on the core ctdb event script,
which performs the same checks with tdbtool.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 1456ea32b..28e58cea0 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -392,6 +392,8 @@ enable_event_scripts() {
local event_dir
event_dir=$OCF_RESKEY_ctdb_config_dir/events.d
+ chmod u+x "$event_dir/00.ctdb" # core database health check
+
if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
chmod u+x "$event_dir/10.interface"
else
@@ -563,17 +565,6 @@ ctdb_start() {
rv=$?
[ $rv -ne 0 ] && return $rv
- # Die if databases are corrupted
- persistent_db_dir="${OCF_RESKEY_ctdb_dbdir}/persistent"
- mkdir -p $persistent_db_dir 2>/dev/null
- for pdbase in $persistent_db_dir/*.tdb.[0-9]; do
- [ -f "$pdbase" ] || break
- /usr/bin/tdbdump "$pdbase" >/dev/null 2>/dev/null || {
- ocf_exit_reason "Persistent database $pdbase is corrupted! CTDB will not start."
- return $OCF_ERR_GENERIC
- }
- done
-
# Add necessary configuration to smb.conf
init_smb_conf
if [ $? -ne 0 ]; then
@@ -737,9 +728,8 @@ ctdb_monitor() {
ctdb_validate() {
- # Required binaries (full path to tdbdump is intentional, as that's
- # what's used in ctdb_start, which was lifted from the init script)
- for binary in pkill /usr/bin/tdbdump; do
+ # Required binaries
+ for binary in pkill; do
check_binary $binary
done
From 1ff4ce7cbe58b5309f00ac1bbe124c562b6dcaf6 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Fri, 27 Jul 2018 16:02:26 +0200
Subject: [PATCH 2/3] CTDB: explicitly use bash shell
Upcoming recovery lock substring processing is bash specific.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
configure.ac | 1 +
heartbeat/CTDB.in | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 7d87a4ef7..f9b5c564f 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -134,8 +134,8 @@ For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
<parameter name="ctdb_recovery_lock" unique="1" required="1">
<longdesc lang="en">
-The location of a shared lock file, common across all nodes.
-This must be on shared storage, e.g.: /shared-fs/samba/ctdb.lock
+The location of a shared lock file or helper binary, common across all nodes.
+See CTDB documentation for details.
</longdesc>
<shortdesc lang="en">CTDB shared lock file</shortdesc>
<content type="string" default="" />
@@ -757,13 +757,24 @@ ctdb_validate() {
return $OCF_ERR_CONFIGURED
fi
- lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
- touch "$lock_dir/$$" 2>/dev/null
- if [ $? != 0 ]; then
- ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
- return $OCF_ERR_ARGS
+ if [ "${OCF_RESKEY_ctdb_recovery_lock:0:1}" == '!' ]; then
+ # '!' prefix means recovery lock is handled via a helper binary
+ binary="${OCF_RESKEY_ctdb_recovery_lock:1}"
+ binary="${binary%% *}" # trim any parameters
+ if [ -z "$binary" ]; then
+ ocf_exit_reason "ctdb_recovery_lock invalid helper"
+ return $OCF_ERR_CONFIGURED
+ fi
+ check_binary "${binary}"
+ else
+ lock_dir=$(dirname "$OCF_RESKEY_ctdb_recovery_lock")
+ touch "$lock_dir/$$" 2>/dev/null
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Directory for lock file '$OCF_RESKEY_ctdb_recovery_lock' does not exist, or is not writable."
+ return $OCF_ERR_ARGS
+ fi
+ rm "$lock_dir/$$"
fi
- rm "$lock_dir/$$"
return $OCF_SUCCESS
}

View File

@ -0,0 +1,452 @@
From 30b9f55325d2acfba27aa6859c7360e10b7201d7 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 5 Jun 2019 00:41:13 +0200
Subject: [PATCH 1/3] CTDB: support Samba 4.9+
With Samba 4.9+, all ctdbd parameters have moved to config files.
Generate a new /etc/ctdb/ctdb.conf file during ctdb startup, based on RA
configuration.
Event scripts in Samba 4.9+ are also no longer enabled/disabled based on
file mode. Use the "ctdb event script enable/disable" helpers, which now
work without a running ctdbd.
Fixes: https://github.com/ClusterLabs/resource-agents/issues/1196
Signed-off-by: David Disseldorp <ddiss@suse.de>
Signed-off-by: Noel Power <noel.power@suse.com>
Signed-off-by: Amitay Isaacs <amitay@samba.org>
---
heartbeat/CTDB.in | 214 ++++++++++++++++++++++++++++++++++++----------
1 file changed, 167 insertions(+), 47 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 4dd646896..79a2f97e7 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -143,6 +143,10 @@ OCF_RESKEY_smb_fileid_algorithm_default=""
#######################################################################
+ctdb_version() {
+ $OCF_RESKEY_ctdb_binary version | awk '{print $NF}' | sed "s/[-\.]\?[[:alpha:]].*//"
+}
+
meta_data() {
cat <<END
<?xml version="1.0"?>
@@ -256,7 +260,7 @@ host any public ip addresses.
<longdesc lang="en">
The directory containing various CTDB configuration files.
The "nodes" and "notify.sh" scripts are expected to be
-in this directory, as is the "events.d" subdirectory.
+in this directory.
</longdesc>
<shortdesc lang="en">CTDB config file directory</shortdesc>
<content type="string" default="/etc/ctdb" />
@@ -282,8 +286,10 @@ Full path to the CTDB cluster daemon binary.
<longdesc lang="en">
Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
+With CTDB 4.9.0 and later the socket path is hardcoded at build
+time, so this parameter is ignored.
</longdesc>
-<shortdesc lang="en">CTDB socket location</shortdesc>
+<shortdesc lang="en">CTDB socket location (ignored with CTDB 4.9+)</shortdesc>
<content type="string" default="${OCF_RESKEY_ctdb_socket}" />
</parameter>
@@ -421,16 +427,28 @@ invoke_ctdb() {
timeout=$((OCF_RESKEY_CRM_meta_timeout/1000))
timelimit=$((OCF_RESKEY_CRM_meta_timeout/1000))
fi
- $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \
- -t $timeout -T $timelimit \
- "$@"
+
+ local vers=$(ctdb_version)
+ ocf_version_cmp "$vers" "4.9.0"
+
+ # if version < 4.9.0 specify '--socket' otherwise it's
+ # a compiled option
+ if [ "$?" -eq "0" ]; then
+ $OCF_RESKEY_ctdb_binary --socket="$OCF_RESKEY_ctdb_socket" \
+ -t $timeout -T $timelimit \
+ "$@"
+ else
+ $OCF_RESKEY_ctdb_binary \
+ -t $timeout -T $timelimit \
+ "$@"
+ fi
}
# Enable any event scripts that are explicitly required.
# Any others will ultimately be invoked or not based on how they ship
# with CTDB, but will generally have no effect, beacuase the relevant
# CTDB_MANAGES_* options won't be set in /etc/sysconfig/ctdb.
-enable_event_scripts() {
+enable_event_scripts_chmod() {
local event_dir
event_dir=$OCF_RESKEY_ctdb_config_dir/events.d
@@ -454,6 +472,36 @@ enable_event_scripts() {
fi
}
+enable_event_scripts_symlink() {
+ # event scripts are symlinked once enabled, with the link source in...
+ mkdir -p "$OCF_RESKEY_ctdb_config_dir/events/legacy" 2>/dev/null
+
+ invoke_ctdb event script enable legacy 00.ctdb
+
+ if [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ]; then
+ invoke_ctdb event script enable legacy 10.interface
+ else
+ invoke_ctdb event script disable legacy 10.interface
+ fi
+ if [ -f "${OCF_RESKEY_ctdb_config_dir}/static-routes" ]; then
+ invoke_ctdb event script enable legacy 11.routing
+ else
+ invoke_ctdb event script disable legacy 11.routing
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind"; then
+ invoke_ctdb event script enable legacy 49.winbind
+ else
+ invoke_ctdb event script disable legacy 49.winbind
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba"; then
+ invoke_ctdb event script enable legacy 50.samba
+ else
+ invoke_ctdb event script disable legacy 50.samba
+ fi
+}
+
# This function has no effect (currently no way to set CTDB_SET_*)
# but remains here in case we need it in future.
set_ctdb_variables() {
@@ -556,6 +604,46 @@ append_ctdb_sysconfig() {
[ -n "$2" ] && echo "$1=$2" >> "$CTDB_SYSCONFIG"
}
+generate_ctdb_config() {
+ local ctdb_config="$OCF_RESKEY_ctdb_config_dir/ctdb.conf"
+ # Regenerates ctdb.conf from the RA parameters (called from ctdb_start for CTDB 4.9+)
+ # Backup existing config if we're not already using an auto-generated one
+ grep -qa '# CTDB-RA: Auto-generated' "$ctdb_config" || cp -p "$ctdb_config" "${ctdb_config}.ctdb-ra-orig"
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Unable to backup $ctdb_config to ${ctdb_config}.ctdb-ra-orig"
+ fi
+
+ local log_option="file:$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="syslog"
+ fi
+
+ local start_as_disabled="false"
+ ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" && start_as_disabled="true"
+ # Create the three database directories that the generated config points at
+ local dbdir_volatile="$OCF_RESKEY_ctdb_dbdir/volatile"
+ [ -d "$dbdir_volatile" ] || mkdir -p "$dbdir_volatile" 2>/dev/null
+ local dbdir_persistent="$OCF_RESKEY_ctdb_dbdir/persistent"
+ [ -d "$dbdir_persistent" ] || mkdir -p "$dbdir_persistent" 2>/dev/null
+ local dbdir_state="$OCF_RESKEY_ctdb_dbdir/state"
+ [ -d "$dbdir_state" ] || mkdir -p "$dbdir_state" 2>/dev/null
+
+cat >"$ctdb_config" <<EOF
+# CTDB-RA: Auto-generated
+[logging]
+ location = $log_option
+ log level = $OCF_RESKEY_ctdb_debuglevel
+[cluster]
+ recovery lock = $OCF_RESKEY_ctdb_recovery_lock
+[database]
+ volatile database directory = $dbdir_volatile
+ persistent database directory = $dbdir_persistent
+ state database directory = $dbdir_state
+[legacy]
+ start as disabled = $start_as_disabled
+EOF
+}
+
# Generate a new, minimal CTDB config file that's just enough
# to get CTDB running as configured by the RA parameters.
generate_ctdb_sysconfig() {
@@ -589,6 +677,58 @@ EOF
}
+invoke_ctdbd() {
+ local vers="$1"
+ # $1 is the ctdb version string; it selects config-file startup vs. command-line options
+ ocf_version_cmp "$vers" "4.9.0"
+ if [ "$?" -ne "0" ]; then
+ # With 4.9+, all ctdbd binary parameters are provided as
+ # config settings
+ "$OCF_RESKEY_ctdbd_binary"
+ return
+ fi
+
+ # Use logfile by default, or syslog if asked for
+ local log_option
+ # --logging supported from v4.3.0 and --logfile / --syslog support
+ # has been removed from newer versions
+ ocf_version_cmp "$vers" "4.2.14"
+ if [ "$?" -eq "2" ]; then
+ log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--logging=syslog"
+ fi
+ else
+ log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--syslog"
+ fi
+ fi
+
+ # public addresses file (should not be present, but need to set for correctness if it is)
+ local pub_addr_option
+ pub_addr_option=""
+ [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
+ pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses"
+ # start as disabled
+ local start_as_disabled
+ start_as_disabled="--start-as-disabled"
+ ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
+ # the three option variables below stay unquoted so that empty ones expand to no argument
+ "$OCF_RESKEY_ctdbd_binary" \
+ --reclock="$OCF_RESKEY_ctdb_recovery_lock" \
+ --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \
+ --socket="$OCF_RESKEY_ctdb_socket" \
+ --dbdir="$OCF_RESKEY_ctdb_dbdir" \
+ --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \
+ --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \
+ --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \
+ --transport=tcp \
+ $start_as_disabled $log_option $pub_addr_option \
+ -d "$OCF_RESKEY_ctdb_debuglevel"
+}
+
+
ctdb_usage() {
cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
@@ -614,27 +754,26 @@ ctdb_start() {
return $OCF_ERR_GENERIC
fi
- # Generate new CTDB sysconfig
- generate_ctdb_sysconfig
- enable_event_scripts
+ local version=$(ctdb_version)
- # Use logfile by default, or syslog if asked for
- local log_option
- # --logging supported from v4.3.0 and --logfile / --syslog support
- # has been removed from newer versions
- version=$(ctdb version | awk '{print $NF}')
- ocf_version_cmp "$version" "4.2.14"
- if [ "$?" -eq "2" ]; then
- log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
- if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
- log_option="--logging=syslog"
- fi
+ ocf_version_cmp "$version" "4.9.0"
+ if [ "$?" -eq "0" ]; then
+ # prior to 4.9, ctdbd parameters are in sysconfig or passed as
+ # binary arguments
+ generate_ctdb_sysconfig
+
+ # prior to 4.9, event script enablement without a running
+ # ctdbd is done by chmoding the scripts directly
+ enable_event_scripts_chmod
else
- log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
- if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
- log_option="--syslog"
- fi
+ # 4.9+ moves all ctdbd parameters to ctdb.conf
+ generate_ctdb_config
+
+ # 4.9+ event scripts can be enabled with ctdb directly, which
+ # performs a symlink
+ enable_event_scripts_symlink
fi
+
if [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
# ensure the logfile's directory exists, otherwise ctdb will fail to start
mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
@@ -643,33 +782,14 @@ ctdb_start() {
# ensure ctdb's rundir exists, otherwise it will fail to start
mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null
- # public addresses file (should not be present, but need to set for correctness if it is)
- local pub_addr_option
- pub_addr_option=""
- [ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
- pub_addr_option="--public-addresses=${OCF_RESKEY_ctdb_config_dir}/public_addresses"
- # start as disabled
- local start_as_disabled
- start_as_disabled="--start-as-disabled"
- ocf_is_true "$OCF_RESKEY_ctdb_start_as_disabled" || start_as_disabled=""
-
# set nofile ulimit for ctdbd process
if [ -n "$OCF_RESKEY_ctdb_max_open_files" ]; then
ulimit -n "$OCF_RESKEY_ctdb_max_open_files"
fi
# Start her up
- "$OCF_RESKEY_ctdbd_binary" \
- --reclock="$OCF_RESKEY_ctdb_recovery_lock" \
- --nlist="$OCF_RESKEY_ctdb_config_dir/nodes" \
- --socket="$OCF_RESKEY_ctdb_socket" \
- --dbdir="$OCF_RESKEY_ctdb_dbdir" \
- --dbdir-persistent="$OCF_RESKEY_ctdb_dbdir/persistent" \
- --event-script-dir="$OCF_RESKEY_ctdb_config_dir/events.d" \
- --notification-script="$OCF_RESKEY_ctdb_config_dir/notify.sh" \
- --transport=tcp \
- $start_as_disabled $log_option $pub_addr_option \
- -d "$OCF_RESKEY_ctdb_debuglevel"
+ invoke_ctdbd "$version"
+
if [ $? -ne 0 ]; then
# cleanup smb.conf
cleanup_smb_conf
@@ -688,7 +808,7 @@ ctdb_start() {
if [ $? -ne 0 ]; then
# CTDB will be running, kill it before returning
ctdb_stop
- ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary --socket=$OCF_RESKEY_ctdb_socket status"
+ ocf_exit_reason "Can't invoke $OCF_RESKEY_ctdb_binary status"
return $OCF_ERR_GENERIC
fi
if ! echo "$status" | grep -qs 'UNHEALTHY (THIS'; then
@@ -725,7 +845,7 @@ ctdb_stop() {
[ $count -gt 10 ] && {
ocf_log info "killing ctdbd "
pkill -9 -f "$OCF_RESKEY_ctdbd_binary"
- pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events.d/"
+ pkill -9 -f "${OCF_RESKEY_ctdb_config_dir}/events"
}
done
From b4753b7cb46045bb9e7ed5e3a0a20f6104264b12 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 10 Jul 2019 17:11:50 +0200
Subject: [PATCH 2/3] CTDB: generate script.options file for 4.9+
Event scripts in CTDB 4.9+ ignore sysconfig configuration and instead
parse parameters in ctdb_config_dir/script.options .
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 35 ++++++++++++++++++++++++++++++-----
1 file changed, 30 insertions(+), 5 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 79a2f97e7..0906f3da9 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -242,6 +242,7 @@ If the amount of free memory drops below this value the node will
become unhealthy and ctdb and all managed services will be shutdown.
Once this occurs, the administrator needs to find the reason for the
OOM situation, rectify it and restart ctdb with "service ctdb start".
+With CTDB 4.4.0 and later this parameter is ignored.
</longdesc>
<shortdesc lang="en">Minimum amount of free memory (MB)</shortdesc>
<content type="integer" default="${OCF_RESKEY_ctdb_monitor_free_memory_default}" />
@@ -600,8 +601,10 @@ cleanup_smb_conf() {
mv "$OCF_RESKEY_smb_conf.$$" "$OCF_RESKEY_smb_conf"
}
-append_ctdb_sysconfig() {
- [ -n "$2" ] && echo "$1=$2" >> "$CTDB_SYSCONFIG"
+append_conf() {
+ local file_path="$1"
+ shift
+ [ -n "$2" ] && echo "$1=$2" >> "$file_path"
}
generate_ctdb_config() {
@@ -644,6 +647,25 @@ cat >$ctdb_config <<EOF
EOF
}
+generate_event_script_options() {
+ local script_options="$OCF_RESKEY_ctdb_config_dir/script.options"
+ # script.options is what CTDB 4.9+ event scripts parse instead of the sysconfig file
+ # Backup existing config if we're not already using an auto-generated one
+ grep -qa '# CTDB-RA: Auto-generated' "$script_options" || cp -p "$script_options" "${script_options}.ctdb-ra-orig"
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Unable to backup $script_options to ${script_options}.ctdb-ra-orig"
+ fi
+
+cat >"$script_options" <<EOF
+# CTDB-RA: Auto-generated
+CTDB_SAMBA_SKIP_SHARE_CHECK=$(ocf_is_true "$OCF_RESKEY_ctdb_samba_skip_share_check" && echo 'yes' || echo 'no')
+EOF
+ # values are quoted so one containing spaces is written whole; empty ones are still skipped by append_conf
+ append_conf "$script_options" CTDB_SERVICE_SMB "$OCF_RESKEY_ctdb_service_smb"
+ append_conf "$script_options" CTDB_SERVICE_NMB "$OCF_RESKEY_ctdb_service_nmb"
+ append_conf "$script_options" CTDB_SERVICE_WINBIND "$OCF_RESKEY_ctdb_service_winbind"
+}
+
# Generate a new, minimal CTDB config file that's just enough
# to get CTDB running as configured by the RA parameters.
generate_ctdb_sysconfig() {
@@ -671,9 +693,9 @@ CTDB_SAMBA_SKIP_SHARE_CHECK=$(ocf_is_true "$OCF_RESKEY_ctdb_samba_skip_share_che
CTDB_MANAGES_SAMBA=$(ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && echo 'yes' || echo 'no')
CTDB_MANAGES_WINBIND=$(ocf_is_true "$OCF_RESKEY_ctdb_manages_winbind" && echo 'yes' || echo 'no')
EOF
- append_ctdb_sysconfig CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
- append_ctdb_sysconfig CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
- append_ctdb_sysconfig CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_SMB $OCF_RESKEY_ctdb_service_smb
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_NMB $OCF_RESKEY_ctdb_service_nmb
+ append_conf "$CTDB_SYSCONFIG" CTDB_SERVICE_WINBIND $OCF_RESKEY_ctdb_service_winbind
}
@@ -769,6 +791,9 @@ ctdb_start() {
# 4.9+ moves all ctdbd parameters to ctdb.conf
generate_ctdb_config
+ # 4.9+ event script options are in script.options
+ generate_event_script_options
+
# 4.9+ event scripts can be enabled with ctdb directly, which
# performs a symlink
enable_event_scripts_symlink
From 0a8610711f90c4cc7a2b380a4795f463532d9520 Mon Sep 17 00:00:00 2001
From: David Disseldorp <ddiss@suse.de>
Date: Wed, 10 Jul 2019 17:54:01 +0200
Subject: [PATCH 3/3] CTDB: drop sysconfig presence check during validate
There are two reasons to avoid this check:
- for ctdb versions prior to 4.9.0, the sysconfig file is generated by
the resource agent start hook *after* ctdb_validate() is called.
- post 4.9.0 versions don't use the sysconfig file.
Signed-off-by: David Disseldorp <ddiss@suse.de>
---
heartbeat/CTDB.in | 5 -----
1 file changed, 5 deletions(-)
diff --git a/heartbeat/CTDB.in b/heartbeat/CTDB.in
index 0906f3da9..15d78902e 100755
--- a/heartbeat/CTDB.in
+++ b/heartbeat/CTDB.in
@@ -925,11 +925,6 @@ ctdb_validate() {
check_binary $binary
done
- if [ -z "$CTDB_SYSCONFIG" ]; then
- ocf_exit_reason "Can't find CTDB config file (expecting /etc/sysconfig/ctdb, /etc/default/ctdb or similar)"
- return $OCF_ERR_INSTALLED
- fi
-
if ocf_is_true "$OCF_RESKEY_ctdb_manages_samba" && [ ! -f "$OCF_RESKEY_smb_conf" ]; then
ocf_exit_reason "Samba config file '$OCF_RESKEY_smb_conf' does not exist."
return $OCF_ERR_INSTALLED

View File

@ -0,0 +1,193 @@
From 462ada6164cb77c81f5291d88287d68506d38056 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Tue, 9 Jul 2019 23:14:21 +0200
Subject: [PATCH] Generate additional drop-in dependencies for podman containers
When podman creates a container, it creates two additional systemd
scope files dynamically:
- libpod-conmon-<CONTAINERID>.scope - runs a conmon process that
tracks a container's pid1 into a dedicated pidfile.
- libpod-<CONTAINERID>.scope - created dynamically by runc,
for cgroups accounting
On shutdown, it can happen that systemd stops those scopes early,
which in turn sends a SIGTERM to pacemaker-managed containers
before pacemaker has scheduled any stop operation. That
confuses the cluster and may break shutdown.
Add a new option in the resource-agent to inject additional
dependencies into the dynamically created scope files, so that
systemd is not allowed to stop scopes before the pacemaker
service itself is stopped.
When that option is enabled, the scopes look like:
# podman ps | grep galera
c329819a1227 192.168.122.8:8787/rhosp15/openstack-mariadb:latest dumb-init -- /bin... About an hour ago Up About an hour ago galera-bundle-podman-0
# systemctl cat libpod*c329819a1227*
# /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
# This is a transient unit file, created programmatically via the systemd API. Do not edit.
[Scope]
Slice=machine.slice
Delegate=yes
[Unit]
DefaultDependencies=no
# /run/systemd/transient/libpod-conmon-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
[Unit]
Before=pacemaker.service
# /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope
# This is a transient unit file, created programmatically via the systemd API. Do not edit.
[Unit]
Description=libcontainer container c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b
[Scope]
Slice=machine.slice
Delegate=yes
MemoryAccounting=yes
CPUAccounting=yes
BlockIOAccounting=yes
[Unit]
DefaultDependencies=no
# /run/systemd/transient/libpod-c329819a1227ec548d678861994ef755b1fde9a244e1e4d966d17674df88ce7b.scope.d/dep.conf
[Unit]
Before=pacemaker.service
Effectively, this prevents systemd from managing the shutdown of any
pacemaker-managed podman container.
Related: rhbz#1726442
---
heartbeat/podman | 82 +++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 81 insertions(+), 1 deletion(-)
diff --git a/heartbeat/podman b/heartbeat/podman
index 8fc2c4695..8a916eb8c 100755
--- a/heartbeat/podman
+++ b/heartbeat/podman
@@ -158,6 +158,16 @@ to have the particular one persist when this happens.
<shortdesc lang="en">reuse container</shortdesc>
<content type="boolean" default="${OCF_RESKEY_reuse_default}"/>
</parameter>
+
+<parameter name="drop_in_dependency" required="0" unique="0">
+<longdesc lang="en">
+Use transient drop-in files to add extra dependencies to the systemd
+scopes associated to the container. During reboot, this prevents systemd
+to stop the container before pacemaker.
+</longdesc>
+<shortdesc lang="en">drop-in dependency</shortdesc>
+<content type="boolean"/>
+</parameter>
</parameters>
<actions>
@@ -273,8 +283,57 @@ podman_create_mounts() {
IFS="$oldIFS"
}
+podman_container_id()
+{
+ # Retrieve the container ID by doing a "podman ps" rather than
+ # a "podman inspect", because the latter has performance issues
+ # under IO load. "podman start $CONTAINER" is no alternative: for a
+ # stopped container it returns a name instead of a container ID.
+ # The name field is compared exactly (allowing a comma-separated list):
+ # a word match would also accept e.g. "$CONTAINER-1" and return its ID.
+ podman ps --no-trunc --format '{{.ID}} {{.Names}}' | awk -v name="$CONTAINER" '{ n = split($2, names, ","); for (i = 1; i <= n; i++) if (names[i] == name) { print $1; exit } }'
+}
+
+
+create_transient_drop_in_dependency()
+{
+ local cid=$1
+ local rc=$OCF_SUCCESS
+ # Adds a "Before=pacemaker.service" drop-in to both libpod scopes of container $1
+ if [ -z "$cid" ]; then
+ ocf_log error "Container ID not found for \"$CONTAINER\". Not creating drop-in dependency"
+ return $OCF_ERR_GENERIC
+ fi
+ ocf_log info "Creating drop-in dependency for \"$CONTAINER\" ($cid)"
+ # printf instead of "echo -e": not every /bin/sh echo understands -e
+ for scope in "libpod-$cid.scope.d" "libpod-conmon-$cid.scope.d"; do
+ if [ $rc -eq $OCF_SUCCESS ] && [ ! -d /run/systemd/transient/"$scope" ]; then
+ mkdir -p /run/systemd/transient/"$scope" && \
+ printf '[Unit]\nBefore=pacemaker.service\n' > /run/systemd/transient/"$scope"/dep.conf && \
+ chmod ago+r /run/systemd/transient/"$scope" /run/systemd/transient/"$scope"/dep.conf
+ rc=$?
+ fi
+ done
+
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not create drop-in dependency for \"$CONTAINER\" ($cid)"
+ else
+ systemctl daemon-reload
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Could not refresh service definition after creating drop-in for \"$CONTAINER\""
+ fi
+ fi
+
+ return $rc
+}
+
+
podman_start()
{
+ local cid
+ local rc
+
podman_create_mounts
local run_opts="-d --name=${CONTAINER}"
# check to see if the container has already started
@@ -306,8 +365,17 @@ podman_start()
ocf_log info "running container $CONTAINER for the first time"
ocf_run podman run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
fi
+ rc=$?
- if [ $? -ne 0 ]; then
+ # if the container was stopped or didn't exist before, systemd
+ # removed the libpod* scopes. So always try to recreate the drop-ins
+ if [ $rc -eq 0 ] && ocf_is_true "$OCF_RESKEY_drop_in_dependency"; then
+ cid=$(podman_container_id)
+ create_transient_drop_in_dependency "$cid"
+ rc=$?
+ fi
+
+ if [ $rc -ne 0 ]; then
ocf_exit_reason "podman failed to launch container"
return $OCF_ERR_GENERIC
fi
@@ -353,6 +421,8 @@ podman_stop()
else
ocf_log debug "waiting $timeout second[s] before killing container"
ocf_run podman stop -t=$timeout $CONTAINER
+ # on stop, systemd will automatically delete any transient
+ # drop-in conf that has been created earlier
fi
if [ $? -ne 0 ]; then
@@ -456,6 +526,16 @@ CONTAINER=$OCF_RESKEY_name
# exec command to be non-empty
: ${OCF_RESKEY_monitor_cmd:=/bin/true}
+# When OCF_RESKEY_drop_in_dependency is not populated, we
+# look at another file-based way of enabling the option.
+# Otherwise, consider it disabled.
+if [ -z "$OCF_RESKEY_drop_in_dependency" ]; then
+ if [ -f "/etc/sysconfig/podman_drop_in" ] || \
+ [ -f "/etc/default/podman_drop_in" ]; then
+ OCF_RESKEY_drop_in_dependency=yes
+ fi
+fi
+
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS;;

View File

@ -0,0 +1,48 @@
From 6c24147ebe0e979c48db93a5f8ec6094b8707591 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 26 Sep 2019 12:52:39 +0200
Subject: [PATCH] LVM-activate: move pvscan --cache to validate
It needs to be called before validate attempts to look at the VG.
---
configure.ac | 2 +-
heartbeat/LVM-activate | 6 +++++-
2 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/configure.ac b/configure.ac
index 97dac7cf8..1eb65cf34 100644
--- a/configure.ac
+++ b/configure.ac
@@ -21,7 +21,7 @@ dnl checks for system services
AC_INIT([resource-agents],
m4_esyscmd([make/git-version-gen .tarball-version]),
- [to_be_defined@foobar.org])
+ [developers@clusterlabs.org])
AC_USE_SYSTEM_EXTENSIONS
diff --git a/heartbeat/LVM-activate b/heartbeat/LVM-activate
index 3df40c894..9b7c0aa7f 100755
--- a/heartbeat/LVM-activate
+++ b/heartbeat/LVM-activate
@@ -489,6 +489,11 @@ lvm_validate() {
check_binary lvm
check_binary dmsetup
+ # This is necessary when using system ID to update lvm hints,
+ # or in older versions of lvm, this is necessary to update the
+ # lvmetad cache.
+ pvscan --cache
+
if ! vgs --foreign ${VG} >/dev/null 2>&1 ; then
# stop action exits successfully if the VG cannot be accessed...
if [ $__OCF_ACTION = "stop" ]; then
@@ -627,7 +632,6 @@ clvmd_activate() {
systemid_activate() {
local cur_systemid
- pvscan --cache
cur_systemid=$(vgs --foreign --noheadings -o systemid ${VG} | tr -d '[:blank:]')
# Put our system ID on the VG

View File

@ -0,0 +1,66 @@
From 34b46b172857babbb2bca5e012c7827ed6a26b01 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Wed, 6 Nov 2019 10:00:31 +0100
Subject: [PATCH] IPaddr2: add noprefixroute parameter
---
heartbeat/IPaddr2 | 17 ++++++++++++++++-
1 file changed, 16 insertions(+), 1 deletion(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 1d39ae514..6f8e8c734 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -88,6 +88,7 @@ OCF_RESKEY_arp_sender_default=""
OCF_RESKEY_send_arp_opts_default=""
OCF_RESKEY_flush_routes_default="false"
OCF_RESKEY_run_arping_default=false
+OCF_RESKEY_noprefixroute_default="false"
OCF_RESKEY_preferred_lft_default="forever"
OCF_RESKEY_network_namespace_default=""
@@ -109,6 +110,7 @@ OCF_RESKEY_network_namespace_default=""
: ${OCF_RESKEY_send_arp_opts=${OCF_RESKEY_send_arp_opts_default}}
: ${OCF_RESKEY_flush_routes=${OCF_RESKEY_flush_routes_default}}
: ${OCF_RESKEY_run_arping=${OCF_RESKEY_run_arping_default}}
+: ${OCF_RESKEY_noprefixroute=${OCF_RESKEY_noprefixroute_default}}
: ${OCF_RESKEY_preferred_lft=${OCF_RESKEY_preferred_lft_default}}
: ${OCF_RESKEY_network_namespace=${OCF_RESKEY_network_namespace_default}}
@@ -377,6 +379,14 @@ Whether or not to run arping for IPv4 collision detection check.
<content type="string" default="${OCF_RESKEY_run_arping_default}"/>
</parameter>
+<parameter name="noprefixroute">
+<longdesc lang="en">
+Use noprefixroute flag (see 'man ip-address').
+</longdesc>
+<shortdesc lang="en">Use noprefixroute flag</shortdesc>
+<content type="string" default="${OCF_RESKEY_noprefixroute_default}"/>
+</parameter>
+
<parameter name="preferred_lft">
<longdesc lang="en">
For IPv6, set the preferred lifetime of the IP address.
@@ -397,8 +407,8 @@ the namespace.
<shortdesc lang="en">Network namespace to use</shortdesc>
<content type="string" default="${OCF_RESKEY_network_namespace_default}"/>
</parameter>
-
</parameters>
+
<actions>
<action name="start" timeout="20s" />
<action name="stop" timeout="20s" />
@@ -640,6 +650,11 @@ add_interface () {
msg="Adding $FAMILY address $ipaddr/$netmask with broadcast address $broadcast to device $iface"
fi
+ if ocf_is_true "${OCF_RESKEY_noprefixroute}"; then
+ cmd="$cmd noprefixroute"
+ msg="${msg} (with noprefixroute)"
+ fi
+
if [ ! -z "$label" ]; then
cmd="$cmd label $label"
msg="${msg} (with label $label)"

View File

@ -0,0 +1,69 @@
diff -uNr a/heartbeat/LVM-activate b/heartbeat/LVM-activate
--- a/heartbeat/LVM-activate 2019-10-08 12:10:11.755991580 +0200
+++ b/heartbeat/LVM-activate 2019-10-08 12:14:38.388288176 +0200
@@ -42,6 +42,11 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Parameter defaults
+OCF_RESKEY_partial_activation_default="false"
+
+: ${OCF_RESKEY_partial_activation=${OCF_RESKEY_partial_activation_default}}
+
# If LV is given, only activate this named LV; otherwise, activate all
# LVs in the named VG.
VG=${OCF_RESKEY_vgname}
@@ -150,6 +155,16 @@
<content type="string" default="pacemaker" />
</parameter>
+<parameter name="partial_activation" unique="0" required="0">
+<longdesc lang="en">
+If set, the volume group will be activated partially even with some
+physical volumes missing. It helps to set to true when using mirrored
+logical volumes.
+</longdesc>
+<shortdesc lang="en">Activate VG partially when missing PVs</shortdesc>
+<content type="string" default="${OCF_RESKEY_partial_activation_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -486,6 +501,25 @@
exit $OCF_ERR_CONFIGURED
fi
+ # Inconsistency might be due to missing physical volumes, which doesn't
+ # automatically mean we should fail. If partial_activation=true then
+ # we should let start try to handle it, or if no PVs are listed as
+ # "unknown device" then another node may have marked a device missing
+ # where we have access to all of them and can start without issue.
+ case $(vgs -o attr --noheadings $VG | tr -d ' ') in
+ ???p??*)
+ if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # We are missing devices and cannot activate partially
+ ocf_exit_reason "Volume group [$VG] has devices missing. Consider partial_activation=true to attempt to activate partially"
+ exit $OCF_ERR_GENERIC
+ else
+ # We are missing devices but are allowed to activate partially.
+ # Assume that caused the vgck failure and carry on
+ ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ fi
+ ;;
+ esac
+
# Get the access mode from VG metadata and check if it matches the input
# value. Skip to check "tagging" mode because there's no reliable way to
# automatically check if "tagging" mode is being used.
@@ -545,6 +579,10 @@
do_activate() {
local activate_opt=$1
+ if ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ activate_opt="${activate_opt} --partial"
+ fi
+
# Only activate the specific LV if it's given
if [ -n "$LV" ]; then
ocf_run lvchange $activate_opt ${VG}/${LV}

View File

@ -0,0 +1,39 @@
From 2aa8015bc4ff0bd61eca13eceb59aaa672335b76 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Thu, 30 Aug 2018 18:36:11 -0700
Subject: [PATCH] Filesystem: Support symlink as mountpoint directory
Filesystem monitor operation fails when the `directory` attribute is a
symlink.
The monitor operation calls the `list_mounts` function, which cats
`/proc/mounts` if it exists, else cats `/etc/mtab` if it exists, else
runs the `mount` command. It then greps for `" $MOUNTPOINT "` in the
output, where `$MOUNTPOINT` is the value of the `directory` attribute.
`/proc/mounts`, `/etc/mtab`, and the `mount` command resolve symlinks
to their canonical targets. So while the monitor operation greps for
the symlink path (surrounded by spaces) as defined in the directory
attribute, the symlink will not be present in the `list_mounts` output.
Only the symlink's target will be present.
This patch uses `readlink -f $MOUNTPOINT` to resolve the symlink to its
canonical name before using it as a grep pattern in the
`Filesystem_status` function.
---
heartbeat/Filesystem | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 7c73b0b97..fc4b8fcd5 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $MOUNTPOINT " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $(readlink -f $MOUNTPOINT) " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else

View File

@ -0,0 +1,43 @@
From e2c3ec91cdd123b8afc6010f45ecd22ee6d8ecf7 Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Mon, 3 Sep 2018 00:30:01 -0700
Subject: [PATCH] Filesystem: Canonicalize mountpoint symlinks
Commit 2aa8015 added support to `Filesystem_status()` for mountpoints
that are symlinks. However, it missed two other places where `readlink`
calls should have been added to canonicalize symlinks.
---
heartbeat/Filesystem | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index fc4b8fcd5..2a43d1daa 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -278,7 +278,7 @@ determine_blockdevice() {
nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none)
: ;;
*)
- DEVICE=`list_mounts | grep " $MOUNTPOINT " | cut -d' ' -f1`
+ DEVICE=`list_mounts | grep " $(readlink -f "$MOUNTPOINT" ) " | cut -d' ' -f1`
if [ -b "$DEVICE" ]; then
blockdevice=yes
fi
@@ -396,7 +396,7 @@ fstype_supported()
Filesystem_start()
{
# Check if there are any mounts mounted under the mountpoint
- if list_mounts | grep -q -E " $MOUNTPOINT/\w+" >/dev/null 2>&1; then
+ if list_mounts | grep -q -E " $(readlink -f "$MOUNTPOINT" )/\w+" >/dev/null 2>&1; then
ocf_log err "There is one or more mounts mounted under $MOUNTPOINT."
return $OCF_ERR_CONFIGURED
fi
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $(readlink -f $MOUNTPOINT) " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $(readlink -f "$MOUNTPOINT" ) " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else

View File

@ -0,0 +1,53 @@
From 69d607dc7568168e874f99d5a8b6bdb66b579d8b Mon Sep 17 00:00:00 2001
From: "yusk.iida" <yusk.iida@gmail.com>
Date: Tue, 7 May 2019 19:37:26 +0900
Subject: [PATCH] Low: Filesystem: Fix a problem where umount is not executed
in the event of a disk failure
---
heartbeat/Filesystem | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 2a43d1daa..bd974f8f3 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -278,7 +278,7 @@ determine_blockdevice() {
nfs4|nfs|smbfs|cifs|glusterfs|ceph|tmpfs|overlay|overlayfs|rozofs|zfs|cvfs|none)
: ;;
*)
- DEVICE=`list_mounts | grep " $(readlink -f "$MOUNTPOINT" ) " | cut -d' ' -f1`
+ DEVICE=`list_mounts | grep " $CANONICALIZED_MOUNTPOINT " | cut -d' ' -f1`
if [ -b "$DEVICE" ]; then
blockdevice=yes
fi
@@ -396,7 +396,7 @@ fstype_supported()
Filesystem_start()
{
# Check if there are any mounts mounted under the mountpoint
- if list_mounts | grep -q -E " $(readlink -f "$MOUNTPOINT" )/\w+" >/dev/null 2>&1; then
+ if list_mounts | grep -q -E " $CANONICALIZED_MOUNTPOINT/\w+" >/dev/null 2>&1; then
ocf_log err "There is one or more mounts mounted under $MOUNTPOINT."
return $OCF_ERR_CONFIGURED
fi
@@ -580,7 +580,7 @@ Filesystem_stop()
#
Filesystem_status()
{
- if list_mounts | grep -q " $(readlink -f "$MOUNTPOINT" ) " >/dev/null 2>&1; then
+ if list_mounts | grep -q " $CANONICALIZED_MOUNTPOINT " >/dev/null 2>&1; then
rc=$OCF_SUCCESS
msg="$MOUNTPOINT is mounted (running)"
else
@@ -804,6 +804,11 @@ if [ -z "$OCF_RESKEY_directory" ]; then
else
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
+ CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
# kill the whole system. Is that a good idea?

View File

@ -0,0 +1,32 @@
From 48a7ebcea5ce0522021cf3079b62107a06b530b9 Mon Sep 17 00:00:00 2001
From: James Oakley <jfunk@funktronics.ca>
Date: Thu, 8 Aug 2019 05:56:14 -0700
Subject: [PATCH] Don't call readlink on path if it does not exist
---
heartbeat/Filesystem | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 4bbbc06d3..738e3c08e 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -858,10 +858,14 @@ if [ -z "$OCF_RESKEY_directory" ]; then
else
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
- CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
- exit $OCF_ERR_GENERIC
+ if [ -e "$MOUNTPOINT" ] ; then
+ CANONICALIZED_MOUNTPOINT=$(readlink -f "$MOUNTPOINT")
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Could not canonicalize $MOUNTPOINT because readlink failed"
+ exit $OCF_ERR_GENERIC
+ fi
+ else
+ CANONICALIZED_MOUNTPOINT="$MOUNTPOINT"
fi
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
# TODO: / mounted via Filesystem sounds dangerous. On stop, we'll

View File

@ -0,0 +1,46 @@
From b67278bc92cfb0b9947ff5fff65f46f420a42c2c Mon Sep 17 00:00:00 2001
From: Kazutomo Nakahira <kazutomo_nakahira@newson.co.jp>
Date: Fri, 10 May 2019 14:30:51 +0900
Subject: [PATCH] Low: Filesystem: Fix missing mount point due to corrupted
mount list
---
heartbeat/Filesystem | 20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 2a43d1daa..c38ae12d4 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -255,16 +255,26 @@ is_bind_mount() {
}
list_mounts() {
local inpf=""
+ local mount_list=""
+ local check_list="x"
+
if [ -e "/proc/mounts" ] && ! is_bind_mount; then
inpf=/proc/mounts
elif [ -f "/etc/mtab" -a -r "/etc/mtab" ]; then
inpf=/etc/mtab
fi
- if [ "$inpf" ]; then
- cut -d' ' -f1,2,3 < $inpf
- else
- $MOUNT | cut -d' ' -f1,3,5
- fi
+
+ # Make sure that the mount list has not been changed while reading.
+ while [ "$mount_list" != "$check_list" ]; do
+ check_list=$mount_list
+ if [ "$inpf" ]; then
+ mount_list=$(cut -d' ' -f1,2,3 < $inpf)
+ else
+ mount_list=$($MOUNT | cut -d' ' -f1,3,5)
+ fi
+ done
+
+ echo "$mount_list"
}
determine_blockdevice() {

View File

@ -0,0 +1,52 @@
From bfbc99003ebd96d79bbf8ad50be0b5e714a92fd7 Mon Sep 17 00:00:00 2001
From: ytakeshita <y.takeshita0311@gmail.com>
Date: Fri, 7 Jun 2019 15:20:52 +0900
Subject: [PATCH] Medium: Filesystem: Prevents to all root user processes are
killed when bind mounting a directory on rootfs.
If a directory is bind-mounted on rootfs and "force_umount" is not set to "safe", change "force_umount" to "safe".
---
heartbeat/Filesystem | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index c46ec3cca..1b29a08b3 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -314,6 +314,24 @@ bind_kernel_check() {
[ $? -ne 0 ] &&
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
+
+bind_rootfs_check() {
+ local SOURCE
+ local TARGET
+ local ROOTFS
+
+ SOURCE=$1
+ TARGET=$(df --output=target $SOURCE | tail -n 1)
+
+ ROOTFS=$(list_mounts | grep -w rootfs | cut -d' ' -f 2)
+
+ if [ "${TARGET}" = "${ROOTFS}" ]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
bind_mount() {
if is_bind_mount && [ "$options" != "-o bind" ]
then
@@ -476,6 +494,11 @@ get_pids()
local procs
local mmap_procs
+ if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_rootfs_check "$DEVICE"; then
+ ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'"
+ FORCE_UNMOUNT=safe
+ fi
+
if ocf_is_true "$FORCE_UNMOUNT"; then
if [ "X${HOSTOS}" = "XOpenBSD" ];then
fstat | grep $dir | awk '{print $3}'

View File

@ -0,0 +1,42 @@
From f8e5d2afc5b9bbf676ac20894f0f26e6ec998557 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 10 Sep 2019 15:40:12 +0200
Subject: [PATCH] Filesystem: improve "/" check for bind mounts
---
heartbeat/Filesystem | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 738e3c08e..e66ddc77f 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -337,17 +337,8 @@ bind_kernel_check() {
ocf_log warn "kernel `uname -r` cannot handle read only bind mounts"
}
-bind_rootfs_check() {
- local SOURCE
- local TARGET
- local ROOTFS
-
- SOURCE=$1
- TARGET=$(df --output=target $SOURCE | tail -n 1)
-
- ROOTFS=$(list_mounts | grep -w rootfs | cut -d' ' -f 2)
-
- if [ "${TARGET}" = "${ROOTFS}" ]; then
+bind_root_mount_check() {
+ if [ "$(df -P "$1" | awk 'END{print $6}')" = "/" ]; then
return 1
else
return 0
@@ -516,7 +507,7 @@ get_pids()
local procs
local mmap_procs
- if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_rootfs_check "$DEVICE"; then
+ if is_bind_mount && ocf_is_true "$FORCE_UNMOUNT" && ! bind_root_mount_check "$DEVICE"; then
ocf_log debug "Change force_umount from '$FORCE_UNMOUNT' to 'safe'"
FORCE_UNMOUNT=safe
fi

View File

@ -0,0 +1,202 @@
--- ClusterLabs-resource-agents-e711383f/heartbeat/IPsrcaddr 2019-08-15 16:02:10.055827624 +0200
+++ /home/oalbrigt/src/resource-agents/heartbeat/IPsrcaddr 2019-08-15 15:45:50.690757838 +0200
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# Description: IPsrcaddr - Preferred source address modification
+# Description: IPsrcaddr - Preferred source(/dest) address modification
#
# Author: John Sutton <john@scl.co.uk>
# Support: users@clusterlabs.org
@@ -11,7 +11,7 @@
#
# This script manages the preferred source address associated with
# packets which originate on the localhost and are routed through the
-# default route. By default, i.e. without the use of this script or
+# matching route. By default, i.e. without the use of this script or
# similar, these packets will carry the IP of the primary i.e. the
# non-aliased interface. This can be a nuisance if you need to ensure
# that such packets carry the same IP irrespective of which host in
@@ -27,7 +27,7 @@
#
# NOTES:
#
-# 1) There must be one and not more than 1 default route! Mainly because
+# 1) There must be one and not more than 1 matching route! Mainly because
# I can't see why you should have more than one. And if there is more
# than one, we would have to box clever to find out which one is to be
# modified, or we would have to pass its identity as an argument.
@@ -54,16 +54,25 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
+OCF_RESKEY_ipaddress_default=""
+OCF_RESKEY_cidr_netmask_default=""
+OCF_RESKEY_destination_default="0.0.0.0/0"
OCF_RESKEY_proto_default=""
+OCF_RESKEY_table_default=""
+: ${OCF_RESKEY_ipaddress=${OCF_RESKEY_ipaddress_default}}
+: ${OCF_RESKEY_cidr_netmask=${OCF_RESKEY_cidr_netmask_default}}
+: ${OCF_RESKEY_destination=${OCF_RESKEY_destination_default}}
: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}
+: ${OCF_RESKEY_table=${OCF_RESKEY_table_default}}
#######################################################################
[ -z "$OCF_RESKEY_proto" ] && PROTO="" || PROTO="proto $OCF_RESKEY_proto"
+[ -z "$OCF_RESKEY_table" ] && TABLE="" || TABLE="table $OCF_RESKEY_table"
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
- CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0"
+ CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
CMDCHANGE="$IP2UTIL route change to "
SYSTYPE="`uname -s`"
@@ -91,7 +100,7 @@
The IP address.
</longdesc>
<shortdesc lang="en">IP address</shortdesc>
-<content type="string" default="" />
+<content type="string" default="${OCF_RESKEY_ipaddress_default}" />
</parameter>
<parameter name="cidr_netmask">
@@ -100,7 +109,15 @@
dotted quad notation 255.255.255.0).
</longdesc>
<shortdesc lang="en">Netmask</shortdesc>
-<content type="string" default=""/>
+<content type="string" default="${OCF_RESKEY_cidr_netmask_default}"/>
+</parameter>
+
+<parameter name="destination">
+<longdesc lang="en">
+The destination IP/subnet for the route (default: $OCF_RESKEY_destination_default)
+</longdesc>
+<shortdesc lang="en">Destination IP/subnet</shortdesc>
+<content type="string" default="${OCF_RESKEY_destination_default}" />
</parameter>
<parameter name="proto">
@@ -108,7 +125,17 @@
Proto to match when finding network. E.g. "kernel".
</longdesc>
<shortdesc lang="en">Proto</shortdesc>
-<content type="string" default="" />
+<content type="string" default="${OCF_RESKEY_proto_default}" />
+</parameter>
+
+<parameter name="table">
+<longdesc lang="en">
+Table to modify. E.g. "local".
+
+The table has to have a route matching the "destination" parameter.
+</longdesc>
+<shortdesc lang="en">Table</shortdesc>
+<content type="string" default="${OCF_RESKEY_table_default}" />
</parameter>
</parameters>
@@ -151,21 +178,22 @@
export OCF_RESKEY_ip=$OCF_RESKEY_ipaddress
srca_read() {
- # Capture the default route - doublequotes prevent word splitting...
- DEFROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
-
- # ... so we can make sure there is only 1 default route
- [ 1 -eq `echo "$DEFROUTE" | wc -l` ] || \
- errorexit "more than 1 default route exists"
+ # Capture matching route - doublequotes prevent word splitting...
+ ROUTE="`$CMDSHOW`" || errorexit "command '$CMDSHOW' failed"
- # But there might still be no default route
- [ -z "$DEFROUTE" ] && errorexit "no default route exists"
+ # ... so we can make sure there is only 1 matching route
+ [ 1 -eq `echo "$ROUTE" | wc -l` ] || \
+ errorexit "more than 1 matching route exists"
+
+ # But there might still be no matching route
+ [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] && [ -z "$ROUTE" ] && \
+ ! ocf_is_probe && errorexit "no matching route exists"
# Sed out the source ip address if it exists
- SRCIP=`echo $DEFROUTE | sed -n "s/$MATCHROUTE/\3/p"`
+ SRCIP=`echo $ROUTE | sed -n "s/$MATCHROUTE/\3/p"`
# and what remains after stripping out the source ip address clause
- ROUTE_WO_SRC=`echo $DEFROUTE | sed "s/$MATCHROUTE/\1\5/"`
+ ROUTE_WO_SRC=`echo $ROUTE | sed "s/$MATCHROUTE/\1\5/"`
[ -z "$SRCIP" ] && return 1
[ $SRCIP = $1 ] && return 0
@@ -185,11 +213,13 @@
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- $IP2UTIL route replace $NETWORK dev $INTERFACE src $1 || \
- errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE src $1 || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE src $1' failed"
- $CMDCHANGE $ROUTE_WO_SRC src $1 || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ $CMDCHANGE $ROUTE_WO_SRC src $1 || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC src $1' failed"
+ fi
rc=$?
fi
@@ -201,7 +231,7 @@
# If one exists but it's not the same as the one specified, that's
# an error. Maybe that's the wrong behaviour because if this fails
# then when IPaddr releases the associated interface (if there is one)
-# your default route will also get dropped ;-(
+# your matching route will also get dropped ;-(
# The exit code should conform to LSB exit codes.
#
@@ -217,11 +247,13 @@
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- $IP2UTIL route replace $NETWORK dev $INTERFACE || \
- errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE' failed"
- $CMDCHANGE $ROUTE_WO_SRC || \
- errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
+ if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ $CMDCHANGE $ROUTE_WO_SRC || \
+ errorexit "command '$CMDCHANGE $ROUTE_WO_SRC' failed"
+ fi
return $?
}
@@ -406,6 +438,10 @@
return $OCF_ERR_CONFIGURED
fi
+ if ! echo "$OCF_RESKEY_destination" | grep -q "/"; then
+ return $OCF_ERR_CONFIGURED
+ fi
+
if ! [ "x$SYSTYPE" = "xLinux" ]; then
# checks after this point are only relevant for linux.
@@ -486,7 +522,11 @@
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
-NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
+ NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
+else
+ NETWORK="$OCF_RESKEY_destination"
+fi
case $1 in
start) srca_start $ipaddress

View File

@ -0,0 +1,42 @@
From 0e73d3f474d08779b64ed99fb3f80c1e806ff1b7 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 28 Nov 2019 16:11:51 +0100
Subject: [PATCH] IPsrcaddr: fixes to replace local rule if using local table,
and set src back to primary for device on stop
---
heartbeat/IPsrcaddr | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index d80b72165..f9085f082 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -75,6 +75,10 @@ USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
CMDSHOW="$IP2UTIL route show $TABLE to exact $OCF_RESKEY_destination"
CMDCHANGE="$IP2UTIL route change to "
+if [ "$OCF_RESKEY_table" = "local" ]; then
+ TABLE="$TABLE local"
+fi
+
SYSTYPE="`uname -s`"
usage() {
@@ -247,8 +251,14 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE || \
- errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE' failed"
+ OPTS=""
+ if [ "$OCF_RESKEY_destination" != "0.0.0.0/0" ] ;then
+ PRIMARY_IP="$($IP2UTIL -4 -o addr show dev "$INTERFACE" primary | awk '{split($4,a,"/");print a[1]}')"
+ OPTS="proto kernel scope host src $PRIMARY_IP"
+ fi
+
+ $IP2UTIL route replace $TABLE $NETWORK dev $INTERFACE $OPTS || \
+ errorexit "command 'ip route replace $TABLE $NETWORK dev $INTERFACE $OPTS' failed"
if [ "$OCF_RESKEY_destination" = "0.0.0.0/0" ] ;then
$CMDCHANGE $ROUTE_WO_SRC || \

View File

@ -0,0 +1,57 @@
From fcaa52bb98a8686d993550c6f4ab7867625c8059 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Wed, 29 Aug 2018 16:18:55 -0400
Subject: [PATCH] rabbitmq-cluster: get cluster status from mnesia during
monitor
If mnesia is not running (for example if `rabbitmqctl stop_app` has
been called, or the service has paused during partition due to the
pause_minority strategy) then the cluster_status command to
rabbitmqctl will read the cached cluster status from disk and the
command returns 0 even though the service isn't really running at all.
Instead, force the cluster status to be read from mnesia. If mnesia
is not running due to the above or similar circumstances, the command
will catch that and properly fail the monitor action.
Resolves: RHBZ#1595753
---
heartbeat/rabbitmq-cluster | 20 +++++---------------
1 file changed, 5 insertions(+), 15 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index a7d2db614..204917475 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -181,26 +181,16 @@ remove_pid () {
rmq_monitor() {
local rc
- $RMQ_CTL cluster_status > /dev/null 2>&1
- rc=$?
- case "$rc" in
- 0)
+ if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
ocf_log debug "RabbitMQ server is running normally"
rmq_write_nodename
-
+
return $OCF_SUCCESS
- ;;
- 2|68|69|70|75|78)
- ocf_log info "RabbitMQ server is not running"
+ else
+ ocf_log info "RabbitMQ server could not get cluster status from mnesia"
rmq_delete_nodename
return $OCF_NOT_RUNNING
- ;;
- *)
- ocf_log err "Unexpected return code from '$RMQ_CTL cluster_status' exit code: $rc"
- rmq_delete_nodename
- return $OCF_ERR_GENERIC
- ;;
- esac
+ fi
}
rmq_init_and_wait()

View File

@ -0,0 +1,96 @@
From cc23c5523a0185fa557a5ab9056d50a60300d12a Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Tue, 16 Oct 2018 16:21:25 -0400
Subject: [PATCH] rabbitmq-cluster: fail monitor when node is in minority
partition
It's possible for mnesia to still be running, but for mnesia to be
partitioned. And it's also possible to get into this state without
pacemaker seeing the node go down so no corrective action is taken.
When monitoring, check the number of nodes that pacemaker thinks is
running, and compare to the number of nodes that mnesia thinks is
running. If mnesia only sees a minority of the total nodes, fail it
so corrective action can be taken to rejoin the cluster.
This also adds a new function, rmq_app_running, which simply checks
whether the app is running or not and does not care about the
partition status. This is now used instead of the full monitor in a
few places where we don't care about partition state.
Resolves: RHBZ#1639826
---
heartbeat/rabbitmq-cluster | 28 +++++++++++++++++++++++++---
1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 204917475..78b2bbadf 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -178,10 +178,31 @@ remove_pid () {
rm -f ${RMQ_PID_FILE} > /dev/null 2>&1
}
+rmq_app_running() {
+ if $RMQ_CTL eval 'application:which_applications().' | grep -q '{rabbit,'; then
+ ocf_log debug "RabbitMQ application is running"
+ return $OCF_SUCCESS
+ else
+ ocf_log debug "RabbitMQ application is stopped"
+ return $OCF_NOT_RUNNING
+ fi
+}
+
rmq_monitor() {
local rc
if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
+ pcs_running=$(rmq_join_list | wc -w)
+ ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
+ rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
+ ocf_log debug "RabbitMQ thinks ${rmq_running} RabbitMQ nodes are running"
+
+ if [ $(( $rmq_running * 2 )) -lt $pcs_running ]; then
+ ocf_log info "RabbitMQ is a minority partition, failing monitor"
+ rmq_delete_nodename
+ return $OCF_ERR_GENERIC
+ fi
+
ocf_log debug "RabbitMQ server is running normally"
rmq_write_nodename
@@ -215,7 +236,7 @@ rmq_init_and_wait()
return $OCF_ERR_GENERIC
fi
- rmq_monitor
+ rmq_app_running
return $?
}
@@ -236,6 +257,7 @@ rmq_start_first()
if [ $rc -eq 0 ]; then
rc=$OCF_SUCCESS
ocf_log info "cluster bootstrapped"
+ rmq_write_nodename
if [ -n "$OCF_RESKEY_set_policy" ]; then
# do not quote set_policy, we are passing in arguments
@@ -492,7 +514,7 @@ rmq_stop() {
end.
"
- rmq_monitor
+ rmq_app_running
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi
@@ -508,7 +530,7 @@ rmq_stop() {
#TODO add kill logic
stop_wait=1
while [ $stop_wait = 1 ]; do
- rmq_monitor
+ rmq_app_running
rc=$?
if [ "$rc" -eq $OCF_NOT_RUNNING ]; then
stop_wait=0

View File

@ -0,0 +1,63 @@
From 19ee29342f8bb573722991b8cbe4503309ad0bf9 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Fri, 2 Nov 2018 13:12:53 -0400
Subject: [PATCH] rabbitmq-cluster: fix regression in rmq_stop
This regression was introduced in PR#1249 (cc23c55). The stop action
was modified to use rmq_app_running in order to check the service
status, which allows for the following sequence of events:
- service is started, unclustered
- stop_app is called
- cluster_join is attempted and fails
- stop is called
Because stop_app was called, rmq_app_running returns $OCF_NOT_RUNNING
and the stop action is a no-op. This means the erlang VM continues
running.
When the start action is attempted again, a new erlang VM is launched,
but this VM fails to boot because the old one is still running and is
registered with the same name (rabbit@nodename).
This adds a new function, rmq_node_alive, which does a simple eval to
test whether the erlang VM is up, independent of the rabbit app. The
stop action now uses rmq_node_alive to check the service status, so
even if stop_app was previously called, the erlang VM will be stopped
properly.
Resolves: RHBZ#1639826
---
heartbeat/rabbitmq-cluster | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 78b2bbadf..a2de9dc20 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -188,6 +188,16 @@ rmq_app_running() {
fi
}
+rmq_node_alive() {
+ if $RMQ_CTL eval 'ok.'; then
+ ocf_log debug "RabbitMQ node is alive"
+ return $OCF_SUCCESS
+ else
+ ocf_log debug "RabbitMQ node is down"
+ return $OCF_NOT_RUNNING
+ fi
+}
+
rmq_monitor() {
local rc
@@ -514,7 +524,7 @@ rmq_stop() {
end.
"
- rmq_app_running
+ rmq_node_alive
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS
fi

View File

@ -0,0 +1,83 @@
From 63c9449bfa9a7fecbc0f00394699a475a384671d Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Thu, 9 Aug 2018 16:33:26 +0200
Subject: [PATCH] rabbitmq-cluster: retry start when cluster join fails
When a node tries to join an existing cluster, it fetches a list of
running nodes and tries to join the cluster from any of them.
If the nodes from this list become unavailable while we're joining
the cluster, the rabbitmq server will fail to get clustered and
make the start operation fail.
Give the resource a chance to start anyway by retrying the entire
start action until it succeeds or until the start timeout is
reached and pacemaker stops the start operation.
Co-Authored-by: <michele@acksyn.org>
Suggested-by: <abeekhof@redhat.com>
---
heartbeat/rabbitmq-cluster | 29 ++++++++++++++++++++++++++---
1 file changed, 26 insertions(+), 3 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 9ff49e075..84f383460 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -31,6 +31,12 @@
#######################################################################
+# This arbitrary value here is used by the rmq_start action to
+# signify that the resource agent must retry the start process
+# It might potentially conflict with OCF assigned error code
+# in the future.
+RMQ_TRY_RESTART_ERROR_CODE=126
+
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
@@ -354,7 +360,7 @@ rmq_notify() {
return $OCF_SUCCESS
}
-rmq_start() {
+rmq_try_start() {
local join_list=""
local rc
@@ -384,8 +390,16 @@ rmq_start() {
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
- return $OCF_ERR_GENERIC
+ # we could not join the rabbitmq cluster from any of the running nodes
+ # this might be due to an unexpected reset of those nodes. Give ourselves
+ # a chance to start by retrying the entire start sequence.
+
+ ocf_log warn "Failed to join the RabbitMQ cluster from nodes ${join_list}. Stopping local unclustered rabbitmq"
+ rmq_stop
+
+ ocf_log warn "Re-detect available rabbitmq nodes and try to start again"
+ # return an unused OCF value to signify a "retry" condition
+ return $RMQ_TRY_RESTART_ERROR_CODE
fi
# Restore users, user permissions, and policies (if any)
@@ -443,6 +457,15 @@ rmq_start() {
return $OCF_SUCCESS
}
+rmq_start() {
+ local rc=$RMQ_TRY_RESTART_ERROR_CODE
+ while [ $rc -eq $RMQ_TRY_RESTART_ERROR_CODE ]; do
+ rmq_try_start
+ rc=$?
+ done
+ return $rc
+}
+
rmq_stop() {
# Backup users, user permissions, and policies
BaseDataDir=`dirname $RMQ_DATA_DIR`

View File

@ -0,0 +1,42 @@
From 8ed87936e9ad06318cc49ea767885a405dfde11e Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Wed, 5 Dec 2018 11:45:43 -0500
Subject: [PATCH] rabbitmq-cluster: better ensure node attributes are removed
Ensure that the attribute is removed at the end of the stop action.
Also if rmq_app_running or rmq_node_alive shows the service as down,
ensure the attribute is deleted as well.
Resolves: RHBZ#1656368
---
heartbeat/rabbitmq-cluster | 3 +++
1 file changed, 3 insertions(+)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 1643dd1e7..2dca3e216 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -184,6 +184,7 @@ rmq_app_running() {
return $OCF_SUCCESS
else
ocf_log debug "RabbitMQ application is stopped"
+ rmq_delete_nodename
return $OCF_NOT_RUNNING
fi
}
@@ -194,6 +195,7 @@ rmq_node_alive() {
return $OCF_SUCCESS
else
ocf_log debug "RabbitMQ node is down"
+ rmq_delete_nodename
return $OCF_NOT_RUNNING
fi
}
@@ -554,6 +556,7 @@ rmq_stop() {
sleep 1
done
+ rmq_delete_nodename
remove_pid
return $OCF_SUCCESS
}

View File

@ -0,0 +1,32 @@
From 2b6e4a94c847129dd014a1efa733cd1b4a2448e6 Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Fri, 2 Nov 2018 10:11:41 -0400
Subject: [PATCH] rabbitmq-cluster: debug log detailed output when mnesia query
fails
---
heartbeat/rabbitmq-cluster | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 78b2bbadf..fabfeedfb 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -191,7 +191,8 @@ rmq_app_running() {
rmq_monitor() {
local rc
- if $RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' | grep -q '^{ok'; then
+ status=$($RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
+ if echo "${status}" | grep -q '^{ok'; then
pcs_running=$(rmq_join_list | wc -w)
ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
@@ -209,6 +210,7 @@ rmq_monitor() {
return $OCF_SUCCESS
else
ocf_log info "RabbitMQ server could not get cluster status from mnesia"
+ ocf_log debug "${status}"
rmq_delete_nodename
return $OCF_NOT_RUNNING
fi

View File

@ -0,0 +1,87 @@
From 5a33171b2c40e2e1587e82aad0cb7e39abcf615d Mon Sep 17 00:00:00 2001
From: John Eckersberg <jeckersb@redhat.com>
Date: Thu, 13 Dec 2018 12:58:43 -0500
Subject: [PATCH] rabbitmq-cluster: always use quiet flag for eval calls
On older rabbitmq versions, rabbitmqctl appends "...done." at the end
of the output. However we expect eval without this extra output so it
can be used for further processing. The -q option to rabbitmqctl
suppresses the extra output, so ensure we always pass that when
calling eval.
Resolves: RHBZ#1659072
---
heartbeat/rabbitmq-cluster | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index 2dca3e216..e82ac2399 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -39,6 +39,7 @@ RMQ_TRY_RESTART_ERROR_CODE=126
RMQ_SERVER=/usr/sbin/rabbitmq-server
RMQ_CTL=/usr/sbin/rabbitmqctl
+RMQ_EVAL="${RMQ_CTL} eval -q"
RMQ_DATA_DIR="/var/lib/rabbitmq/mnesia"
RMQ_PID_DIR="/var/run/rabbitmq"
RMQ_PID_FILE="/var/run/rabbitmq/rmq.pid"
@@ -179,7 +180,7 @@ remove_pid () {
}
rmq_app_running() {
- if $RMQ_CTL eval 'application:which_applications().' | grep -q '{rabbit,'; then
+ if $RMQ_EVAL 'application:which_applications().' | grep -q '{rabbit,'; then
ocf_log debug "RabbitMQ application is running"
return $OCF_SUCCESS
else
@@ -190,7 +191,7 @@ rmq_app_running() {
}
rmq_node_alive() {
- if $RMQ_CTL eval 'ok.'; then
+ if $RMQ_EVAL 'ok.'; then
ocf_log debug "RabbitMQ node is alive"
return $OCF_SUCCESS
else
@@ -203,11 +204,11 @@ rmq_node_alive() {
rmq_monitor() {
local rc
- status=$($RMQ_CTL eval 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
+ status=$($RMQ_EVAL 'rabbit_mnesia:cluster_status_from_mnesia().' 2>&1)
if echo "${status}" | grep -q '^{ok'; then
pcs_running=$(rmq_join_list | wc -w)
ocf_log debug "Pacemaker thinks ${pcs_running} RabbitMQ nodes are running"
- rmq_running=$($RMQ_CTL eval 'length(mnesia:system_info(running_db_nodes)).')
+ rmq_running=$($RMQ_EVAL 'length(mnesia:system_info(running_db_nodes)).')
ocf_log debug "RabbitMQ thinks ${rmq_running} RabbitMQ nodes are running"
if [ $(( $rmq_running * 2 )) -lt $pcs_running ]; then
@@ -294,7 +295,7 @@ rmq_start_first()
rmq_is_clustered()
{
- $RMQ_CTL eval 'rabbit_mnesia:is_clustered().' | grep -q true
+ $RMQ_EVAL 'rabbit_mnesia:is_clustered().' | grep -q true
}
rmq_join_existing()
@@ -432,7 +433,7 @@ rmq_try_start() {
# Restore users, user permissions, and policies (if any)
BaseDataDir=`dirname $RMQ_DATA_DIR`
- $RMQ_CTL eval "
+ $RMQ_EVAL "
%% Run only if Mnesia is ready.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin
@@ -497,7 +498,7 @@ rmq_start() {
rmq_stop() {
# Backup users, user permissions, and policies
BaseDataDir=`dirname $RMQ_DATA_DIR`
- $RMQ_CTL eval "
+ $RMQ_EVAL "
%% Run only if Mnesia is still available.
lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
begin

View File

@ -0,0 +1,88 @@
From 5949405d0031a4aba91c81cb28c24821ad2d439a Mon Sep 17 00:00:00 2001
From: Reid Wahl <nwahl@redhat.com>
Date: Thu, 3 Jan 2019 15:05:20 -0800
Subject: [PATCH] docker: Fix issues with stop operation
The docker RA's stop operation doesn't behave properly in some cases.
1. It returns a false success code in case of an error response from
the daemon.
2. It fails at `remove_container()` if the container does not exist
but another docker object of the same name does exist.
In case #1, the `container_exists()` function returns the same exit code
(1) if the container is not found (an expected error) or if there is an
error response from the docker daemon (an unexpected error). These types
of errors should be handled differently.
In case #2, the `docker inspect` calls do not limit their search to
containers. So if a non-container object is found with a matching name,
the RA attempts to remove a container by that name. Such a container may
not exist.
This patch fixes these issues as follows:
1. Match an error response in `container_exists()` against the string
"No such container".
2. Add `--type=container` to the `docker inspect` calls to restrict
the match.
---
heartbeat/docker | 26 ++++++++++++++++++++++----
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/heartbeat/docker b/heartbeat/docker
index f5ba83ff2..c206344ad 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -215,7 +215,7 @@ monitor_cmd_exec()
out=$(docker exec ${CONTAINER} $OCF_RESKEY_monitor_cmd 2>&1)
rc=$?
else
- out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+ out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --type=container --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
rc=$?
fi
@@ -236,7 +236,25 @@ monitor_cmd_exec()
container_exists()
{
- docker inspect --format {{.State.Running}} $CONTAINER | egrep '(true|false)' >/dev/null 2>&1
+ local err
+
+ err=$(docker inspect --type=container $CONTAINER 2>&1 >/dev/null)
+
+ if [ $? -ne $OCF_SUCCESS ]; then
+ case $err in
+ *"No such container"*)
+ # Return failure instead of exiting if container does not exist
+ return 1
+ ;;
+ *)
+ # Exit if error running command
+ ocf_exit_reason "$err"
+ exit $OCF_ERR_GENERIC
+ ;;
+ esac
+ fi
+
+ return $OCF_SUCCESS
}
remove_container()
@@ -265,7 +283,7 @@ docker_simple_status()
fi
# retrieve the 'Running' attribute for the container
- val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
+ val=$(docker inspect --type=container --format {{.State.Running}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then
#not running as a result of container not being found
return $OCF_NOT_RUNNING
@@ -295,7 +313,7 @@ docker_health_status()
# if starting takes longer than monitor timeout then upstream will make this fail.
while
- val=$(docker inspect --format {{.State.Health.Status}} $CONTAINER 2>/dev/null)
+ val=$(docker inspect --type=container --format {{.State.Health.Status}} $CONTAINER 2>/dev/null)
if [ $? -ne 0 ]; then
#not healthy as a result of container not being found
return $OCF_NOT_RUNNING

View File

@ -0,0 +1,35 @@
From 1286636b768bb635e9a6b1f1fbf6267c9c3f4b03 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 19 Aug 2019 13:31:06 +0200
Subject: [PATCH] Route: dont fence node when parameters arent set
---
heartbeat/Route | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Route b/heartbeat/Route
index b4011e37d..9f92eff3a 100755
--- a/heartbeat/Route
+++ b/heartbeat/Route
@@ -249,18 +249,18 @@ route_validate() {
if [ "${OCF_RESKEY_CRM_meta_clone}" ]; then
if [ "${OCF_RESKEY_CRM_meta_clone_node_max}" != 1 ]; then
ocf_exit_reason "Misconfigured clone parameters. Must set meta attribute \"clone_node_max\" to 1, got ${OCF_RESKEY_CRM_meta_clone_node_max}."
- return $OCF_ERR_ARGS
+ return $OCF_ERR_CONFIGURED
fi
fi
# Did we get a destination?
if [ -z "${OCF_RESKEY_destination}" ]; then
ocf_exit_reason "Missing required parameter \"destination\"."
- return $OCF_ERR_ARGS
+ return $OCF_ERR_CONFIGURED
fi
# Did we get either a device or a gateway address?
if [ -z "${OCF_RESKEY_device}" -a -z "${OCF_RESKEY_gateway}" ]; then
ocf_exit_reason "Must specify either \"device\", or \"gateway\", or both."
- return $OCF_ERR_ARGS
+ return $OCF_ERR_CONFIGURED
fi
# If a device has been configured, is it available on this system?
if [ -n "${OCF_RESKEY_device}" ]; then

View File

@ -0,0 +1,40 @@
From 444bdc44fc47c65f848efc0c39c1e8e6620ce10d Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 11 Oct 2019 12:12:52 +0200
Subject: [PATCH] Route: only validate for start and validate-all actions
---
heartbeat/Route | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/Route b/heartbeat/Route
index 9f92eff3a..8898e1afd 100755
--- a/heartbeat/Route
+++ b/heartbeat/Route
@@ -187,6 +187,8 @@ END
}
route_start() {
+ route_validate || exit $?
+
route_status
status=$?
if [ $status -eq $OCF_SUCCESS ]; then
@@ -313,8 +315,6 @@ for binary in ip grep; do
check_binary $binary
done
-route_validate || exit $?
-
case $OCF_RESKEY_family in
ip4) addr_family="-4" ;;
ip6) addr_family="-6" ;;
@@ -334,7 +334,7 @@ status|monitor) route_status;;
reload) ocf_log info "Reloading..."
route_start
;;
-validate-all) ;;
+validate-all) route_validate;;
*) route_usage
exit $OCF_ERR_UNIMPLEMENTED
;;

View File

@ -0,0 +1,148 @@
From c0b6356bbf5b9a1fb76b011486dfce258d395ef8 Mon Sep 17 00:00:00 2001
From: Peter Lemenkov <lemenkov@gmail.com>
Date: Fri, 6 Sep 2019 14:22:46 +0200
Subject: [PATCH] Restore users/perms/policies even if starting in a single
node mode
See https://bugzilla.redhat.com/1744467#c1
Signed-off-by: Peter Lemenkov <lemenkov@gmail.com>
---
heartbeat/rabbitmq-cluster | 109 ++++++++++++++++++++-----------------
1 file changed, 58 insertions(+), 51 deletions(-)
diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index cf8ca21a6..7837e9e3c 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -114,6 +114,62 @@ rmq_wipe_data()
rm -rf $RMQ_DATA_DIR > /dev/null 2>&1
}
+rmq_restore_users_perms_policies()
+{
+ # Restore users, user permissions, and policies (if any)
+ BaseDataDir=`dirname $RMQ_DATA_DIR`
+ $RMQ_EVAL "
+ %% Run only if Mnesia is ready.
+ lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
+ begin
+ Restore = fun(Table, PostprocessFun, Filename) ->
+ case file:consult(Filename) of
+ {error, _} ->
+ ok;
+ {ok, [Result]} ->
+ lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
+ file:delete(Filename)
+ end
+ end,
+
+ %% Restore users
+
+ Upgrade = fun
+ ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
+ ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
+ end,
+
+ Downgrade = fun
+ ({internal_user, A, B, C}) -> {internal_user, A, B, C};
+ ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
+ %% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
+ %% Unfortunately, this case will require manual intervention - user have to run:
+ %% rabbitmqctl change_password <A> <somenewpassword>
+ ({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
+ end,
+
+ %% Check db scheme first
+ [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
+ case WildPattern of
+ %% Version < 3.6.0
+ {internal_user,'_','_','_'} ->
+ Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\");
+ %% Version >= 3.6.0
+ {internal_user,'_','_','_','_'} ->
+ Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\")
+ end,
+
+ NoOp = fun(X) -> X end,
+
+ %% Restore user permissions
+ Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"),
+
+ %% Restore policies
+ Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\")
+ end.
+ "
+}
+
rmq_local_node()
{
@@ -411,6 +467,7 @@ rmq_try_start() {
if [ -z "$join_list" ]; then
rmq_start_first
rc=$?
+ rmq_restore_users_perms_policies
return $rc
fi
@@ -437,58 +494,8 @@ rmq_try_start() {
return $RMQ_TRY_RESTART_ERROR_CODE
fi
- # Restore users, user permissions, and policies (if any)
- BaseDataDir=`dirname $RMQ_DATA_DIR`
- $RMQ_EVAL "
- %% Run only if Mnesia is ready.
- lists:any(fun({mnesia,_,_}) -> true; ({_,_,_}) -> false end, application:which_applications()) andalso
- begin
- Restore = fun(Table, PostprocessFun, Filename) ->
- case file:consult(Filename) of
- {error, _} ->
- ok;
- {ok, [Result]} ->
- lists:foreach(fun(X) -> mnesia:dirty_write(Table, PostprocessFun(X)) end, Result),
- file:delete(Filename)
- end
- end,
+ rmq_restore_users_perms_policies
- %% Restore users
-
- Upgrade = fun
- ({internal_user, A, B, C}) -> {internal_user, A, B, C, rabbit_password_hashing_md5};
- ({internal_user, A, B, C, D}) -> {internal_user, A, B, C, D}
- end,
-
- Downgrade = fun
- ({internal_user, A, B, C}) -> {internal_user, A, B, C};
- ({internal_user, A, B, C, rabbit_password_hashing_md5}) -> {internal_user, A, B, C};
- %% Incompatible scheme, so we will loose user's password ('B' value) during conversion.
- %% Unfortunately, this case will require manual intervention - user have to run:
- %% rabbitmqctl change_password <A> <somenewpassword>
- ({internal_user, A, B, C, _}) -> {internal_user, A, B, C}
- end,
-
- %% Check db scheme first
- [WildPattern] = ets:select(mnesia_gvar, [ { {{rabbit_user, wild_pattern}, '\\\$1'}, [], ['\\\$1'] } ]),
- case WildPattern of
- %% Version < 3.6.0
- {internal_user,'_','_','_'} ->
- Restore(rabbit_user, Downgrade, \"$BaseDataDir/users.erl\");
- %% Version >= 3.6.0
- {internal_user,'_','_','_','_'} ->
- Restore(rabbit_user, Upgrade, \"$BaseDataDir/users.erl\")
- end,
-
- NoOp = fun(X) -> X end,
-
- %% Restore user permissions
- Restore(rabbit_user_permission, NoOp, \"$BaseDataDir/users_perms.erl\"),
-
- %% Restore policies
- Restore(rabbit_runtime_parameters, NoOp, \"$BaseDataDir/policies.erl\")
- end.
- "
return $OCF_SUCCESS
}

View File

@ -0,0 +1,47 @@
From 8ecfa95fff384ed047fd804016abdbbdcdd96d27 Mon Sep 17 00:00:00 2001
From: Keisuke MORI <kskmori@intellilink.co.jp>
Date: Wed, 11 Sep 2019 15:33:37 +0900
Subject: [PATCH] Low: IPaddr2: fix to work properly with unsanitized IPv6
addresses
`ip route get` shows the sanitized address at $1 or $2 depending on
whether the address is already assigned to the node or not.
```
[root@centos73-1 ~]# /sbin/ip route get 2001:db8:101::0001
2001:db8:101::1 dev eth1 proto ra src 2001:db8:101:0:XXXX:XXXX:XXXX:XXXX metric 100
[root@centos73-1 ~]# /sbin/ip addr add 2001:db8:101::0001/64 dev eth1
[root@centos73-1 ~]# /sbin/ip route get 2001:db8:101::0001
local 2001:db8:101::1 dev lo table local proto none src 2001:db8:101::1 metric 0
```
It cannot be sanitized if the address is unreachable on recent distributions
(probably depending on the iproute package version)
```
[root@centos73-1 ~]# /sbin/ip route get 2001:db8:201::0001
unreachable 2001:db8:201::1 dev lo table unspec proto kernel src 2001:db8:101:0:XXXX:XXXX:XXXX:XXXX metric 429496
```
```
[root@rhel80-1 ~]# /sbin/ip route get 200:db8:201::0001
RTNETLINK answers: Network is unreachable
```
---
heartbeat/IPaddr2 | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 041ace3a2..4f28ddab6 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -477,6 +477,12 @@ ip_init() {
fi
else
FAMILY=inet6
+ # address sanitization defined in RFC5952
+ SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip | awk '$1~/:/ {print $1} $2~/:/ {print $2}')
+ if [ -n "$SANITIZED_IP" ]; then
+ OCF_RESKEY_ip="$SANITIZED_IP"
+ fi
+
if ocf_is_true $OCF_RESKEY_lvs_support ;then
ocf_exit_reason "The IPv6 does not support lvs_support"
exit $OCF_ERR_CONFIGURED

View File

@ -0,0 +1,22 @@
From 7eff4e17641cc1463e61d772af16d17264477523 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 12 Sep 2019 12:51:54 +0200
Subject: [PATCH] IPaddr2: IPv6 return empty string when sanitation fails
---
heartbeat/IPaddr2 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index 4f28ddab6..1d39ae514 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -478,7 +478,7 @@ ip_init() {
else
FAMILY=inet6
# address sanitization defined in RFC5952
- SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip | awk '$1~/:/ {print $1} $2~/:/ {print $2}')
+ SANITIZED_IP=$($IP2UTIL route get $OCF_RESKEY_ip 2> /dev/null | awk '$1~/:/ {print $1} $2~/:/ {print $2}')
if [ -n "$SANITIZED_IP" ]; then
OCF_RESKEY_ip="$SANITIZED_IP"
fi

View File

@ -0,0 +1,92 @@
From 70a28e8130be863a9073b0a80e0511e971e205c4 Mon Sep 17 00:00:00 2001
From: Fabian Herschel <fabian.herschel@suse.com>
Date: Fri, 27 Jul 2018 12:33:19 +0200
Subject: [PATCH 1/2] SAPInstance: implemented reload method The reload method
is needed to avoid resource restarts after a non-unique parameter has been
changed. This is of particular interest for the MONITOR_SERVICES parameter.
---
heartbeat/SAPInstance | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index 8de7cee8c..c25839f0c 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -61,6 +61,7 @@ sapinstance_usage() {
The 'monitor' operation reports whether the instance seems to be working
The 'promote' operation starts the primary instance in a Master/Slave configuration
The 'demote' operation stops the primary instance and starts the ERS instance
+ The 'reload' operation allows changed parameters (non-unique only) without restarting the service
The 'notify' operation always returns SUCCESS
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
@@ -224,6 +225,7 @@ The name of the SAP START profile. Specify this parameter, if you have changed t
<action name="monitor" depth="0" timeout="60s" interval="119s" role="Master" />
<action name="promote" timeout="320s" />
<action name="demote" timeout="320s" />
+<action name="reload" timeout="320" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
<action name="methods" timeout="5s" />
@@ -244,6 +246,7 @@ sapinstance_methods() {
monitor
promote
demote
+ reload
notify
validate-all
methods
@@ -965,6 +968,9 @@ case "$ACTION" in
exit $?;;
validate-all) sapinstance_validate
exit $?;;
+ reload )
+ ocf_log info "reloading SAPInstance parameters"
+ exit $OCF_SUCCESS;;
*) sapinstance_methods
exit $OCF_ERR_UNIMPLEMENTED;;
esac
From ee529b088cc1111656e94dea56b9fcfa6d813313 Mon Sep 17 00:00:00 2001
From: Fabian Herschel <fabian.herschel@suse.com>
Date: Fri, 27 Jul 2018 13:02:39 +0200
Subject: [PATCH 2/2] SAPInstance: Improved indents
---
heartbeat/SAPInstance | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index c25839f0c..174ea36ef 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -61,7 +61,7 @@ sapinstance_usage() {
The 'monitor' operation reports whether the instance seems to be working
The 'promote' operation starts the primary instance in a Master/Slave configuration
The 'demote' operation stops the primary instance and starts the ERS instance
- The 'reload' operation allows changed parameters (non-unique only) without restarting the service
+ The 'reload' operation allows changed parameters (non-unique only) without restarting the service
The 'notify' operation always returns SUCCESS
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
@@ -246,7 +246,7 @@ sapinstance_methods() {
monitor
promote
demote
- reload
+ reload
notify
validate-all
methods
@@ -969,8 +969,8 @@ case "$ACTION" in
validate-all) sapinstance_validate
exit $?;;
reload )
- ocf_log info "reloading SAPInstance parameters"
- exit $OCF_SUCCESS;;
+ ocf_log info "reloading SAPInstance parameters"
+ exit $OCF_SUCCESS;;
*) sapinstance_methods
exit $OCF_ERR_UNIMPLEMENTED;;
esac

View File

@ -0,0 +1,26 @@
From 8eda4725a946ca669df035ed0ffdf053a65e1258 Mon Sep 17 00:00:00 2001
From: Fabian Herschel <fabian.herschel@suse.com>
Date: Thu, 2 Aug 2018 15:36:31 +0200
Subject: [PATCH] SAPInstance: Improved SAP instance profile detection
---
heartbeat/SAPInstance | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/heartbeat/SAPInstance b/heartbeat/SAPInstance
index 174ea36ef..eb058cccf 100755
--- a/heartbeat/SAPInstance
+++ b/heartbeat/SAPInstance
@@ -371,7 +371,11 @@ sapinstance_init() {
if [ -z "$currentSTART_PROFILE" ]
then
- SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}"
+ if [ ! -r "$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}" -a -r "$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}" ]; then
+ SAPSTARTPROFILE="$DIR_PROFILE/${SID}_${InstanceName}_${SAPVIRHOST}"
+ else
+ SAPSTARTPROFILE="$DIR_PROFILE/START_${InstanceName}_${SAPVIRHOST}"
+ fi
else
SAPSTARTPROFILE="$currentSTART_PROFILE"
fi

View File

@ -0,0 +1,37 @@
--- a/heartbeat/SAPInstance 2019-02-20 12:42:55.655819263 +0100
+++ b/heartbeat/SAPInstance 2019-02-08 10:57:02.281048136 +0100
@@ -159,14 +159,14 @@
<content type="string" default="disp+work|msg_server|enserver|enrepserver|jcontrol|jstart|enq_server|enq_replicator"/>
</parameter>
<parameter name="SHUTDOWN_METHOD" unique="0" required="0">
- <longdesc lang="en">Usual a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the gracefull stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !!</longdesc>
+ <longdesc lang="en">Usually a SAP Instance is stopped by the command 'sapcontrol -nr InstanceNr -function Stop'. SHUTDOWN_METHOD=KILL means to kill the SAP Instance using OS commands. SAP processes of the instance are terminated with 'kill -9', shared memory is deleted with 'cleanipc' and the 'kill.sap' file will be deleted. That method is much faster than the graceful stop, but the instance does not have the chance to say goodbye to other SAPinstances in the same system. USE AT YOUR OWN RISK !!</longdesc>
<shortdesc lang="en">Shutdown graceful or kill a SAP instance by terminating the processes. (normal|KILL)</shortdesc>
<content type="string" default="normal"/>
</parameter>
<parameter name="ERS_InstanceName" unique="1" required="0">
<longdesc lang="en">Only used in a Master/Slave resource configuration:
The full qualified SAP enqueue replication instance name. e.g. P01_ERS02_sapp01ers. Usually this is the name of the SAP instance profile.
-The enqueue replication instance must be installed, before you want to configure a master-slave cluster recource.
+The enqueue replication instance must be installed, before you want to configure a master-slave cluster resource.
The master-slave configuration in the cluster must use this properties:
clone_max = 2
@@ -209,7 +209,7 @@
<longdesc lang="en">Only used for ASCS/ERS SAP Netweaver installations without implementing a master/slave resource to
allow the ASCS to 'find' the ERS running on another cluster node after a resource failure. This parameter should be set
to true 'only' for the ERS instance for implementations following the SAP NetWeaver 7.40 HA certification (NW-HA-CLU-740). This includes also
- systems for NetWeaver less than 7.40, if you like to impelemnt the NW-HA-CLU-740 scenario.
+ systems for NetWeaver less than 7.40, if you like to implement the NW-HA-CLU-740 scenario.
</longdesc>
<shortdesc lang="en">Mark SAPInstance as ERS instance</shortdesc>
<content type="boolean" default="false" />
@@ -225,7 +225,7 @@
<action name="monitor" depth="0" timeout="60s" interval="119s" role="Master" />
<action name="promote" timeout="320s" />
<action name="demote" timeout="320s" />
-<action name="reload" timeout="320" />
+<action name="reload" timeout="320s" />
<action name="validate-all" timeout="5s" />
<action name="meta-data" timeout="5s" />
<action name="methods" timeout="5s" />

View File

@ -0,0 +1,77 @@
From 530c48138f7dedaf99ae1ca98865d2f8b7432475 Mon Sep 17 00:00:00 2001
From: Eberhard Kuemmerle <E.Kuemmerle@fz-juelich.de>
Date: Thu, 12 Sep 2019 21:10:43 +0200
Subject: [PATCH] nfsserver: performance improvements for systemd enabled
systems
> I found two critical actions in the script:
> - systemctl status nfs-server (which also calls journalctl)
> - systemctl list-unit-files
source:
https://lists.clusterlabs.org/pipermail/developers/2019-September/002214.html
---
heartbeat/nfsserver | 37 +++++++++++++++++++------------------
1 file changed, 19 insertions(+), 18 deletions(-)
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index bf59da98e..8527a90f3 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -233,24 +233,25 @@ set_exec_mode()
return 0
fi
- ##
- # Attempt systemd (with nfs-lock.service).
- ##
if which systemctl > /dev/null 2>&1; then
- if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep nfs-lock > /dev/null; then
- EXEC_MODE=2
- # when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us.
- return 0
- fi
- fi
+ if systemctl list-unit-files 'nfs-*' | grep nfs-server > /dev/null; then
+
+ ##
+ # Attempt systemd (with nfs-lock.service).
+ ##
+ if systemctl list-unit-files 'nfs-*' | grep nfs-lock > /dev/null; then
+ EXEC_MODE=2
+ # when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us.
+ return 0
+ fi
- ##
- # Attempt systemd (with rpc-statd.service).
- ##
- if which systemctl > /dev/null 2>&1; then
- if systemctl list-unit-files | grep nfs-server > /dev/null && systemctl list-unit-files | grep rpc-statd > /dev/null; then
- EXEC_MODE=3
- return 0
+ ##
+ # Attempt systemd (with rpc-statd.service).
+ ##
+ if systemctl list-unit-files 'rpc-*' | grep rpc-statd > /dev/null; then
+ EXEC_MODE=3
+ return 0
+ fi
fi
fi
@@ -272,12 +273,12 @@ nfs_exec()
2) if ! echo $svc | grep -q "\."; then
svc="${svc}.service"
fi
- systemctl $cmd $svc
+ systemctl -n0 $cmd $svc
;;
3) if ! echo $svc | grep -q "\."; then
svc="${svc}.service"
fi
- systemctl $cmd $svc
+ systemctl -n0 $cmd $svc
;;
esac
}

View File

@ -0,0 +1,38 @@
From ca9d2f9c2d23a9dc783e0d52419790d0d441232c Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 24 Sep 2019 09:12:47 +0200
Subject: [PATCH] nfsserver: use "--no-legend" for systemctl "list-unit-files"
calls
---
heartbeat/nfsserver | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 8527a90f3..acef0147a 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -234,12 +234,12 @@ set_exec_mode()
fi
if which systemctl > /dev/null 2>&1; then
- if systemctl list-unit-files 'nfs-*' | grep nfs-server > /dev/null; then
+ if systemctl --no-legend list-unit-files 'nfs-*' | grep nfs-server > /dev/null; then
##
# Attempt systemd (with nfs-lock.service).
##
- if systemctl list-unit-files 'nfs-*' | grep nfs-lock > /dev/null; then
+ if systemctl --no-legend list-unit-files 'nfs-*' | grep nfs-lock > /dev/null; then
EXEC_MODE=2
# when using systemd, the nfs-lock service file handles nfsv3 locking daemons for us.
return 0
@@ -248,7 +248,7 @@ set_exec_mode()
##
# Attempt systemd (with rpc-statd.service).
##
- if systemctl list-unit-files 'rpc-*' | grep rpc-statd > /dev/null; then
+ if systemctl --no-legend list-unit-files 'rpc-*' | grep rpc-statd > /dev/null; then
EXEC_MODE=3
return 0
fi

View File

@ -0,0 +1,50 @@
From 8b9c49fd965f73709d5a6e2c21987ba26af4856b Mon Sep 17 00:00:00 2001
From: Luca Miccini <lmiccini@redhat.com>
Date: Wed, 25 Sep 2019 17:12:39 +0200
Subject: [PATCH] Add a configurable delay to Nova Evacuate calls
In case /var/lib/nova/instances resides on NFS we have seen migrations
failing with 'Failed to get "write" lock - Is another process using the
image' errors.
This has been tracked down to grace/lease timeouts not having expired
before attempting the migration/evacuate, so in these cases it might be
desirable to delay the nova evacuate call to give the storage time to
release the locks.
Change-Id: Ie2fe784202d754eda38092479b1ab3ff4d02136a
Resolves: rhbz#1740069
---
diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
index 810f30a..596f520 100644
--- a/heartbeat/NovaEvacuate
+++ b/heartbeat/NovaEvacuate
@@ -125,6 +125,15 @@
<content type="boolean" default="0" />
</parameter>
+<parameter name="evacuate_delay" unique="0" required="0">
+<longdesc lang="en">
+Allows delaying the nova evacuate API call, e.g. to give a storage array time to clean
+up eventual locks/leases.
+</longdesc>
+<shortdesc lang="en">Nova evacuate delay</shortdesc>
+<content type="integer" default="0" />
+</parameter>
+
</parameters>
<actions>
@@ -216,6 +225,11 @@
fence_agent="fence_evacuate"
fi
+ if [ "${OCF_RESKEY_evacuate_delay:-0}" != 0 ]; then # unset/empty means no delay; quoting keeps the test well-formed
+ ocf_log info "Delaying nova evacuate by $OCF_RESKEY_evacuate_delay seconds"
+ sleep "${OCF_RESKEY_evacuate_delay}"
+ fi
+
ocf_log notice "Initiating evacuation of $node with $fence_agent"
$fence_agent ${fence_options} -o status -n ${node}
if [ $? = 1 ]; then

View File

@ -0,0 +1,75 @@
From 6052e8fd37d23f46db217f915b445c7e67dccb34 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 4 Apr 2019 13:31:27 +0200
Subject: [PATCH] IPsrcaddr: make proto optional to fix regression when used
without NetworkManager
---
heartbeat/IPsrcaddr | 21 +++++++++++++++++----
1 file changed, 17 insertions(+), 4 deletions(-)
diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 4ca3d2364..5a447196e 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -50,12 +50,17 @@
#######################################################################
# Initialization:
-
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Defaults
+OCF_RESKEY_proto_default=""
+
+: ${OCF_RESKEY_proto=${OCF_RESKEY_proto_default}}
#######################################################################
+[ -z "$OCF_RESKEY_proto" ] && PROTO="" || PROTO="proto $OCF_RESKEY_proto"
+
USAGE="usage: $0 {start|stop|status|monitor|validate-all|meta-data}";
CMDSHOW="$IP2UTIL route show to exact 0.0.0.0/0"
@@ -97,6 +102,14 @@ dotted quad notation 255.255.255.0).
<shortdesc lang="en">Netmask</shortdesc>
<content type="string" default=""/>
</parameter>
+
+<parameter name="proto">
+<longdesc lang="en">
+Proto to match when finding network. E.g. "kernel".
+</longdesc>
+<shortdesc lang="en">Proto</shortdesc>
+<content type="string" default="" />
+</parameter>
</parameters>
<actions>
@@ -172,7 +185,7 @@ srca_start() {
rc=$OCF_SUCCESS
ocf_log info "The ip route has been already set.($NETWORK, $INTERFACE, $ROUTE_WO_SRC)"
else
- ip route replace $NETWORK dev $INTERFACE src $1 || \
+ $IP2UTIL route replace $NETWORK dev $INTERFACE src $1 || \
errorexit "command 'ip route replace $NETWORK dev $INTERFACE src $1' failed"
$CMDCHANGE $ROUTE_WO_SRC src $1 || \
@@ -204,7 +217,7 @@ srca_stop() {
[ $rc = 2 ] && errorexit "The address you specified to stop does not match the preferred source address"
- ip route replace $NETWORK dev $INTERFACE || \
+ $IP2UTIL route replace $NETWORK dev $INTERFACE || \
errorexit "command 'ip route replace $NETWORK dev $INTERFACE' failed"
$CMDCHANGE $ROUTE_WO_SRC || \
@@ -473,7 +486,7 @@ rc=$?
}
INTERFACE=`echo $findif_out | awk '{print $1}'`
-NETWORK=`ip route list dev $INTERFACE scope link proto kernel match $ipaddress|grep -o '^[^ ]*'`
+NETWORK=`$IP2UTIL route list dev $INTERFACE scope link $PROTO match $ipaddress|grep -m 1 -o '^[^ ]*'`
case $1 in
start) srca_start $ipaddress

View File

@ -0,0 +1,22 @@
From 9cea030ba6d5c759971873b80d6d97b545ecac39 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 7 Nov 2019 13:03:30 +0100
Subject: [PATCH] exportfs: allow multiple exports of same directory
---
heartbeat/exportfs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index d79aced88..1cabdee70 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -82,7 +82,7 @@ The directory or directories to export.
<content type="string" />
</parameter>
-<parameter name="fsid" unique="1" required="1">
+<parameter name="fsid" unique="0" required="1">
<longdesc lang="en">
The fsid option to pass to exportfs. This can be a unique positive
integer, a UUID (assuredly sans comma characters), or the special string

View File

@ -0,0 +1,46 @@
From c718050a4a2bb47d640af1d8e19995590755670f Mon Sep 17 00:00:00 2001
From: Valentin Vidic <vvidic@debian.org>
Date: Wed, 23 Oct 2019 22:04:44 +0200
Subject: [PATCH] Low: mysql-common: fix startup check
PID value is not captured correctly so the startup
fails with the wrong exit code.
Starting 'mysql' case 8 'check lib file':
Setting agent environment: export OCF_RESKEY_CRM_meta_timeout=15000
Setting system environment: chmod u-w /var/lib/mysql
Running agent: ./mysql start
ERROR: The agent was hanging, killed it, maybe you damaged the agent or system's environment, see details below:
Oct 23 18:46:06 INFO: MySQL is not running
runuser: warning: cannot change directory to /nonexistent: No such file or directory
runuser: warning: cannot change directory to /nonexistent: No such file or directory
runuser: warning: cannot change directory to /nonexistent: No such file or directory
Oct 23 18:46:06 INFO: MySQL is not running
Oct 23 18:46:08 INFO: MySQL is not running
Oct 23 18:46:10 INFO: MySQL is not running
Oct 23 18:46:12 INFO: MySQL is not running
Oct 23 18:46:14 INFO: MySQL is not running
Oct 23 18:46:16 INFO: MySQL is not running
Oct 23 18:46:18 INFO: MySQL is not running
Oct 23 18:46:20 INFO: MySQL is not running
Oct 23 18:46:22 INFO: MySQL is not running
Oct 23 18:46:24 INFO: MySQL is not running
---
heartbeat/mysql-common.sh | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index d1b1ddb96..4004a6b65 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -239,8 +239,8 @@ mysql_common_start()
--datadir=$OCF_RESKEY_datadir \
--log-error=$OCF_RESKEY_log \
$OCF_RESKEY_additional_parameters \
- $mysql_extra_params >/dev/null 2>&1 &
- pid=$!"
+ $mysql_extra_params >/dev/null 2>&1" &
+ pid=$!
# Spin waiting for the server to come up.
# Let the CRM/LRM time us out if required.

View File

@ -66,7 +66,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.1.1
Release: 27%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
Release: 39%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPLv2+ and LGPLv2+
URL: https://github.com/ClusterLabs/resource-agents
%if 0%{?fedora} || 0%{?centos_version} || 0%{?rhel}
@ -129,12 +129,60 @@ Patch42: bz1697559-aws-vpc-move-ip-1-multi-route-table-support.patch
Patch43: bz1697559-aws-vpc-move-ip-2-fix-route-update-multi-NICs.patch
Patch44: bz1669140-Route-make-family-parameter-optional.patch
Patch45: bz1683548-redis-mute-password-warning.patch
Patch46: bz1692413-iSCSILogicalUnit-create-iqn-when-it-doesnt-exist.patch
Patch46: bz1692413-1-iSCSITarget-create-iqn-when-it-doesnt-exist.patch
Patch47: bz1689184-Squid-2-dont-run-pgrep-without-PID.patch
Patch48: bz1707969-1-ocf_log-do-not-log-debug-when-HA_debug-unset.patch
Patch49: bz1707969-2-ocf_is_true-add-True-to-regexp.patch
Patch50: bz1717759-Filesystem-remove-notify-action-from-metadata.patch
Patch51: bz1719684-dhcpd-keep-SELinux-context-chroot.patch
Patch52: bz1718219-podman-1-avoid-double-inspect-call.patch
Patch53: bz1718219-podman-2-improve-monitor-action.patch
Patch54: bz1718219-podman-3-remove-docker-remnant.patch
Patch55: bz1718219-podman-4-use-exec-to-avoid-performance-issues.patch
Patch56: bz1730455-LVM-activate-fix-monitor-hang.patch
Patch57: bz1732867-CTDB-1-explicitly-use-bash-shell.patch
Patch58: bz1732867-CTDB-2-add-ctdb_max_open_files-parameter.patch
Patch59: bz1732867-CTDB-3-fixes.patch
Patch60: bz1732867-CTDB-4-add-v4.9-support.patch
Patch61: bz1692413-2-iSCSILogicalUnit-create-acls-fix.patch
Patch62: bz1736746-podman-drop-in-support.patch
Patch63: bz1692960-mysql-galera-runuser-su-to-avoid-dac_override.patch
Patch64: bz1745713-rabbitmq-cluster-1-monitor-mnesia-status.patch
Patch65: bz1745713-rabbitmq-cluster-2-fail-when-in-minority-partition.patch
Patch66: bz1745713-rabbitmq-cluster-3-fix-stop-regression.patch
Patch67: bz1745713-rabbitmq-cluster-4-retry-start-cluster-join-fails.patch
Patch68: bz1745713-rabbitmq-cluster-5-ensure-node-attribures-removed.patch
Patch69: bz1745713-rabbitmq-cluster-6-debug-log-mnesia-query-fails.patch
Patch70: bz1745713-rabbitmq-cluster-7-suppress-additional-output.patch
Patch71: bz1695039-LVM-activate-return-NOT_RUNNING-rejoin-cluster.patch
Patch72: bz1738428-LVM-activate-detect-volume-without-reboot.patch
Patch73: bz1744103-Filesystem-1-monitor-symlink-support.patch
Patch74: bz1744103-Filesystem-2-add-symlink-support.patch
Patch75: bz1744103-Filesystem-3-fix-umount-disk-failure.patch
Patch76: bz1744103-Filesystem-4-fix-readlink-issue.patch
Patch77: bz1744140-Filesystem-1-avoid-corrupt-mount-list.patch
Patch78: bz1744140-Filesystem-2-prevent-killing-bind-mount.patch
Patch79: bz1744140-Filesystem-3-improved-bind-mount-check.patch
Patch80: bz1757837-IPsrcaddr-fix-regression-without-NetworkManager.patch
Patch81: bz1744224-IPsrcaddr-1-add-destination-and-table-parameters.patch
Patch82: bz1748768-docker-fix-stop-issues.patch
Patch83: bz1750261-Route-1-dont-fence-when-parameters-not-set.patch
Patch84: bz1750352-rabbitmq-cluster-restore-users-single-node-mode.patch
Patch85: bz1751700-IPaddr2-1-sanitize-IPv6-IPs.patch
Patch86: bz1751700-IPaddr2-2-return-empty-when-sanitation-fails.patch
Patch87: bz1751949-1-SAPInstance-add-reload-action.patch
Patch88: bz1751949-2-SAPInstance-improve-profile-detection.patch
Patch89: bz1751949-3-SAPInstance-metadata-improvements.patch
Patch90: bz1751962-nfsserver-1-systemd-perf-improvements.patch
Patch91: bz1751962-nfsserver-2-systemd-use-no-legend.patch
Patch92: bz1755760-NovaEvacuate-evacuate_delay.patch
Patch93: bz1750261-Route-2-validate-start-validate-all.patch
Patch94: bz1741843-LVM-activate-partial-activation.patch
Patch95: bz1764888-exportfs-allow-same-fsid.patch
Patch96: bz1765128-mysql-galera-fix-incorrect-rc.patch
Patch97: bz1741042-IPaddr2-add-noprefixroute-parameter.patch
Patch98: bz1744224-IPsrcaddr-2-local-rule-destination-fixes.patch
# bundle patches
Patch1000: 7-gcp-bundled.patch
Patch1001: 8-google-cloud-sdk-fixes.patch
@ -282,8 +330,6 @@ Platform instances to be managed in a cluster environment.
exit 1
%endif
%setup -q -n %{upstream_prefix}-%{upstream_version}
%setup -T -D -a 1 -n %{upstream_prefix}-%{upstream_version}
%setup -T -D -a 2 -n %{upstream_prefix}-%{upstream_version}
%patch0 -p1
%patch1 -p1
%patch2 -p1
@ -336,6 +382,53 @@ exit 1
%patch49 -p1
%patch50 -p1
%patch51 -p1
%patch52 -p1
%patch53 -p1
%patch54 -p1
%patch55 -p1
%patch56 -p1
%patch57 -p1
%patch58 -p1
%patch59 -p1
%patch60 -p1 -F1
%patch61 -p1
%patch62 -p1 -F2
%patch63 -p1
%patch64 -p1
%patch65 -p1
%patch66 -p1
%patch67 -p1
%patch68 -p1
%patch69 -p1
%patch70 -p1
%patch71 -p1
%patch72 -p1
%patch73 -p1
%patch74 -p1
%patch75 -p1
%patch76 -p1
%patch77 -p1
%patch78 -p1
%patch79 -p1
%patch80 -p1
%patch81 -p1
%patch82 -p1
%patch83 -p1
%patch84 -p1
%patch85 -p1
%patch86 -p1
%patch87 -p1
%patch88 -p1
%patch89 -p1
%patch90 -p1
%patch91 -p1
%patch92 -p1
%patch93 -p1
%patch94 -p1
%patch95 -p1
%patch96 -p1
%patch97 -p1 -F2
%patch98 -p1
chmod 755 heartbeat/nova-compute-wait
chmod 755 heartbeat/NovaEvacuate
@ -760,8 +853,8 @@ rm -rf %{buildroot}
%exclude /usr/lib/ocf/resource.d/heartbeat/rsyslog
%exclude /usr/lib/ocf/resource.d/heartbeat/vsftpd
%exclude /usr/lib/ocf/resource.d/heartbeat/ZFS
%exclude %{_mandir}/man7/ocf_heartbeat_clvm
%exclude %{_mandir}/man7/ocf_heartbeat_LVM
%exclude %{_mandir}/man7/ocf_heartbeat_clvm.7.gz
%exclude %{_mandir}/man7/ocf_heartbeat_LVM.7.gz
%exclude %{_mandir}/man7/ocf_heartbeat_AoEtarget.7.gz
%exclude %{_mandir}/man7/ocf_heartbeat_AudibleAlarm.7.gz
%exclude %{_mandir}/man7/ocf_heartbeat_ClusterMon.7.gz
@ -851,7 +944,7 @@ ccs_update_schema > /dev/null 2>&1 ||:
%ifarch x86_64
%files aliyun
%doc %{aliyuncli}_README.rst %{colorama}_README.rst %{pycryptodome}_README.rst aliyun*_README*
%doc aliyun*_README* %{colorama}_README.rst %{pycryptodome}_README.rst
%license %{aliyuncli}_LICENSE %{colorama}_LICENSE.txt %{pycryptodome}_LICENSE.rst
%defattr(-,root,root)
/usr/lib/ocf/resource.d/heartbeat/aliyun-vpc-move-ip*
@ -880,6 +973,95 @@ ccs_update_schema > /dev/null 2>&1 ||:
%endif
%changelog
* Fri Nov 29 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-39
- IPsrcaddr: add destination and table parameters
Resolves: rhbz#1744224
* Wed Nov 27 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-38
- IPaddr2: add noprefixroute parameter
Resolves: rhbz#1741042
* Wed Nov 13 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-36
- exportfs: allow multiple exports with same fsid
- mysql/galera: fix incorrect rc
Resolves: rhbz#1764888
Resolves: rhbz#1765128
* Mon Oct 14 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-35
- Route: don't fence when parameters not set
- LVM-activate: add partial-activation support
Resolves: rhbz#1750261
Resolves: rhbz#1741843
* Wed Oct 2 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-34
- LVM/clvm: remove manpages for excluded agents
- LVM-activate: return NOT_RUNNING when node rejoins cluster
- LVM-activate: detect systemid volume without reboot
- Filesystem: add symlink support
- Filesystem: avoid corrupt mount-list and don't kill incorrect processes
for bind-mounts
- IPsrcaddr: make proto optional to fix regression when used without
NetworkManager
- docker: fix stop issues
- rabbitmq-cluster: also restore users in single node mode
- IPaddr2: sanitize compressed IPv6 IPs
- SAPInstance: add reload-action
- nfsserver: systemd performance improvements
- NovaEvacuate: add "evacuate_delay" parameter
Resolves: rhbz#1694392
Resolves: rhbz#1695039
Resolves: rhbz#1738428
Resolves: rhbz#1744103
Resolves: rhbz#1744140
Resolves: rhbz#1757837
Resolves: rhbz#1748768
Resolves: rhbz#1750352
Resolves: rhbz#1751700
Resolves: rhbz#1751949
Resolves: rhbz#1751962
Resolves: rhbz#1755760
* Tue Aug 27 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-33
- rabbitmq-cluster: fail monitor when node is in minority partition,
fix stop regression, retry start when cluster join fails, ensure
node attributes are removed
Resolves: rhbz#1745713
* Mon Aug 12 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-32
- mysql/galera: use runuser/su to avoid using DAC_OVERRIDE
Resolves: rhbz#1692960
* Wed Aug 7 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-31
- podman: add drop-in dependency support
Resolves: rhbz#1736746
* Wed Jul 31 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-30
- iSCSITarget/iSCSILogicalUnit: only create iqn/acls when it doesn't
exist
Resolves: rhbz#1692413
* Tue Jul 30 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-29
- CTDB: add support for v4.9+
Resolves: rhbz#1732867
* Tue Jul 23 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-28
- podman: fixes to avoid bundle resources restarting when probing
takes too long
- LVM-activate: fix monitor to avoid hang caused by validate-all call
Resolves: rhbz#1718219
Resolves: rhbz#1730455
* Wed Jun 19 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-27
- ocf_log: do not log debug messages when HA_debug unset
- Filesystem: remove notify-action from metadata
@ -902,11 +1084,9 @@ ccs_update_schema > /dev/null 2>&1 ||:
* Tue May 28 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-23
- Route: make family parameter optional
- redis: mute password warning
- iSCSILogicalUnit: create iqn when it doesn't exist
Resolves: rhbz#1669140
Resolves: rhbz#1683548
Resolves: rhbz#1692413
* Thu May 23 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-22
- aws-vpc-move-ip: add multi route-table support and fix issue
@ -920,11 +1100,9 @@ ccs_update_schema > /dev/null 2>&1 ||:
Resolves: rhbz#1695656
* Mon Apr 1 2019 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.1.1-20
- LVM/clvm: remove manpages for excluded agents
- aws-vpc-move-ip: use "--query" to avoid a possible race condition
- gcloud-ra: fix Python 3 issue and remove Python 2 detection
Resolves: rhbz#1694392
Resolves: rhbz#1693662
Resolves: rhbz#1691456