- portblock: add promotable and nftables support, and method and
  status_check parameters
- db2: use reintegration flag to avoid race condition on cluster
  reintegration

Resolves: RHEL-116149, RHEL-116152, RHEL-115495
This commit is contained in:
parent
a77e31dd32
commit
3e111eae9a
@ -0,0 +1,481 @@
|
|||||||
|
From dbc0d2647d73bed986bf7208df33f092f56e8523 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Thu, 25 Sep 2025 14:23:20 +0200
|
||||||
|
Subject: [PATCH] db2: use reintegration flag to avoid race condition on
|
||||||
|
cluster reintegration, and removed FAL, as it's no longer needed
|
||||||
|
|
||||||
|
---
|
||||||
|
heartbeat/db2 | 306 ++++++++++++++++++++++++++++++++------------------
|
||||||
|
1 file changed, 197 insertions(+), 109 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/db2 b/heartbeat/db2
|
||||||
|
index fe1d9b892..83020fc70 100755
|
||||||
|
--- a/heartbeat/db2
|
||||||
|
+++ b/heartbeat/db2
|
||||||
|
@@ -37,6 +37,13 @@
|
||||||
|
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
||||||
|
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
||||||
|
|
||||||
|
+# Use runuser if available for SELinux.
|
||||||
|
+if [ -x "/sbin/runuser" ]; then
|
||||||
|
+ SU="runuser"
|
||||||
|
+else
|
||||||
|
+ SU="su"
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
# Parameter defaults
|
||||||
|
|
||||||
|
OCF_RESKEY_instance_default=""
|
||||||
|
@@ -55,11 +62,12 @@ OCF_RESKEY_dbpartitionnum_default="0"
|
||||||
|
: ${OCF_RESKEY_admin=${OCF_RESKEY_admin_default}}
|
||||||
|
: ${OCF_RESKEY_dbpartitionnum=${OCF_RESKEY_dbpartitionnum_default}}
|
||||||
|
|
||||||
|
+POSIX_UNICODE_LOCALE="C.UTF-8"
|
||||||
|
#######################################################################
|
||||||
|
|
||||||
|
|
||||||
|
db2_usage() {
|
||||||
|
- echo "db2 start|stop|monitor|promote|demote|notify|validate-all|meta-data"
|
||||||
|
+ echo "db2 start|stop|monitor|promote|demote|validate-all|meta-data"
|
||||||
|
}
|
||||||
|
|
||||||
|
db2_meta_data() {
|
||||||
|
@@ -162,7 +170,6 @@ The number of the partition (DBPARTITIONNUM) to be managed.
|
||||||
|
<action name="stop" timeout="120s"/>
|
||||||
|
<action name="promote" timeout="120s"/>
|
||||||
|
<action name="demote" timeout="120s"/>
|
||||||
|
-<action name="notify" timeout="10s"/>
|
||||||
|
<action name="monitor" depth="0" timeout="60s" interval="20s"/>
|
||||||
|
<action name="monitor" depth="0" timeout="60s" role="Promoted" interval="22s"/>
|
||||||
|
<action name="validate-all" timeout="5s"/>
|
||||||
|
@@ -273,7 +280,18 @@ master_score()
|
||||||
|
# Run the given command as db2 instance user
|
||||||
|
#
|
||||||
|
runasdb2() {
|
||||||
|
- su $instance -c ". $db2profile; $*"
|
||||||
|
+ $SU $instance -c ". $db2profile; $*"
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+#
|
||||||
|
+# Run the given command as db2 instance user using $SU
|
||||||
|
+# We run this function as opposed to runasdb2 whenever we have to issue commands
|
||||||
|
+# that leave processes running on the system, such as db2start
|
||||||
|
+# We do not want these processes to hog the resources as they were run with elevated privileges
|
||||||
|
+#
|
||||||
|
+runasdb2_session() {
|
||||||
|
+ # Overriding the locale set by db2profile with a Unicode locale is required to maintain compatibility with a Unicode CODEPAGE
|
||||||
|
+ $SU "$instance" -c "ksh -c '. $db2profile; export LC_ALL="$POSIX_UNICODE_LOCALE"; export LANG="$POSIX_UNICODE_LOCALE"; $*'"
|
||||||
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
@@ -294,48 +312,6 @@ logasdb2() {
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
-#
|
||||||
|
-# maintain the fal (first active log) attribute
|
||||||
|
-# db2_fal_attrib DB {set val|get}
|
||||||
|
-#
|
||||||
|
-db2_fal_attrib() {
|
||||||
|
- local db=$1
|
||||||
|
- local attr val rc id node member me
|
||||||
|
-
|
||||||
|
- attr=db2hadr_${instance}_${db}_fal
|
||||||
|
-
|
||||||
|
- case "$2" in
|
||||||
|
- set)
|
||||||
|
- me=$(ocf_local_nodename)
|
||||||
|
-
|
||||||
|
- # loop over all member nodes and set attribute
|
||||||
|
- crm_node -l |
|
||||||
|
- while read id node member
|
||||||
|
- do
|
||||||
|
- [ "$member" = member -a "$node" != "$me" ] || continue
|
||||||
|
- crm_attribute -l forever --node=$node -n $attr -v "$3"
|
||||||
|
- rc=$?
|
||||||
|
- ocf_log info "DB2 instance $instance($db2node/$db: setting attrib for FAL to $FIRST_ACTIVE_LOG @ $node"
|
||||||
|
- [ $rc != 0 ] && break
|
||||||
|
- done
|
||||||
|
- ;;
|
||||||
|
-
|
||||||
|
- get)
|
||||||
|
- crm_attribute -l forever -n $attr -G --quiet 2>&1
|
||||||
|
- rc=$?
|
||||||
|
- if ! ocf_is_true "$OCF_RESKEY_CRM_meta_notify" && [ $rc != 0 ]
|
||||||
|
- then
|
||||||
|
- ocf_log warn "DB2 instance $instance($db2node/$db: can't retrieve attribute $attr, are you sure notifications are enabled ?"
|
||||||
|
- fi
|
||||||
|
- ;;
|
||||||
|
-
|
||||||
|
- *)
|
||||||
|
- exit $OCF_ERR_CONFIGURED
|
||||||
|
- esac
|
||||||
|
-
|
||||||
|
- return $rc
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
#
|
||||||
|
# unfortunately a first connect after a crash may need several minutes
|
||||||
|
# for some internal cleanup stuff in DB2.
|
||||||
|
@@ -429,6 +405,42 @@ db2_check_config_compatibility() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
+#
|
||||||
|
+# Start HADR as standby.
|
||||||
|
+#
|
||||||
|
+# Parameters
|
||||||
|
+# 1 - Database name
|
||||||
|
+# 2 - Calling function, 3 - Calling function's line number
|
||||||
|
+#
|
||||||
|
+# Return codes:
|
||||||
|
+# 0 - Start as standby successful
|
||||||
|
+# 1 - Start as standby failed
|
||||||
|
+#
|
||||||
|
+reintegrateAsStandby() {
|
||||||
|
+ db=$1
|
||||||
|
+ reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: reintegrateAsStandby called by $2 at $3. Attempting to reintegrate $db as standby."
|
||||||
|
+ if output=$(runasdb2_session "db2 start hadr on db $db as standby"); then
|
||||||
|
+ rc=0
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db started/activated"
|
||||||
|
+ else
|
||||||
|
+ case $output in
|
||||||
|
+ SQL1777N*)
|
||||||
|
+ # SQL1777N: HADR is already started in given state.
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $output"
|
||||||
|
+ rc=0
|
||||||
|
+ ;;
|
||||||
|
+
|
||||||
|
+ *)
|
||||||
|
+ rc=1
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: Unable to reintegrate Db2 database $instance($db2node)/$db. Please reintegrate manually: $output, return with rc=$rc"
|
||||||
|
+ ;;
|
||||||
|
+ esac
|
||||||
|
+ fi
|
||||||
|
+ crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
|
||||||
|
+ return $rc
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#
|
||||||
|
# Start instance and DB.
|
||||||
|
# Standard mode is through "db2 activate" in order to start in previous
|
||||||
|
@@ -478,6 +490,8 @@ db2_start() {
|
||||||
|
|
||||||
|
for db in $dblist
|
||||||
|
do
|
||||||
|
+ reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
|
||||||
|
+
|
||||||
|
# sets HADR_ROLE HADR_TIMEOUT HADR_PEER_WINDOW FIRST_ACTIVE_LOG
|
||||||
|
db2_get_cfg $db || return $?
|
||||||
|
|
||||||
|
@@ -488,20 +502,13 @@ db2_start() {
|
||||||
|
|
||||||
|
if [ $HADR_ROLE = PRIMARY ]
|
||||||
|
then
|
||||||
|
- local master_fal
|
||||||
|
-
|
||||||
|
- # communicate our FAL to other nodes the might start concurrently
|
||||||
|
- db2_fal_attrib $db set $FIRST_ACTIVE_LOG
|
||||||
|
-
|
||||||
|
- # ignore false positive:
|
||||||
|
- # error: Can't use > in [ ]. Escape it or use [[..]]. [SC2073]
|
||||||
|
- # see https://github.com/koalaman/shellcheck/issues/691
|
||||||
|
- # shellcheck disable=SC2073
|
||||||
|
- if master_fal=$(db2_fal_attrib $db get) && [ "$master_fal" '>' $FIRST_ACTIVE_LOG ]
|
||||||
|
- then
|
||||||
|
+ cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: CIB attribute $reint_attr is set to '$cib_value'"
|
||||||
|
+ if [ "$cib_value" = "1" ]; then
|
||||||
|
ocf_log info "DB2 database $instance($db2node)/$db is Primary and outdated, starting as secondary"
|
||||||
|
start_cmd="db2 start hadr on db $db as standby"
|
||||||
|
HADR_ROLE=STANDBY
|
||||||
|
+ standby_reintegration=1
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
@@ -511,27 +518,65 @@ db2_start() {
|
||||||
|
[ $HADR_ROLE != STANDBY ] && db2_run_connect $db &
|
||||||
|
else
|
||||||
|
case $output in
|
||||||
|
- SQL1490W*|SQL1494W*|SQL1497W*|SQL1777N*)
|
||||||
|
- ocf_log info "DB2 database $instance($db2node)/$db already activated: $output"
|
||||||
|
+ SQL1490W* | SQL1494W* | SQL1497W* | SQL1777N*)
|
||||||
|
+ # SQL1490W Activate database is successful, however, the database has already been activated on one or more nodes.
|
||||||
|
+ # SQL1494W Activate database is successful, however, there is already a connection to the database.
|
||||||
|
+ # SQL1497W Activate/Deactivate database was successful, however, an error occurred on some nodes.
|
||||||
|
+ # SQL1777N HADR is already started.
|
||||||
|
+
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database is already activated: $output"
|
||||||
|
;;
|
||||||
|
|
||||||
|
- SQL1768N*"Reason code = \"7\""*)
|
||||||
|
- ocf_log err "DB2 database $instance($db2node)/$db is a Primary and the Standby is down"
|
||||||
|
- ocf_log err "Possible split brain ! Manual intervention required."
|
||||||
|
+ SQL1768N*"Reason code = \"7\""*)
|
||||||
|
+ rc="$OCF_ERR_GENERIC"
|
||||||
|
+
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database is a Primary and the Standby is down"
|
||||||
|
+ ocf_log err "Possible split brain! Manual intervention required."
|
||||||
|
ocf_log err "If this DB is outdated use \"db2 start hadr on db $db as standby\""
|
||||||
|
- ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\""
|
||||||
|
+ ocf_log err "If this DB is the surviving primary use \"db2 start hadr on db $db as primary by force\". db2_start() exit with rc=$rc."
|
||||||
|
|
||||||
|
- # might be the Standby is not yet there
|
||||||
|
- # might be a timing problem because "First active log" is delayed
|
||||||
|
- # on the next start attempt we might succeed when FAL was advanced
|
||||||
|
- # might be manual intervention is required
|
||||||
|
- # ... so let pacemaker give it another try and we will succeed then
|
||||||
|
- return $OCF_ERR_GENERIC
|
||||||
|
+ # let pacemaker give it another try and we will succeed then
|
||||||
|
+ return "$rc"
|
||||||
|
;;
|
||||||
|
|
||||||
|
- *)
|
||||||
|
- ocf_log err "DB2 database $instance($db2node)/$db didn't start: $output"
|
||||||
|
- return $OCF_ERR_GENERIC
|
||||||
|
+ SQL1776N*"Reason code = \"6\""*)
|
||||||
|
+ # SQL1776N The command cannot be issued on an HADR database.
|
||||||
|
+ # Reason code 6:
|
||||||
|
+ # This database is an old primary database. It cannot be started
|
||||||
|
+ # because the standby has become the new primary through forced
|
||||||
|
+ # takeover.
|
||||||
|
+
|
||||||
|
+ rc="$OCF_ERR_GENERIC"
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db didn't start: $output, return with rc=$rc"
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: This database is an old primary database. Trying start again as standby"
|
||||||
|
+
|
||||||
|
+ start_cmd="db2 start hadr on db $db as standby"
|
||||||
|
+ if output=$(runasdb2_session "$start_cmd"); then
|
||||||
|
+ rc="$OCF_SUCCESS"
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: Db2 database $instance($db2node)/$db started/activated"
|
||||||
|
+ else
|
||||||
|
+ case $output in
|
||||||
|
+ SQL1777N*)
|
||||||
|
+ # SQL1777N: HADR is already started.
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $output"
|
||||||
|
+ rc="$OCF_SUCCESS"
|
||||||
|
+ ;;
|
||||||
|
+
|
||||||
|
+ *)
|
||||||
|
+ rc="$OCF_ERR_GENERIC"
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: Unable to reintegrate Db2 database $instance($db2node)/$db. Please reintegrate manually: $output, return with rc=$rc"
|
||||||
|
+ ;;
|
||||||
|
+ esac
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
+ return "$rc"
|
||||||
|
+ ;;
|
||||||
|
+
|
||||||
|
+ *)
|
||||||
|
+ rc="$OCF_ERR_GENERIC"
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database didn't start: $output, db2_start() exit with rc=$rc."
|
||||||
|
+ return "$rc"
|
||||||
|
+ ;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
@@ -539,6 +584,15 @@ db2_start() {
|
||||||
|
# come here with success
|
||||||
|
# Even if we are a db2 Primary pacemaker requires start to end up in slave mode
|
||||||
|
echo SLAVE > $STATE_FILE
|
||||||
|
+
|
||||||
|
+ # Unset primary failover attribute as host was successfully reintegrated as standby
|
||||||
|
+ if [ "$standby_reintegration" = "1" ]; then
|
||||||
|
+ for db in $dblist; do
|
||||||
|
+ reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
|
||||||
|
+ crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
|
||||||
|
+ done
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -737,7 +791,7 @@ db2_monitor_retry() {
|
||||||
|
|
||||||
|
#
|
||||||
|
# Monitor the db
|
||||||
|
-# And as side effect set crm_master / FAL attribute
|
||||||
|
+# And as side effect set crm_master
|
||||||
|
#
|
||||||
|
db2_monitor() {
|
||||||
|
local CMD output hadr db
|
||||||
|
@@ -754,6 +808,22 @@ db2_monitor() {
|
||||||
|
|
||||||
|
for db in $dblist
|
||||||
|
do
|
||||||
|
+ reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
|
||||||
|
+
|
||||||
|
+ #Check for the reintegration file, then set the flag if it exists and delete the file
|
||||||
|
+ if [ -e "/tmp/$reint_attr" ] && [ -n "$remote_host" ]; then
|
||||||
|
+ #The file exists, try to set the reintegration attribute
|
||||||
|
+ crm_attribute -n "$reint_attr" -N "$remote_host" -v "1" -l forever
|
||||||
|
+ cib_value=$(crm_attribute -n "$reint_attr" -N "$remote_host" -G | awk -v FS=' value=' '{print $2}')
|
||||||
|
+
|
||||||
|
+ if [ "$cib_value" = "1" ]; then
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: CIB attribute $reint_attr is set to '$cib_value', reintegration flag file will now be deleted."
|
||||||
|
+ rm -f "/tmp/$reint_attr"
|
||||||
|
+ else
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The reintegration flag file exists, but its attribute failed to set."
|
||||||
|
+ fi
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
hadr=$(db2_hadr_status $db)
|
||||||
|
rc=$?
|
||||||
|
ocf_log debug "Monitor: DB2 database $instance($db2node)/$db has HADR status $hadr"
|
||||||
|
@@ -804,6 +874,14 @@ db2_monitor() {
|
||||||
|
;;
|
||||||
|
|
||||||
|
STANDBY/*PEER/*|Standby/*Peer)
|
||||||
|
+ # If db is in standby peer, then it has already reintegrated.
|
||||||
|
+ # If the reintegrate flag is still set, remove it
|
||||||
|
+ cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
|
||||||
|
+ if [ "$cib_value" = "1" ]; then
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: Reintegrate flag detected for $db, but it has already reintegrated as standby. Removing reintegration flag."
|
||||||
|
+ crm_attribute -n "$reint_attr" -N "$local_host" -v "0" -l forever
|
||||||
|
+ fi
|
||||||
|
+
|
||||||
|
master_score -v 8000 -l reboot
|
||||||
|
;;
|
||||||
|
|
||||||
|
@@ -812,6 +890,34 @@ db2_monitor() {
|
||||||
|
master_score -D -l reboot
|
||||||
|
;;
|
||||||
|
|
||||||
|
+ Down/Off)
|
||||||
|
+ # If db is a deactivated primary and it has a reintegration flag, then reintegrate as standby.
|
||||||
|
+ cib_value=$(crm_attribute -n "$reint_attr" -N "$local_host" -G | awk -v FS=' value=' '{print $2}')
|
||||||
|
+ if [ "$cib_value" = "1" ]; then
|
||||||
|
+ output=$(runasdb2 "db2 get db cfg for $db" | grep 'HADR database role' | awk '{print $5}')
|
||||||
|
+ if [ "PRIMARY" = "$output" ]; then
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: Database is deactivated with Primary role and the reintegration flag is set. Role: $output, Reintegration flag: $reint_attr = $cib_value"
|
||||||
|
+ # Reintegrate as the standby database.
|
||||||
|
+ if reintegrateAsStandby "$db" 'db2_monitor' $LINENO; then
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database reintegration succeeded."
|
||||||
|
+ # Setting slave state here will cause rc to be OCF_SUCCESS below.
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: Echoing SLAVE into $STATE_FILE"
|
||||||
|
+ echo SLAVE >"$STATE_FILE"
|
||||||
|
+ # Update master score to reflect standby state.
|
||||||
|
+ master_score -v 8000 -l reboot
|
||||||
|
+ else
|
||||||
|
+ ocf_log err "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database reintegration failed."
|
||||||
|
+ return "$OCF_ERR_GENERIC"
|
||||||
|
+ fi
|
||||||
|
+ fi
|
||||||
|
+ else
|
||||||
|
+ rc="$OCF_NOT_RUNNING"
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: The database has HADR status $hadr."
|
||||||
|
+ ocf_log info "$__OCF_ACTION: $LINENO: $instance: $db2node: $db: db2_monitor() exit with rc=$rc."
|
||||||
|
+ return "$rc"
|
||||||
|
+ fi
|
||||||
|
+ ;;
|
||||||
|
+
|
||||||
|
*)
|
||||||
|
return $OCF_ERR_GENERIC
|
||||||
|
esac
|
||||||
|
@@ -875,8 +981,6 @@ db2_promote() {
|
||||||
|
# update pacemaker's view
|
||||||
|
echo MASTER > $STATE_FILE
|
||||||
|
|
||||||
|
- # turn the log so we rapidly get a new FAL
|
||||||
|
- logasdb2 "db2 archive log for db $db"
|
||||||
|
return $OCF_SUCCESS
|
||||||
|
fi
|
||||||
|
|
||||||
|
@@ -914,26 +1018,6 @@ db2_demote() {
|
||||||
|
return $?
|
||||||
|
}
|
||||||
|
|
||||||
|
-#
|
||||||
|
-# handle pre start notification
|
||||||
|
-# We record our first active log on the other nodes.
|
||||||
|
-# If two primaries come up after a crash they can safely determine who is
|
||||||
|
-# the outdated one.
|
||||||
|
-#
|
||||||
|
-db2_notify() {
|
||||||
|
- local node
|
||||||
|
-
|
||||||
|
- # only interested in pre-start
|
||||||
|
- [ $OCF_RESKEY_CRM_meta_notify_type = pre \
|
||||||
|
- -a $OCF_RESKEY_CRM_meta_notify_operation = start ] || return $OCF_SUCCESS
|
||||||
|
-
|
||||||
|
- # gets FIRST_ACTIVE_LOG
|
||||||
|
- db2_get_cfg $dblist || return $?
|
||||||
|
-
|
||||||
|
- db2_fal_attrib $dblist set $FIRST_ACTIVE_LOG || return $OCF_ERR_GENERIC
|
||||||
|
- exit $OCF_SUCCESS
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
########
|
||||||
|
# Main #
|
||||||
|
########
|
||||||
|
@@ -947,50 +1031,54 @@ case "$__OCF_ACTION" in
|
||||||
|
db2_usage
|
||||||
|
exit $OCF_SUCCESS
|
||||||
|
;;
|
||||||
|
+esac
|
||||||
|
|
||||||
|
+local_host=$(ocf_local_nodename)
|
||||||
|
+inst1=$(echo "$OCF_RESKEY_instance" | cut -d"," -f1)
|
||||||
|
+inst2=$(echo "$OCF_RESKEY_instance" | cut -d"," -f2)
|
||||||
|
+host1=$(crm_node -l | sort | awk '{print $2;}' | sed -n 1p)
|
||||||
|
+
|
||||||
|
+if [ "$host1" = "$local_host" ]; then
|
||||||
|
+ remote_host=$(crm_node -l | sort | awk '{print $2;}' | sed -n 2p)
|
||||||
|
+else
|
||||||
|
+ remote_host="$host1"
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+db2_validate; validate_rc=$?
|
||||||
|
+
|
||||||
|
+case "$__OCF_ACTION" in
|
||||||
|
start)
|
||||||
|
- db2_validate
|
||||||
|
db2_start || exit $?
|
||||||
|
db2_monitor
|
||||||
|
- exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
stop)
|
||||||
|
- db2_validate
|
||||||
|
db2_stop
|
||||||
|
- exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
promote)
|
||||||
|
- db2_validate
|
||||||
|
db2_promote
|
||||||
|
- exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
demote)
|
||||||
|
- db2_validate
|
||||||
|
db2_demote
|
||||||
|
- exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
notify)
|
||||||
|
- db2_validate
|
||||||
|
- db2_notify
|
||||||
|
- exit $?
|
||||||
|
+ ocf_log debug "notify-action has been DEPRECATED, and should be removed"
|
||||||
|
;;
|
||||||
|
|
||||||
|
monitor)
|
||||||
|
- db2_validate
|
||||||
|
db2_monitor_retry
|
||||||
|
- exit $?
|
||||||
|
;;
|
||||||
|
|
||||||
|
validate-all)
|
||||||
|
- db2_validate
|
||||||
|
- exit $?
|
||||||
|
+ exit $validate_rc
|
||||||
|
;;
|
||||||
|
|
||||||
|
*)
|
||||||
|
db2_usage
|
||||||
|
exit $OCF_ERR_UNIMPLEMENTED
|
||||||
|
esac
|
||||||
|
+
|
||||||
|
+exit $?
|
||||||
@ -0,0 +1,239 @@
|
|||||||
|
From 344beb18e41442f7af86fa585e4fb970452dc632 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
|
||||||
|
Date: Fri, 10 Oct 2025 16:31:00 +0200
|
||||||
|
Subject: [PATCH] portblock: add Promoted monitor op, validate-all checks, and
|
||||||
|
add "method" and "status_check" parameters
|
||||||
|
|
||||||
|
- add Promoted monitor op
|
||||||
|
- run validate-all to catch missing firewall binary and other issues for
|
||||||
|
non-metadata/usage actions
|
||||||
|
- add "method" parameter with reject alternative to be able to clear
|
||||||
|
connections when blocking
|
||||||
|
- add "status_check" parameter to allow user to specify rule or pseudo
|
||||||
|
check
|
||||||
|
---
|
||||||
|
heartbeat/portblock | 93 ++++++++++++++++++++++++++++++++++++++-------
|
||||||
|
1 file changed, 79 insertions(+), 14 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/heartbeat/portblock b/heartbeat/portblock
|
||||||
|
index ff162c955..4fc9c2bb8 100755
|
||||||
|
--- a/heartbeat/portblock
|
||||||
|
+++ b/heartbeat/portblock
|
||||||
|
@@ -29,6 +29,8 @@ OCF_RESKEY_protocol_default=""
|
||||||
|
OCF_RESKEY_portno_default=""
|
||||||
|
OCF_RESKEY_direction_default="in"
|
||||||
|
OCF_RESKEY_action_default=""
|
||||||
|
+OCF_RESKEY_method_default="drop"
|
||||||
|
+OCF_RESKEY_status_check_default="rule"
|
||||||
|
OCF_RESKEY_ip_default="0.0.0.0/0"
|
||||||
|
OCF_RESKEY_reset_local_on_unblock_stop_default="false"
|
||||||
|
OCF_RESKEY_tickle_dir_default=""
|
||||||
|
@@ -39,6 +41,8 @@ OCF_RESKEY_sync_script_default=""
|
||||||
|
: ${OCF_RESKEY_portno=${OCF_RESKEY_portno_default}}
|
||||||
|
: ${OCF_RESKEY_direction=${OCF_RESKEY_direction_default}}
|
||||||
|
: ${OCF_RESKEY_action=${OCF_RESKEY_action_default}}
|
||||||
|
+: ${OCF_RESKEY_method=${OCF_RESKEY_method_default}}
|
||||||
|
+: ${OCF_RESKEY_status_check=${OCF_RESKEY_status_check_default}}
|
||||||
|
: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}
|
||||||
|
: ${OCF_RESKEY_reset_local_on_unblock_stop=${OCF_RESKEY_reset_local_on_unblock_stop_default}}
|
||||||
|
: ${OCF_RESKEY_tickle_dir=${OCF_RESKEY_tickle_dir_default}}
|
||||||
|
@@ -195,6 +199,26 @@ actions.
|
||||||
|
<content type="string" default="${OCF_RESKEY_action_default}" />
|
||||||
|
</parameter>
|
||||||
|
|
||||||
|
+<parameter name="method" unique="0" required="0">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+Block method:
|
||||||
|
+drop: Use DROP rule.
|
||||||
|
+reject: Use REJECT rule w/conntrack to clear connections when blocking.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Block method</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_method_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
+<parameter name="status_check" unique="0" required="0">
|
||||||
|
+<longdesc lang="en">
|
||||||
|
+Status check:
|
||||||
|
+rule: Check rule.
|
||||||
|
+pseudo: Check pseudo status when rule is absent.
|
||||||
|
+</longdesc>
|
||||||
|
+<shortdesc lang="en">Status check</shortdesc>
|
||||||
|
+<content type="string" default="${OCF_RESKEY_status_check_default}" />
|
||||||
|
+</parameter>
|
||||||
|
+
|
||||||
|
<parameter name="reset_local_on_unblock_stop" unique="0" required="0">
|
||||||
|
<longdesc lang="en">
|
||||||
|
If for some reason the long lived server side TCP sessions won't be cleaned up
|
||||||
|
@@ -263,6 +287,7 @@ If "both" is used, both the incoming and outgoing ports are blocked.
|
||||||
|
<action name="demote" timeout="10s"/>
|
||||||
|
<action name="status" depth="0" timeout="10s" interval="10s" />
|
||||||
|
<action name="monitor" depth="0" timeout="10s" interval="10s" />
|
||||||
|
+<action name="monitor" depth="0" timeout="10s" interval="9s" role="Promoted" />
|
||||||
|
<action name="meta-data" timeout="5s" />
|
||||||
|
<action name="validate-all" timeout="5s" />
|
||||||
|
</actions>
|
||||||
|
@@ -301,9 +326,17 @@ active_grep_pat()
|
||||||
|
if [ "$FIREWALL" = "nft" ]; then
|
||||||
|
local ip
|
||||||
|
[ "$4" = "s" ] && ip=$src || ip=$dst
|
||||||
|
- echo "^\s\+ip $4addr ${ip} $1 $4port $2 ct state { established, related, new } drop$"
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ echo "^\s\+ip${w}$4addr${w}${ip}${w}$1${w}$4port${w}$2${w}ct${w}state${w}{${w}established,${w}related,${w}new${w}}${w}drop$"
|
||||||
|
+ else
|
||||||
|
+ echo "^\s\+ip${w}$4addr${w}${ip}${w}$1${w}$4port${w}$2${w}ct${w}state${w}{${w}established,${w}related,${w}new${w}}${w}reject${w}with${w}tcp${w}reset$"
|
||||||
|
+ fi
|
||||||
|
else
|
||||||
|
- echo "^DROP${w}${prot}${w}--${w}${src}${w}${dst}${w}multiport${w}${4}ports${w}${2}$"
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ echo "^DROP${w}${prot}${w}--${w}${src}${w}${dst}${w}multiport${w}${4}ports${w}${2}$"
|
||||||
|
+ else
|
||||||
|
+ echo "^REJECT${w}${prot}${w}--${w}${src}${w}${dst}${w}multiport${w}${4}ports${w}${2}${w}ctstate${w}NEW,RELATED,ESTABLISHED${w}reject-with${w}tcp-reset$"
|
||||||
|
+ fi
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -394,17 +427,17 @@ tickle_local()
|
||||||
|
|
||||||
|
SayActive()
|
||||||
|
{
|
||||||
|
- ocf_log debug "$CMD DROP rule [$*] is running (OK)"
|
||||||
|
+ ocf_log debug "$CMD $method rule [$*] is running (OK)"
|
||||||
|
}
|
||||||
|
|
||||||
|
SayConsideredActive()
|
||||||
|
{
|
||||||
|
- ocf_log debug "$CMD DROP rule [$*] considered to be running (OK)"
|
||||||
|
+ ocf_log debug "$CMD $method rule [$*] considered to be running (OK)"
|
||||||
|
}
|
||||||
|
|
||||||
|
SayInactive()
|
||||||
|
{
|
||||||
|
- ocf_log debug "$CMD DROP rule [$*] is inactive"
|
||||||
|
+ ocf_log debug "$CMD $method rule [$*] is inactive"
|
||||||
|
}
|
||||||
|
|
||||||
|
#PortStatus {udp|tcp} portno,portno ip {in|out|both} {block|unblock}
|
||||||
|
@@ -425,14 +458,18 @@ PortStatus() {
|
||||||
|
case $5 in
|
||||||
|
block)
|
||||||
|
SayActive $*
|
||||||
|
- rc=$OCF_SUCCESS
|
||||||
|
+ if [ "$__OCF_ACTION" = "monitor" ] && [ "$promotion_score" = "$SCORE_PROMOTED" ]; then
|
||||||
|
+ rc=$OCF_RUNNING_MASTER
|
||||||
|
+ else
|
||||||
|
+ rc=$OCF_SUCCESS
|
||||||
|
+ fi
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
SayInactive $*
|
||||||
|
rc=$OCF_NOT_RUNNING
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
- elif ocf_is_ms; then
|
||||||
|
+ elif [ "$OCF_RESKEY_status_check" = "rule" ]; then
|
||||||
|
case $5 in
|
||||||
|
block)
|
||||||
|
SayInactive $*
|
||||||
|
@@ -440,7 +477,11 @@ PortStatus() {
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
SayActive $*
|
||||||
|
- rc=$OCF_SUCCESS
|
||||||
|
+ if [ "$__OCF_ACTION" = "monitor" ] && [ "$promotion_score" = "$SCORE_PROMOTED" ]; then
|
||||||
|
+ rc=$OCF_RUNNING_MASTER
|
||||||
|
+ else
|
||||||
|
+ rc=$OCF_SUCCESS
|
||||||
|
+ fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
else
|
||||||
|
@@ -499,13 +540,21 @@ DoPort()
|
||||||
|
case $FIREWALL in
|
||||||
|
nft)
|
||||||
|
if [ "$op" = "insert" ]; then
|
||||||
|
- $NFTABLES $op rule inet $TABLE $chain ip ${ds}addr $ip $proto ${ds}port $ports ct state { established, related, new } drop
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ $NFTABLES $op rule inet $TABLE $chain ip ${ds}addr $ip $proto ${ds}port $ports ct state { established, related, new } drop
|
||||||
|
+ else
|
||||||
|
+ $NFTABLES $op rule inet $TABLE $chain ip ${ds}addr $ip $proto ${ds}port $ports ct state { established, related, new } reject with tcp reset
|
||||||
|
+ fi
|
||||||
|
elif [ "$op" = "delete" ]; then
|
||||||
|
NftDelete "$chain" "$proto" "$ds" "$ip" "$ports"
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
iptables)
|
||||||
|
- $IPTABLES $wait "$op" "$chain" -p "$proto" -${ds} "$ip" -m multiport --${ds}ports "$ports" -j DROP
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ $IPTABLES $wait "$op" "$chain" -p "$proto" -${ds} "$ip" -m multiport --${ds}ports "$ports" -j DROP
|
||||||
|
+ else
|
||||||
|
+ $IPTABLES $wait "$op" "$chain" -p "$proto" -${ds} "$ip" -m multiport --${ds}ports "$ports" -m conntrack --ctstate NEW,ESTABLISHED,RELATED -j REJECT --reject-with tcp-reset
|
||||||
|
+ fi
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
@@ -534,7 +583,11 @@ PortBLOCK()
|
||||||
|
$NFTABLES insert rule inet $TABLE OUTPUT ip saddr $3 $1 sport $2 ct state { established, related, new } reject with tcp reset
|
||||||
|
tickle_local
|
||||||
|
fi
|
||||||
|
- $NFTABLES insert rule inet $TABLE INPUT ip daddr $3 $1 dport $2 ct state { established, related, new } drop
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ $NFTABLES insert rule inet $TABLE INPUT ip daddr $3 $1 dport $2 ct state { established, related, new } drop
|
||||||
|
+ else
|
||||||
|
+ $NFTABLES insert rule inet $TABLE INPUT ip daddr $3 $1 dport $2 ct state { established, related, new } reject with tcp reset
|
||||||
|
+ fi
|
||||||
|
rc_in=$?
|
||||||
|
if $try_reset ; then
|
||||||
|
NftDelete "OUTPUT" "$1" "s" "$ports"
|
||||||
|
@@ -544,7 +597,11 @@ PortBLOCK()
|
||||||
|
$IPTABLES $wait -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
|
||||||
|
tickle_local
|
||||||
|
fi
|
||||||
|
- $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP
|
||||||
|
+ if [ "$method" = "DROP" ]; then
|
||||||
|
+ $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP
|
||||||
|
+ else
|
||||||
|
+ $IPTABLES $wait -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -m conntrack --ctstate NEW,ESTABLISHED,RELATED -j REJECT --reject-with tcp-reset
|
||||||
|
+ fi
|
||||||
|
rc_in=$?
|
||||||
|
if $try_reset ; then
|
||||||
|
$IPTABLES $wait -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
|
||||||
|
@@ -768,7 +825,7 @@ detect_firewall_tool() {
|
||||||
|
ocf_log debug "Detected iptables"
|
||||||
|
else
|
||||||
|
ocf_exit_reason "No firewall tool available"
|
||||||
|
- return $OCF_ERR_CONFIGURED
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -812,6 +869,13 @@ action=$OCF_RESKEY_action
|
||||||
|
ip=$OCF_RESKEY_ip
|
||||||
|
reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop
|
||||||
|
nodename=$(ocf_local_nodename)
|
||||||
|
+case "$OCF_RESKEY_method" in
|
||||||
|
+ drop) method="DROP" ;;
|
||||||
|
+ reject) method="REJECT" ;;
|
||||||
|
+ *) ocf_log err "method: $OCF_RESKEY_method not supported"
|
||||||
|
+ exit $OCF_ERR_CONFIGURED
|
||||||
|
+ ;;
|
||||||
|
+esac
|
||||||
|
|
||||||
|
|
||||||
|
# If "tickle" is enabled, we need to record the list of currently established
|
||||||
|
@@ -863,6 +927,8 @@ if ocf_is_ms; then
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
+PortValidateAll
|
||||||
|
+
|
||||||
|
case $__OCF_ACTION in
|
||||||
|
start)
|
||||||
|
PortStart "$protocol" "$portno" "$ip" "$direction" "$action"
|
||||||
|
@@ -885,7 +951,6 @@ case $__OCF_ACTION in
|
||||||
|
;;
|
||||||
|
|
||||||
|
validate-all)
|
||||||
|
- PortValidateAll
|
||||||
|
;;
|
||||||
|
|
||||||
|
*) usage
|
||||||
@ -45,7 +45,7 @@
|
|||||||
Name: resource-agents
|
Name: resource-agents
|
||||||
Summary: Open Source HA Reusable Cluster Resource Scripts
|
Summary: Open Source HA Reusable Cluster Resource Scripts
|
||||||
Version: 4.16.0
|
Version: 4.16.0
|
||||||
Release: 31%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
Release: 32%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
|
||||||
License: GPL-2.0-or-later AND LGPL-2.1-or-later
|
License: GPL-2.0-or-later AND LGPL-2.1-or-later
|
||||||
URL: https://github.com/ClusterLabs/resource-agents
|
URL: https://github.com/ClusterLabs/resource-agents
|
||||||
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
|
||||||
@ -88,7 +88,9 @@ Patch35: RHEL-113816-podman-etcd-preserve-containers-for-debugging.patch
|
|||||||
Patch36: RHEL-116205-podman-etcd-add-cluster-wide-force_new_cluster-attribute-check.patch
|
Patch36: RHEL-116205-podman-etcd-add-cluster-wide-force_new_cluster-attribute-check.patch
|
||||||
Patch37: RHEL-116149-RHEL-116152-1-portblock-add-promotable-and-nftables-support.patch
|
Patch37: RHEL-116149-RHEL-116152-1-portblock-add-promotable-and-nftables-support.patch
|
||||||
Patch38: RHEL-116149-RHEL-116152-2-portblock-fix-incorrect-promotable-description.patch
|
Patch38: RHEL-116149-RHEL-116152-2-portblock-fix-incorrect-promotable-description.patch
|
||||||
Patch39: RHEL-119504-podman-etcd-add-automatic-learner-member-promotion.patch
|
Patch39: RHEL-116149-RHEL-116152-3-portblock-fixes-add-method-and-status_check-parameters.patch
|
||||||
|
Patch40: RHEL-119504-podman-etcd-add-automatic-learner-member-promotion.patch
|
||||||
|
Patch41: RHEL-115495-db2-use-reintegration-flag-to-avoid-race-condition-on-cluster-reintegration.patch
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
Patch500: ha-cloud-support-aliyun.patch
|
Patch500: ha-cloud-support-aliyun.patch
|
||||||
@ -299,6 +301,8 @@ exit 1
|
|||||||
%patch -p1 -P 37
|
%patch -p1 -P 37
|
||||||
%patch -p1 -P 38
|
%patch -p1 -P 38
|
||||||
%patch -p1 -P 39
|
%patch -p1 -P 39
|
||||||
|
%patch -p1 -P 40
|
||||||
|
%patch -p1 -P 41
|
||||||
|
|
||||||
# bundled ha-cloud-support libs
|
# bundled ha-cloud-support libs
|
||||||
%patch -p1 -P 500
|
%patch -p1 -P 500
|
||||||
@ -631,6 +635,14 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
|||||||
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Oct 22 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-32
|
||||||
|
- portblock: add promotable and nftables support, and method and
|
||||||
|
status_check parameters
|
||||||
|
- db2: use reintegration flag to avoid race condition on cluster
|
||||||
|
reintegration
|
||||||
|
|
||||||
|
Resolves: RHEL-116149, RHEL-116152, RHEL-115495
|
||||||
|
|
||||||
* Fri Oct 10 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-31
|
* Fri Oct 10 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-31
|
||||||
- podman-etcd: add automatic learner member promotion
|
- podman-etcd: add automatic learner member promotion
|
||||||
|
|
||||||
@ -641,11 +653,6 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
|
|||||||
|
|
||||||
Resolves: RHEL-113500
|
Resolves: RHEL-113500
|
||||||
|
|
||||||
* Tue Oct 7 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-29
|
|
||||||
- portblock: add promotable and nftables support
|
|
||||||
|
|
||||||
Resolves: RHEL-116149, RHEL-116152
|
|
||||||
|
|
||||||
* Mon Sep 22 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-27
|
* Mon Sep 22 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-27
|
||||||
- podman-etcd: wrap ipv6 address in brackets
|
- podman-etcd: wrap ipv6 address in brackets
|
||||||
- podman-etcd: preserve containers for debugging
|
- podman-etcd: preserve containers for debugging
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user