resource-agents/SOURCES/RHEL-116208-podman-etcd-add-cluster-wide-force_new_cluster-attribute-check.patch

From 1afdd91b2961061937fc802c575304ede8d79286 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Wed, 10 Sep 2025 16:56:56 +0200
Subject: [PATCH] podman-etcd: Add cluster-wide force_new_cluster attribute
 checking

Implement cluster-wide validation of the force_new_cluster attribute to
resolve race conditions during automated cluster recovery. The enhancement
ensures agents check for the cluster-wide attribute before falling back to
local etcd revision comparison.

Key changes:
- Enhanced get_force_new_cluster() to query all cluster nodes
- Ensure force_new_cluster is not set on both nodes to prevent
  conflicting recovery attempts
- Updated startup logic to prioritize cluster-wide attribute checking

fixes OCPBUGS-61117
---
 heartbeat/podman-etcd | 107 ++++++++++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 35 deletions(-)

diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 33804414a..f3a6da5e2 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -794,54 +794,72 @@ set_force_new_cluster()
return $rc
}
+# get_force_new_cluster returns a space-separated list of nodes that have the force_new_cluster attribute set.
+# Return values:
+# - Exit code 0 with non-empty output: One or more nodes have the force_new_cluster attribute set
+# - Exit code 0 with empty output: No nodes have the force_new_cluster attribute set
+# - Exit code 1 with empty output: Error occurred while querying the cluster nodes
get_force_new_cluster()
{
- crm_attribute --lifetime reboot --query --name "force_new_cluster" | awk -F"value=" '{print $2}'
+ local node nodes value
+ local holders=""
+
+ if ! nodes=$(crm_node -l | awk '{print $2}'); then
+ ocf_log err "could not get force_new_cluster attribute, crm_node error code: $?"
+ return 1
+ fi
+ if [ -z "$nodes" ]; then
+ ocf_log err "could not get force_new_cluster attribute, the list of nodes is empty"
+ return 1
+ fi
+
+ for node in $nodes; do
+ if ! value=$(crm_attribute --query --lifetime reboot --name "force_new_cluster" --node "$node" 2>/dev/null | awk -F'value=' '{print $2}' | tr -d "'"); then
+ ocf_log err "could not get force_new_cluster attribute, crm_attribut error code: $?"
+ return 1
+ fi
+ if [ -n "$value" ]; then
+ holders="$holders$node "
+ fi
+ done
+ echo "$holders"
}
+
clear_force_new_cluster()
{
- local force_new_cluster_node
-
- force_new_cluster_node=$(get_force_new_cluster)
- if [ -z "$force_new_cluster_node" ]; then
- ocf_log info "$NODENAME: force_new_cluster attribute not set"
+ # only the holder of "force_new_cluster" attribute can delete it
+ if ! is_force_new_cluster; then
+ ocf_log info "force_new_cluster unset or not owned by $NODENAME"
return $OCF_SUCCESS
fi
- # only the holder of "force_new_cluster" attribute can delete it
- if [ "$NODENAME" = "$force_new_cluster_node" ]; then
- crm_attribute --lifetime reboot --name "force_new_cluster" --delete
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_log err "could not clear force_new_cluster attribute, error code: $rc"
- else
- ocf_log info "$NODENAME: force_new_cluster attribute cleared"
- fi
- return $rc
- else
- ocf_log info "$NODENAME does not hold force_new_cluster ($force_new_cluster_node has it)"
- return $OCF_SUCCESS
+ if ! crm_attribute --delete --lifetime reboot --node "$NODENAME" --name "force_new_cluster"; then
+ ocf_log err "could not clear force_new_cluster attribute, error code: $?"
+ return $OCF_ERR_GENERIC
fi
+
+ ocf_log info "$NODENAME: force_new_cluster attribute cleared"
+ return $OCF_SUCCESS
}
+
is_force_new_cluster()
{
- # Return 0 if 'force_new_cluster' is set and the value matches the current node name, 1 otherwise.
- local value
+ # Return 0 if 'force_new_cluster' is set on the current node, 1 otherwise.
+ local fnc_holders
- value=$(get_force_new_cluster)
- if [ -z "$value" ]; then
- ocf_log debug "force_new_cluster attribute is not set"
- return 1
+ if ! fnc_holders=$(get_force_new_cluster); then
+ ocf_exit_reason "is_force_new_cluster: Failed to get force_new_cluster node holders"
+ exit $OCF_ERR_GENERIC
fi
- if [ "$value" = "$NODENAME" ]; then
+ if echo "$fnc_holders" | grep -q -w "$NODENAME"; then
ocf_log debug "$NODENAME has force_new_cluster set"
return 0
fi
- ocf_log info "force_new_cluster attribute set on peer node $value"
+ ocf_log debug "force_new_cluster attribute is not set on $NODENAME"
return 1
}
@@ -1415,17 +1433,34 @@ podman_start()
return "$OCF_ERR_GENERIC"
fi
- # force-new-cluster property is a runtime-scoped flag that instructs the agent to force a new cluster-of-1.
- # Since this attribute is configured with a reboot-lifetime, it is automatically cleared when the machine reboots.
- # If the agent detects during its start that this property is set, it indicates that the flag was explicitly set
- # during the current node boot session, implying a deliberate request to recover the cluster.
if ocf_is_true "$pod_was_running"; then
ocf_log info "static pod was running: start normally"
else
- if is_force_new_cluster; then
- ocf_log notice "'$NODENAME' marked to force-new-cluster"
+ local fnc_holders
+ if ! fnc_holders=$(get_force_new_cluster); then
+ ocf_exit_reason "Failed to get force_new_cluster node holders"
+ return "$OCF_ERR_GENERIC"
+ fi
+
+ local fnc_holder_count
+ fnc_holder_count=$(echo "$fnc_holders" | wc -w)
+ if [ "$fnc_holder_count" -gt 1 ]; then
+ ocf_exit_reason "force_new_cluster attribute is set on multiple nodes ($fnc_holders)"
+ return "$OCF_ERR_GENERIC"
+ fi
+
+ if [ "$fnc_holder_count" -eq 1 ]; then
+ if echo "$fnc_holders" | grep -q -w "$NODENAME"; then
+ # Attribute is set on the local node.
+ ocf_log notice "$NODENAME marked to force-new-cluster"
+ JOIN_AS_LEARNER=false
+ else
+ # Attribute is set on a peer node.
+ ocf_log info "$NODENAME shall join as learner because force_new_cluster is set on peer $fnc_holders"
+ JOIN_AS_LEARNER=true
+ fi
else
- ocf_log info "'$NODENAME' is not marked to force-new-cluster"
+ ocf_log info "no node is marked to force-new-cluster"
# When the local agent starts, we can infer the cluster state by counting
# how many agents are starting or already active:
# - 1 active agent: it's the peer (we are just starting)
@@ -1522,7 +1557,7 @@ podman_start()
for try in $(seq $retries); do
learner_node=$(attribute_learner_node get)
if [ "$NODENAME" != "$learner_node" ]; then
- ocf_log info "$learner_node is not in the member list yet. Retry in $poll_interval_sec seconds."
+ ocf_log info "$NODENAME is not in the member list yet. Retry in $poll_interval_sec seconds."
sleep $poll_interval_sec
continue
fi
@@ -1673,6 +1708,8 @@ podman_stop()
{
local timeout=60
local rc
+
+ ocf_log notice "podman-etcd stop"
podman_simple_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
ocf_log info "could not leave members list: etcd container not running"