From 1afdd91b2961061937fc802c575304ede8d79286 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano
Date: Wed, 10 Sep 2025 16:56:56 +0200
Subject: [PATCH] podman-etcd: Add cluster-wide force_new_cluster attribute checking

Implement cluster-wide validation of force_new_cluster attribute to
resolve race conditions during automated cluster recovery. The
enhancement ensures agents check for the cluster-wide attribute before
falling back to local etcd revision comparison.

Key changes:
- Enhanced get_force_new_cluster() to query all cluster nodes
- Ensure force_new_cluster is not set in both nodes to prevent
  conflicting recovery attempts
- Updated startup logic to prioritize cluster-wide attribute checking

fixes OCPBUGS-61117
---
 heartbeat/podman-etcd | 120 ++++++++++++++++++++++++++++++------------
 1 file changed, 85 insertions(+), 35 deletions(-)

diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 33804414a..f3a6da5e2 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -794,54 +794,85 @@ set_force_new_cluster()
 	return $rc
 }
 
+# get_force_new_cluster returns a space-separated list of nodes that have the force_new_cluster attribute set.
+# Return values:
+# - Exit code 0 with non-empty output: One or more nodes have the force_new_cluster attribute set
+# - Exit code 0 with empty output: No nodes have the force_new_cluster attribute set
+# - Exit code 1 with empty output: Error occurred while querying the cluster nodes
 get_force_new_cluster()
 {
-	crm_attribute --lifetime reboot --query --name "force_new_cluster" | awk -F"value=" '{print $2}'
+	local node nodes value rc
+	local holders=""
+
+	# crm_node runs outside of the awk pipeline: a pipeline returns the exit
+	# status of its last command, which would hide a crm_node failure.
+	nodes=$(crm_node -l)
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		ocf_log err "could not get force_new_cluster attribute, crm_node error code: $rc"
+		return 1
+	fi
+	nodes=$(echo "$nodes" | awk '{print $2}')
+	if [ -z "$nodes" ]; then
+		ocf_log err "could not get force_new_cluster attribute, the list of nodes is empty"
+		return 1
+	fi
+
+	for node in $nodes; do
+		# rc is the exit status of the whole pipeline, not the one of crm_attribute:
+		# a node without the attribute yields an empty value, it is not an error.
+		value=$(crm_attribute --query --lifetime reboot --name "force_new_cluster" --node "$node" 2>/dev/null | awk -F'value=' '{print $2}' | tr -d "'")
+		rc=$?
+		if [ $rc -ne 0 ]; then
+			ocf_log err "could not get force_new_cluster attribute, crm_attribute error code: $rc"
+			return 1
+		fi
+		if [ -n "$value" ]; then
+			holders="$holders$node "
+		fi
+	done
+	echo "$holders"
 }
 
+
 clear_force_new_cluster()
 {
-	local force_new_cluster_node
-
-	force_new_cluster_node=$(get_force_new_cluster)
-	if [ -z "$force_new_cluster_node" ]; then
-		ocf_log info "$NODENAME: force_new_cluster attribute not set"
+	local rc
+
+	# only the holder of "force_new_cluster" attribute can delete it
+	if ! is_force_new_cluster; then
+		ocf_log info "force_new_cluster unset or not owned by $NODENAME"
 		return $OCF_SUCCESS
 	fi
 
-	# only the holder of "force_new_cluster" attribute can delete it
-	if [ "$NODENAME" = "$force_new_cluster_node" ]; then
-		crm_attribute --lifetime reboot --name "force_new_cluster" --delete
-		rc=$?
-		if [ $rc -ne 0 ]; then
-			ocf_log err "could not clear force_new_cluster attribute, error code: $rc"
-		else
-			ocf_log info "$NODENAME: force_new_cluster attribute cleared"
-		fi
-		return $rc
-	else
-		ocf_log info "$NODENAME does not hold force_new_cluster ($force_new_cluster_node has it)"
-		return $OCF_SUCCESS
+	crm_attribute --delete --lifetime reboot --node "$NODENAME" --name "force_new_cluster"
+	rc=$?
+	if [ $rc -ne 0 ]; then
+		ocf_log err "could not clear force_new_cluster attribute, error code: $rc"
+		return $OCF_ERR_GENERIC
 	fi
+
+	ocf_log info "$NODENAME: force_new_cluster attribute cleared"
+	return $OCF_SUCCESS
 }
 
+
 is_force_new_cluster()
 {
-	# Return 0 if 'force_new_cluster' is set and the value matches the current node name, 1 otherwise.
-	local value
+	# Return 0 if 'force_new_cluster' is set on the current node, 1 otherwise.
+	local fnc_holders
 
-	value=$(get_force_new_cluster)
-	if [ -z "$value" ]; then
-		ocf_log debug "force_new_cluster attribute is not set"
-		return 1
+	if ! fnc_holders=$(get_force_new_cluster); then
+		ocf_exit_reason "is_force_new_cluster: Failed to get force_new_cluster node holders"
+		exit $OCF_ERR_GENERIC
 	fi
 
-	if [ "$value" = "$NODENAME" ]; then
+	if echo "$fnc_holders" | tr ' ' '\n' | grep -q -x -F "$NODENAME"; then
 		ocf_log debug "$NODENAME has force_new_cluster set"
 		return 0
 	fi
 
-	ocf_log info "force_new_cluster attribute set on peer node $value"
+	ocf_log debug "force_new_cluster attribute is not set on $NODENAME"
 	return 1
 }
 
@@ -1415,17 +1446,34 @@ podman_start()
 		return "$OCF_ERR_GENERIC"
 	fi
 
-	# force-new-cluster property is a runtime-scoped flag that instructs the agent to force a new cluster-of-1.
-	# Since this attribute is configured with a reboot-lifetime, it is automatically cleared when the machine reboots.
-	# If the agent detects during its start that this property is set, it indicates that the flag was explicitly set
-	# during the current node boot session, implying a deliberate request to recover the cluster.
 	if ocf_is_true "$pod_was_running"; then
 		ocf_log info "static pod was running: start normally"
 	else
-		if is_force_new_cluster; then
-			ocf_log notice "'$NODENAME' marked to force-new-cluster"
+		local fnc_holders
+		if ! fnc_holders=$(get_force_new_cluster); then
+			ocf_exit_reason "Failed to get force_new_cluster node holders"
+			return "$OCF_ERR_GENERIC"
+		fi
+
+		local fnc_holder_count
+		fnc_holder_count=$(echo "$fnc_holders" | wc -w)
+		if [ "$fnc_holder_count" -gt 1 ]; then
+			ocf_exit_reason "force_new_cluster attribute is set on multiple nodes ($fnc_holders)"
+			return "$OCF_ERR_GENERIC"
+		fi
+
+		if [ "$fnc_holder_count" -eq 1 ]; then
+			if echo "$fnc_holders" | tr ' ' '\n' | grep -q -x -F "$NODENAME"; then
+				# Attribute is set on the local node.
+				ocf_log notice "$NODENAME marked to force-new-cluster"
+				JOIN_AS_LEARNER=false
+			else
+				# Attribute is set on a peer node.
+				ocf_log info "$NODENAME shall join as learner because force_new_cluster is set on peer $fnc_holders"
+				JOIN_AS_LEARNER=true
+			fi
 		else
-			ocf_log info "'$NODENAME' is not marked to force-new-cluster"
+			ocf_log info "no node is marked to force-new-cluster"
 			# When the local agent starts, we can infer the cluster state by counting
 			# how many agents are starting or already active:
 			# - 1 active agent: it's the peer (we are just starting)
@@ -1522,7 +1570,7 @@ podman_start()
 		for try in $(seq $retries); do
 			learner_node=$(attribute_learner_node get)
 			if [ "$NODENAME" != "$learner_node" ]; then
-				ocf_log info "$learner_node is not in the member list yet. Retry in $poll_interval_sec seconds."
+				ocf_log info "$NODENAME is not in the member list yet. Retry in $poll_interval_sec seconds."
 				sleep $poll_interval_sec
 				continue
 			fi
@@ -1673,6 +1721,8 @@ podman_stop()
 {
 	local timeout=60
 	local rc
+
+	ocf_log notice "podman-etcd stop"
 	podman_simple_status
 	if [ $? -eq $OCF_NOT_RUNNING ]; then
 		ocf_log info "could not leave members list: etcd container not running"