resource-agents/SOURCES/RHEL-119502-podman-etcd-add-automatic-learner-member-promotion.patch

322 lines
12 KiB
Diff

From a31f15104fc712cd25f8a59d49f1bbcdbbbc5434 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Tue, 30 Sep 2025 11:54:44 +0200
Subject: [PATCH 1/2] Refactor(podman-etcd): improve peer checking and
leadership loss detection
The check_peers function is broken up into smaller, more manageable
functions. This refactoring separates the logic for detecting a loss of
cluster leadership from the logic for managing peer membership.
The main function is renamed to check_peer as there is only 1 peer to
check (it was check_peers).
---
heartbeat/podman-etcd | 78 +++++++++++++++++++++++++------------------
1 file changed, 45 insertions(+), 33 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index f3a6da5e2..3d1e4c520 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -1014,42 +1014,35 @@ get_member_list_json() {
podman exec "${CONTAINER}" etcdctl member list --endpoints="$this_node_endpoint" -w json
}
-check_peers()
+detect_cluster_leadership_loss()
{
- # Check peers endpoint status and locally accessible member list
- local member_list_json
-
- if ! container_exists; then
- # we need a running container to execute etcdctl.
- return $OCF_SUCCESS
+ endpoint_status_json=$(get_endpoint_status_json)
+ ocf_log info "endpoint status: $endpoint_status_json"
+
+ count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
+ if [ "$count_endpoints" -eq 1 ]; then
+ ocf_log info "one endpoint only: checking status errors"
+ endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
+ if echo "$endpoint_status_errors" | grep -q "no leader"; then
+ set_force_new_cluster
+ set_standalone_node
+ ocf_exit_reason "$NODENAME must force a new cluster"
+ return $OCF_ERR_GENERIC
+ fi
+ if [ "$endpoint_status_errors" != "null" ]; then
+ ocf_log err "unmanaged endpoint status error: $endpoint_status_errors"
+ fi
fi
- member_list_json=$(get_member_list_json)
- rc=$?
- ocf_log debug "member list: $member_list_json"
- if [ $rc -ne 0 ]; then
- ocf_log info "podman failed to get member list, error code: $rc"
-
- endpoint_status_json=$(get_endpoint_status_json)
- ocf_log info "endpoint status: $endpoint_status_json"
-
- count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
- if [ "$count_endpoints" -eq 1 ]; then
- ocf_log info "one endpoint only: checking status errors"
- endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
- if echo "$endpoint_status_errors" | grep -q "no leader"; then
- set_force_new_cluster
- set_standalone_node
- ocf_exit_reason "$NODENAME must force a new cluster"
- return $OCF_ERR_GENERIC
- fi
- if [ "$endpoint_status_errors" != "null" ]; then
- ocf_log err "unmanaged endpoint status error: $endpoint_status_errors"
- fi
- fi
+ return $OCF_SUCCESS
+}
- return $OCF_SUCCESS
- fi
+manage_peer_membership()
+{
+ # Read etcd member list to detect the status of the peer member.
+ # If the peer is missing from the member list, it will be added back as learner
+ # If the peer is back in the member list, we ensure that the related CIB attributes (standalone and learner_node) are reset
+ local member_list_json="$1"
# Example of .members[] instance fields in member list json format:
# NOTE that "name" is present in voting members only, while "isLearner" in learner members only
@@ -1083,6 +1076,25 @@ check_peers()
clear_standalone_and_learner_if_not_learners "$member_list_json"
fi
done
+}
+
+check_peer()
+{
+	# Check the peer through the locally accessible member list; when the list
+	# cannot be read, check for cluster leadership loss instead.
+	local member_list_json
+	# we need a running container to execute etcdctl.
+	if ! container_exists; then
+		return $OCF_SUCCESS
+	fi
+
+	# "cmd || { ...; }" keeps cmd's status in $? (after "if ! cmd" it is always 0)
+	member_list_json=$(get_member_list_json) || {
+		ocf_log info "podman failed to get member list, error code: $?"
+		detect_cluster_leadership_loss
+		return $?
+	}
+	manage_peer_membership "$member_list_json"
 	return $OCF_SUCCESS
 }
@@ -1124,7 +1136,7 @@ podman_monitor()
# monitor operation to fail.
# TODO: move this inside check_peers where we already query member list json
attribute_node_member_id update
- if ! check_peers; then
+ if ! check_peer; then
return $OCF_ERR_GENERIC
fi
From de7c73a933cefb8f7b9e810bd23c3d12f6d6f29a Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Tue, 30 Sep 2025 18:38:06 +0200
Subject: [PATCH 2/2] OCPBUGS-42808: podman-etcd: add automatic learner member
promotion
Automatically promote etcd learner members to voting members when detected.
Includes refactored member management functions and improved validation.
---
heartbeat/podman-etcd | 108 ++++++++++++++++++++++++++++++------------
1 file changed, 79 insertions(+), 29 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 3d1e4c520..e1425ec02 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -712,6 +712,22 @@ attribute_node_revision_peer()
crm_attribute --query --type nodes --node "$nodename" --name "revision" | awk -F"value=" '{print $2}'
}
+# Converts a decimal member ID to hexadecimal, validating the whole value
+# Args: $1 - positive decimal integer (no sign, no leading zero; multi-line values are rejected)
+# Returns: OCF_SUCCESS on success, OCF_ERR_GENERIC on invalid input or printf failure
+# Outputs: hexadecimal representation to stdout
+decimal_to_hex() {
+	local dec=$1
+	case "$dec" in
+	""|0*|*[!0-9]*)
+		ocf_log err "Invalid member ID format: '$dec' (expected decimal number)"
+		return $OCF_ERR_GENERIC
+		;;
+	esac
+	printf "%x" "$dec" || return $OCF_ERR_GENERIC
+	return $OCF_SUCCESS
+}
+
attribute_node_member_id()
{
local action="$1"
@@ -737,7 +753,7 @@ attribute_node_member_id()
return "$rc"
fi
- local value
+ local value value_hex
if ! value=$(echo -n "$member_list_json" | jq -r ".header.member_id"); then
rc=$?
ocf_log err "could not get $attribute from member list JSON, error code: $rc"
@@ -745,8 +761,11 @@ attribute_node_member_id()
fi
# JSON member_id is decimal, while etcdctl command needs the hex version
- value=$(printf "%x" "$value")
- if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$value"; then
+ if ! value_hex=$(decimal_to_hex "$value"); then
+ ocf_log err "could not convert decimal member_id '$value' to hex, error code: $?"
+ return $OCF_ERR_GENERIC
+ fi
+ if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$value_hex"; then
rc=$?
ocf_log err "could not update etcd $attribute, error code: $rc"
return "$rc"
@@ -905,42 +924,70 @@ clear_standalone_node()
crm_attribute --name "standalone_node" --delete
}
-clear_standalone_and_learner_if_not_learners()
+
+# Promotes an etcd learner member to a voting member
+# Args: $1 - learner member ID in decimal format
+# Returns: OCF_SUCCESS (even on expected promotion failures), OCF_ERR_GENERIC on conversion errors
+# Note: Promotion failures are expected and logged as info (peer may not be up-to-date)
+promote_learner_member()
+{
+	local learner_member_id=$1
+	local learner_member_id_hex
+	# JSON member_id is decimal, while etcdctl command needs the hex version
+	learner_member_id_hex=$(decimal_to_hex "$learner_member_id") || {
+		ocf_log err "could not convert decimal member_id '$learner_member_id' to hex, error code: $?"
+		return $OCF_ERR_GENERIC
+	}
+	if ocf_run podman exec "${CONTAINER}" etcdctl member promote "$learner_member_id_hex" 2>&1; then
+		ocf_log info "successfully promoted member '$learner_member_id_hex'"
+	else
+		# promotion is expected to fail if the peer is not yet up-to-date
+		ocf_log info "could not promote member $learner_member_id_hex, error code: $?"
+	fi
+	return $OCF_SUCCESS
+}
+
+# Reconciles etcd cluster member states
+# Promotes learner members or clears standalone/learner attributes as needed
+# Args: $1 - member list JSON from etcdctl
+# Returns: OCF_SUCCESS on completion; jq's exit code, the promotion status or OCF_ERR_GENERIC on errors
+# Note: Only operates when exactly 2 started members are present
+reconcile_member_state()
 {
 	local rc
 	local member_list_json="$1"
-	number_of_members=$(printf "%s" "$member_list_json" | jq -r ".members[].ID" | wc -l)
-	if [ "$number_of_members" -ne 2 ]; then
-		ocf_log info "could not clear standalone_node, nor learner_node properties: found $number_of_members members, need 2"
+	# count only the started members, which have the ".name" JSON field
+	number_of_started_members=$(printf "%s" "$member_list_json" | jq -r ".members[].name | select(. != null)" | wc -l)
+	if [ "$number_of_started_members" -ne 2 ]; then
+		ocf_log info "could not clear standalone_node, nor learner_node properties: found $number_of_started_members members, need 2"
 		return $OCF_SUCCESS
 	fi
-	id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .isLearner==true ).ID")
+	learner_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .isLearner==true ).ID")
 	rc=$?
 	if [ $rc -ne 0 ]; then
 		ocf_log err "could not get isLearner field from member list, error code: $rc"
 		return $rc
 	fi
-	if [ -z "$id" ]; then
-		clear_standalone_node
-		rc=$?
-		if [ $rc -ne 0 ]; then
-			ocf_og error "could not clear standalone_node attribute, error code: $rc"
-			return $rc
-		fi
+	if [ -n "$learner_member_id" ]; then
+		promote_learner_member "$learner_member_id"
+		return $?
 	fi
-	if [ -z "$id" ]; then
-		attribute_learner_node clear
-		rc=$?
-		if [ $rc -ne 0 ]; then
-			ocf_og error "could not clear learner_node attribute, error code: $rc"
-			return $rc
+	# no learner listed: reset the CIB attributes ("else" sees the command's real $?, "if !" would see 0)
+	if [ -z "$learner_member_id" ]; then
+		if clear_standalone_node; then :; else
+			ocf_log err "could not clear standalone_node attribute, error code: $?"
+			return $OCF_ERR_GENERIC
+		fi
+		if attribute_learner_node clear; then :; else
+			ocf_log err "could not clear learner_node attribute, error code: $?"
+			return $OCF_ERR_GENERIC
 		fi
 	fi
-	return $rc
+	return $OCF_SUCCESS
 }
attribute_learner_node()
@@ -1019,7 +1066,7 @@ detect_cluster_leadership_loss()
endpoint_status_json=$(get_endpoint_status_json)
ocf_log info "endpoint status: $endpoint_status_json"
- count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
+ count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
if [ "$count_endpoints" -eq 1 ]; then
ocf_log info "one endpoint only: checking status errors"
endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
@@ -1037,11 +1084,14 @@ detect_cluster_leadership_loss()
return $OCF_SUCCESS
}
+
+# Manages etcd peer membership by detecting and handling missing or rejoining peers
+# Adds missing peers as learners and reconciles member states when peers rejoin
+# Args: $1 - member list JSON from etcdctl
+# Returns: OCF_SUCCESS on completion, OCF_ERR_GENERIC on errors
+# Note: Iterates through all peer nodes to ensure proper cluster membership
manage_peer_membership()
{
- # Read etcd member list to detect the status of the peer member.
- # If the peer is missing from the member list, it will be added back as learner
- # If the peer is back in the member list, we ensure that the related CIB attributes (standalone and learner_node) are reset
local member_list_json="$1"
# Example of .members[] instance fields in member list json format:
@@ -1066,14 +1116,14 @@ manage_peer_membership()
# Check by IP instead of Name since "learner" members appear only in peerURLs, not by Name.
ip=$(echo "$node" | cut -d: -f2-) # Grab everything after the first : this covers ipv4/ipv6
- id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID")
- if [ -z "$id" ]; then
+ peer_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID")
+ if [ -z "$peer_member_id" ]; then
ocf_log info "$name is not in the members list"
add_member_as_learner "$name" "$ip"
set_standalone_node
else
ocf_log debug "$name is in the members list by IP: $ip"
- clear_standalone_and_learner_if_not_learners "$member_list_json"
+ reconcile_member_state "$member_list_json"
fi
done
}