322 lines
12 KiB
Diff
322 lines
12 KiB
Diff
From a31f15104fc712cd25f8a59d49f1bbcdbbbc5434 Mon Sep 17 00:00:00 2001
|
|
From: Carlo Lobrano <c.lobrano@gmail.com>
|
|
Date: Tue, 30 Sep 2025 11:54:44 +0200
|
|
Subject: [PATCH 1/2] Refactor(podman-etcd): improve peer checking and
|
|
leadership loss detection
|
|
|
|
The check_peers function is broken up into smaller, more manageable
|
|
functions. This refactoring separates the logic for detecting a loss of
|
|
cluster leadership from the logic for managing peer membership.
|
|
|
|
The main function is renamed from check_peers to check_peer, as there is
|
|
only one peer to check.
|
|
---
|
|
heartbeat/podman-etcd | 78 +++++++++++++++++++++++++------------------
|
|
1 file changed, 45 insertions(+), 33 deletions(-)
|
|
|
|
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
|
|
index f3a6da5e2..3d1e4c520 100755
|
|
--- a/heartbeat/podman-etcd
|
|
+++ b/heartbeat/podman-etcd
|
|
@@ -1014,42 +1014,35 @@ get_member_list_json() {
|
|
podman exec "${CONTAINER}" etcdctl member list --endpoints="$this_node_endpoint" -w json
|
|
}
|
|
|
|
-check_peers()
|
|
+detect_cluster_leadership_loss()
|
|
{
|
|
- # Check peers endpoint status and locally accessible member list
|
|
- local member_list_json
|
|
-
|
|
- if ! container_exists; then
|
|
- # we need a running container to execute etcdctl.
|
|
- return $OCF_SUCCESS
|
|
+ endpoint_status_json=$(get_endpoint_status_json)
|
|
+ ocf_log info "endpoint status: $endpoint_status_json"
|
|
+
|
|
+ count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
|
|
+ if [ "$count_endpoints" -eq 1 ]; then
|
|
+ ocf_log info "one endpoint only: checking status errors"
|
|
+ endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
|
|
+ if echo "$endpoint_status_errors" | grep -q "no leader"; then
|
|
+ set_force_new_cluster
|
|
+ set_standalone_node
|
|
+ ocf_exit_reason "$NODENAME must force a new cluster"
|
|
+ return $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ if [ "$endpoint_status_errors" != "null" ]; then
|
|
+ ocf_log err "unmanaged endpoint status error: $endpoint_status_errors"
|
|
+ fi
|
|
fi
|
|
|
|
- member_list_json=$(get_member_list_json)
|
|
- rc=$?
|
|
- ocf_log debug "member list: $member_list_json"
|
|
- if [ $rc -ne 0 ]; then
|
|
- ocf_log info "podman failed to get member list, error code: $rc"
|
|
-
|
|
- endpoint_status_json=$(get_endpoint_status_json)
|
|
- ocf_log info "endpoint status: $endpoint_status_json"
|
|
-
|
|
- count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
|
|
- if [ "$count_endpoints" -eq 1 ]; then
|
|
- ocf_log info "one endpoint only: checking status errors"
|
|
- endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
|
|
- if echo "$endpoint_status_errors" | grep -q "no leader"; then
|
|
- set_force_new_cluster
|
|
- set_standalone_node
|
|
- ocf_exit_reason "$NODENAME must force a new cluster"
|
|
- return $OCF_ERR_GENERIC
|
|
- fi
|
|
- if [ "$endpoint_status_errors" != "null" ]; then
|
|
- ocf_log err "unmanaged endpoint status error: $endpoint_status_errors"
|
|
- fi
|
|
- fi
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
|
|
- return $OCF_SUCCESS
|
|
- fi
|
|
+manage_peer_membership()
|
|
+{
|
|
+ # Read etcd member list to detect the status of the peer member.
|
|
+ # If the peer is missing from the member list, it will be added back as learner
|
|
+ # If the peer is back in the member list, we ensure that the related CIB attributes (standalone and learner_node) are reset
|
|
+ local member_list_json="$1"
|
|
|
|
# Example of .members[] instance fields in member list json format:
|
|
# NOTE that "name" is present in voting members only, while "isLearner" in learner members only
|
|
@@ -1083,6 +1076,25 @@ check_peers()
|
|
clear_standalone_and_learner_if_not_learners "$member_list_json"
|
|
fi
|
|
done
|
|
+}
|
|
+
|
|
+check_peer()
|
|
+{
|
|
+ # Check peers endpoint status and locally accessible member list
|
|
+ local member_list_json
|
|
+
|
|
+ # we need a running container to execute etcdctl.
|
|
+ if ! container_exists; then
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+
|
|
+ if ! member_list_json=$(get_member_list_json); then
|
|
+ ocf_log info "podman failed to get member list, error code: $?"
|
|
+ detect_cluster_leadership_loss
|
|
+ return $?
|
|
+ fi
|
|
+
|
|
+ manage_peer_membership "$member_list_json"
|
|
return $OCF_SUCCESS
|
|
}
|
|
|
|
@@ -1124,7 +1136,7 @@ podman_monitor()
|
|
# monitor operation to fail.
|
|
# TODO: move this inside check_peers where we already query member list json
|
|
attribute_node_member_id update
|
|
- if ! check_peers; then
|
|
+ if ! check_peer; then
|
|
return $OCF_ERR_GENERIC
|
|
fi
|
|
|
|
|
|
From de7c73a933cefb8f7b9e810bd23c3d12f6d6f29a Mon Sep 17 00:00:00 2001
|
|
From: Carlo Lobrano <c.lobrano@gmail.com>
|
|
Date: Tue, 30 Sep 2025 18:38:06 +0200
|
|
Subject: [PATCH 2/2] OCPBUGS-42808: podman-etcd: add automatic learner member
|
|
promotion
|
|
|
|
Automatically promote etcd learner members to voting members when detected.
|
|
Includes refactored member management functions and improved validation.
|
|
---
|
|
heartbeat/podman-etcd | 108 ++++++++++++++++++++++++++++++------------
|
|
1 file changed, 79 insertions(+), 29 deletions(-)
|
|
|
|
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
|
|
index 3d1e4c520..e1425ec02 100755
|
|
--- a/heartbeat/podman-etcd
|
|
+++ b/heartbeat/podman-etcd
|
|
@@ -712,6 +712,22 @@ attribute_node_revision_peer()
|
|
crm_attribute --query --type nodes --node "$nodename" --name "revision" | awk -F"value=" '{print $2}'
|
|
}
|
|
|
|
+# Converts a decimal number to hexadecimal format with validation
|
|
+# Args: $1 - decimal number (validated as a positive integer without leading zeros)
|
|
+# Returns: OCF_SUCCESS on success, OCF_ERR_GENERIC on invalid input
|
|
+# Outputs: hexadecimal representation to stdout
|
|
+decimal_to_hex() {
|
|
+ local dec=$1
|
|
+
|
|
+ if ! echo "$dec" | grep -q "^[1-9][0-9]*$"; then
|
|
+ ocf_log err "Invalid member ID format: '$dec' (expected decimal number)"
|
|
+ return $OCF_ERR_GENERIC
|
|
+ fi
|
|
+
|
|
+ printf "%x" "$dec"
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
attribute_node_member_id()
|
|
{
|
|
local action="$1"
|
|
@@ -737,7 +753,7 @@ attribute_node_member_id()
|
|
return "$rc"
|
|
fi
|
|
|
|
- local value
|
|
+ local value value_hex
|
|
if ! value=$(echo -n "$member_list_json" | jq -r ".header.member_id"); then
|
|
rc=$?
|
|
ocf_log err "could not get $attribute from member list JSON, error code: $rc"
|
|
@@ -745,8 +761,11 @@ attribute_node_member_id()
|
|
fi
|
|
|
|
# JSON member_id is decimal, while etcdctl command needs the hex version
|
|
- value=$(printf "%x" "$value")
|
|
- if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$value"; then
|
|
+ if ! value_hex=$(decimal_to_hex "$value"); then
|
|
+ ocf_log err "could not convert decimal member_id '$value' to hex, error code: $?"
|
|
+ return $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$value_hex"; then
|
|
rc=$?
|
|
ocf_log err "could not update etcd $attribute, error code: $rc"
|
|
return "$rc"
|
|
@@ -905,42 +924,70 @@ clear_standalone_node()
|
|
crm_attribute --name "standalone_node" --delete
|
|
}
|
|
|
|
-clear_standalone_and_learner_if_not_learners()
|
|
+
|
|
+# Promotes an etcd learner member to a voting member
|
|
+# Args: $1 - learner member ID in decimal format
|
|
+# Returns: OCF_SUCCESS (even on expected promotion failures), OCF_ERR_GENERIC on conversion errors
|
|
+# Note: Promotion failures are expected and logged as info (peer may not be up-to-date)
|
|
+promote_learner_member()
|
|
+{
|
|
+ local learner_member_id=$1
|
|
+
|
|
+ # JSON member_id is decimal, while etcdctl command needs the hex version
|
|
+ if ! learner_member_id_hex=$(decimal_to_hex "$learner_member_id"); then
|
|
+ ocf_log err "could not convert decimal member_id '$learner_member_id' to hex, error code: $?"
|
|
+ return $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ if ! ocf_run podman exec "${CONTAINER}" etcdctl member promote "$learner_member_id_hex" 2>&1; then
|
|
+ # promotion is expected to fail if the peer is not yet up-to-date
|
|
+ ocf_log info "could not promote member $learner_member_id_hex, error code: $?"
|
|
+ return $OCF_SUCCESS
|
|
+ fi
|
|
+ ocf_log info "successfully promoted member '$learner_member_id_hex'"
|
|
+ return $OCF_SUCCESS
|
|
+}
|
|
+
|
|
+# Reconciles etcd cluster member states
|
|
+# Promotes learner members or clears standalone/learner attributes as needed
|
|
+# Args: $1 - member list JSON from etcdctl
|
|
+# Returns: OCF_SUCCESS on completion, jq's exit code if the learner lookup fails, OCF_ERR_GENERIC on other errors
|
|
+# Note: Only operates when exactly 2 started members are present
|
|
+reconcile_member_state()
|
|
{
|
|
local rc
|
|
local member_list_json="$1"
|
|
|
|
- number_of_members=$(printf "%s" "$member_list_json" | jq -r ".members[].ID" | wc -l)
|
|
- if [ "$number_of_members" -ne 2 ]; then
|
|
- ocf_log info "could not clear standalone_node, nor learner_node properties: found $number_of_members members, need 2"
|
|
+ # count only the started members, which have the ".name" JSON field
|
|
+ number_of_started_members=$(printf "%s" "$member_list_json" | jq -r ".members[].name | select(. != null)" | wc -l)
|
|
+ if [ "$number_of_started_members" -ne 2 ]; then
|
|
+ ocf_log info "could not clear standalone_node, nor learner_node properties: found $number_of_started_members members, need 2"
|
|
return $OCF_SUCCESS
|
|
fi
|
|
|
|
- id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .isLearner==true ).ID")
|
|
+ learner_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .isLearner==true ).ID")
|
|
rc=$?
|
|
if [ $rc -ne 0 ]; then
|
|
ocf_log err "could not get isLearner field from member list, error code: $rc"
|
|
return $rc
|
|
fi
|
|
|
|
- if [ -z "$id" ]; then
|
|
- clear_standalone_node
|
|
- rc=$?
|
|
- if [ $rc -ne 0 ]; then
|
|
- ocf_og error "could not clear standalone_node attribute, error code: $rc"
|
|
- return $rc
|
|
- fi
|
|
+ if [ -n "$learner_member_id" ]; then
|
|
+ promote_learner_member "$learner_member_id"
|
|
+ return $?
|
|
fi
|
|
- if [ -z "$id" ]; then
|
|
- attribute_learner_node clear
|
|
- rc=$?
|
|
- if [ $rc -ne 0 ]; then
|
|
- ocf_og error "could not clear learner_node attribute, error code: $rc"
|
|
- return $rc
|
|
+
|
|
+ if [ -z "$learner_member_id" ]; then
|
|
+ if ! clear_standalone_node; then
|
|
+ ocf_log error "could not clear standalone_node attribute, error code: $?"
|
|
+ return $OCF_ERR_GENERIC
|
|
+ fi
|
|
+ if ! attribute_learner_node clear; then
|
|
+ ocf_log error "could not clear learner_node attribute, error code: $?"
|
|
+ return $OCF_ERR_GENERIC
|
|
fi
|
|
fi
|
|
|
|
- return $rc
|
|
+ return $OCF_SUCCESS
|
|
}
|
|
|
|
attribute_learner_node()
|
|
@@ -1019,7 +1066,7 @@ detect_cluster_leadership_loss()
|
|
endpoint_status_json=$(get_endpoint_status_json)
|
|
ocf_log info "endpoint status: $endpoint_status_json"
|
|
|
|
- count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
|
|
+ count_endpoints=$(printf "%s" "$endpoint_status_json" | jq -r ".[].Endpoint" | wc -l)
|
|
if [ "$count_endpoints" -eq 1 ]; then
|
|
ocf_log info "one endpoint only: checking status errors"
|
|
endpoint_status_errors=$(printf "%s" "$endpoint_status_json" | jq -r ".[0].Status.errors")
|
|
@@ -1037,11 +1084,14 @@ detect_cluster_leadership_loss()
|
|
return $OCF_SUCCESS
|
|
}
|
|
|
|
+
|
|
+# Manages etcd peer membership by detecting and handling missing or rejoining peers
|
|
+# Adds missing peers as learners and reconciles member states when peers rejoin
|
|
+# Args: $1 - member list JSON from etcdctl
|
|
+# Returns: OCF_SUCCESS on completion, OCF_ERR_GENERIC on errors
|
|
+# Note: Iterates through all peer nodes to ensure proper cluster membership
|
|
manage_peer_membership()
|
|
{
|
|
- # Read etcd member list to detect the status of the peer member.
|
|
- # If the peer is missing from the member list, it will be added back as learner
|
|
- # If the peer is back in the member list, we ensure that the related CIB attributes (standalone and learner_node) are reset
|
|
local member_list_json="$1"
|
|
|
|
# Example of .members[] instance fields in member list json format:
|
|
@@ -1066,14 +1116,14 @@ manage_peer_membership()
|
|
|
|
# Check by IP instead of Name since "learner" members appear only in peerURLs, not by Name.
|
|
ip=$(echo "$node" | cut -d: -f2-) # Grab everything after the first : this covers ipv4/ipv6
|
|
- id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID")
|
|
- if [ -z "$id" ]; then
|
|
+ peer_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID")
|
|
+ if [ -z "$peer_member_id" ]; then
|
|
ocf_log info "$name is not in the members list"
|
|
add_member_as_learner "$name" "$ip"
|
|
set_standalone_node
|
|
else
|
|
ocf_log debug "$name is in the members list by IP: $ip"
|
|
- clear_standalone_and_learner_if_not_learners "$member_list_json"
|
|
+ reconcile_member_state "$member_list_json"
|
|
fi
|
|
done
|
|
}
|