diff --git a/RHEL-150700-podman-etcd-set-attributes-if-they-fail-during-force-new-cluster.patch b/RHEL-150700-podman-etcd-set-attributes-if-they-fail-during-force-new-cluster.patch new file mode 100644 index 0000000..cd458ea --- /dev/null +++ b/RHEL-150700-podman-etcd-set-attributes-if-they-fail-during-force-new-cluster.patch @@ -0,0 +1,111 @@ +From e4d311b40d8ded2a1921a0e5c01cb49a07c9fb35 Mon Sep 17 00:00:00 2001 +From: Carlo Lobrano +Date: Thu, 5 Feb 2026 19:31:42 +0100 +Subject: [PATCH] podman-etcd: fix learner node attribute not set after etcdctl + failure + +Ensure that learner_node attribute is always set when the member list +contains one learner member. + +Moreover: +* Ensure set_standalone_node is called after adding a learner member. +* Capture stderr from etcdctl for better error logging. +--- + heartbeat/podman-etcd | 61 +++++++++++++++++++++++++++---------------- + 1 file changed, 38 insertions(+), 23 deletions(-) + +diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd +index 77525ddb7..06814ad89 100755 +--- a/heartbeat/podman-etcd ++++ b/heartbeat/podman-etcd +@@ -1082,7 +1082,7 @@ add_member_as_learner() + local peer_url=$(ip_url $member_ip) + + ocf_log info "add $member_name ($member_ip) to the member list as learner" +- out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" member add "$member_name" --peer-urls="$peer_url:2380" --learner) ++ out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" member add "$member_name" --peer-urls="$peer_url:2380" --learner 2>&1) + rc=$? + if [ $rc -ne 0 ]; then + ocf_log err "could not add $member_name as learner, error code $rc, etcdctl output: $out" +@@ -1429,10 +1429,22 @@ detect_cluster_leadership_loss() + manage_peer_membership() + { + local member_list_json="$1" ++ local peer_ip_map_entry ++ local peer_member_name ++ local peer_member_ip ++ local peer_member_id ++ ++ # Get peer node name and IP ++ peer_ip_map_entry=$(echo "$OCF_RESKEY_node_ip_map" | tr ';' '\n' | grep -vF "$NODENAME") ++ if [ -z "$peer_ip_map_entry" ]; then ++ ocf_exit_reason "manage_peer_membership: could not parse node_ip_map: '$OCF_RESKEY_node_ip_map'" ++ exit $OCF_ERR_CONFIGURED ++ fi ++ peer_member_name=$(echo "$peer_ip_map_entry" | cut -d: -f1) ++ peer_member_ip=$(echo "$peer_ip_map_entry" | cut -d: -f2-) + +- # Example of .members[] instance fields in member list json format: +- # NOTE that "name" is present in voting members only, while "isLearner" in learner members only +- # and the value is always true (not a string) in that case. ++ # Parsing the member list's json output to find a "learner" member. ++ # Example of .members[] instance fields in member list json format: + # { + # "ID": , + # "name": "", +@@ -1443,26 +1455,28 @@ manage_peer_membership() + # "https://:2379" + # ] + # } +- for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do +- name=$(echo "$node" | cut -d: -f1) +- # do not check itself +- if [ "$name" = "$NODENAME" ]; then +- continue +- fi ++ # NOTE that the "name" field is present in voting members only, while "isLearner" ++ # field in learner members only and the value is always true (not a string) in that case. ++ peer_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$peer_member_ip\")) | any).ID") ++ if [ -z "$peer_member_id" ]; then ++ ocf_log info "$peer_member_name is not in the members list" ++ add_member_as_learner "$peer_member_name" "$peer_member_ip" ++ set_standalone_node ++ return ++ fi + +- # Check by IP instead of Name since "learner" members appear only in peerURLs, not by Name. +- ip=$(echo "$node" | cut -d: -f2-) # Grab everything after the first : this covers ipv4/ipv6 +- peer_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$ip\")) | any).ID") +- if [ -z "$peer_member_id" ]; then +- ocf_log info "$name is not in the members list" +- add_member_as_learner "$name" "$ip" +- set_standalone_node +- else +- ocf_log debug "$name is in the members list by IP: $ip" +- # Errors from reconcile_member_state are logged internally. Ignoring them here prevents stopping a healthy voter agent; critical local failures are caught by detect_cluster_leadership_loss. +- reconcile_member_state "$member_list_json" +- fi +- done ++ # Ensure learner_node attribute is always set when we have a learner member ++ local learner_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .isLearner==true ).ID") ++ local current_learner_node=$(attribute_learner_node get) ++ if [ -n "$learner_member_id" ] && [ -z "$current_learner_node" ]; then ++ ocf_log debug "$peer_member_name found as learner in member list, but learner_node attribute was not set. Updating" ++ attribute_learner_node update "$peer_member_name" ++ return ++ fi ++ ++ ocf_log debug "$peer_member_name is in the members list by IP: $peer_member_ip" ++ # Errors from reconcile_member_state are logged internally. Ignoring them here prevents stopping a healthy voter agent; critical local failures are caught by detect_cluster_leadership_loss. ++ reconcile_member_state "$member_list_json" + } + + check_peer() +@@ -2209,6 +2223,7 @@ podman_start() + peer_node_ip="$(attribute_node_ip_peer)" + if [ -n "$peer_node_name" ] && [ -n "$peer_node_ip" ]; then + add_member_as_learner "$peer_node_name" "$peer_node_ip" ++ set_standalone_node + else + ocf_log err "could not add peer as learner (peer node name: ${peer_node_name:-unknown}, peer ip: ${peer_node_ip:-unknown})" + fi diff --git a/resource-agents.spec b/resource-agents.spec index 7ce744c..86bc4bf 100644 --- a/resource-agents.spec +++ b/resource-agents.spec @@ -45,7 +45,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.10.0 -Release: 107%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 108%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPLv2+ and LGPLv2+ URL: https://github.com/ClusterLabs/resource-agents Source0: %{upstream_prefix}-%{upstream_version}.tar.gz @@ -198,6 +198,7 @@ Patch145: RHEL-139519-podman-etcd-verify-no-containers-running-or-being-deleted. Patch146: RHEL-42513-powervs-subnet-wait-for-IP.patch Patch147: RHEL-143527-powervs-move-ip-powervs-subnet-fix-error-logging.patch Patch148: RHEL-145628-podman-etcd-enhance-etcd-data-backup-with-snapshots-and-retention.patch +Patch149: RHEL-150700-podman-etcd-set-attributes-if-they-fail-during-force-new-cluster.patch # bundled ha-cloud-support libs Patch500: ha-cloud-support-aliyun.patch @@ -491,6 +492,7 @@ exit 1 %patch -p1 -P 146 %patch -p1 -P 147 %patch -p1 -P 148 +%patch -p1 -P 149 # bundled ha-cloud-support libs %patch -p1 -P 500 @@ -823,6 +825,11 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Wed Feb 18 2026 Oyvind Albrigtsen - 4.10.0-108 +- podman-etcd: set attributes if they fail during force-new-cluster + + Resolves: RHEL-150700 + * Wed Feb 4 2026 Oyvind Albrigtsen - 4.10.0-107 - podman-etcd: enhance etcd data backup with snapshots and retention