From eea4aa580e123fcfb89d875e150f663dca7da057 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Thu, 19 Mar 2026 11:33:15 +0100
Subject: [PATCH] - podman etcd: ignore learners when considering which node
 has higher revision - podman etcd: handle existing peer URLs gracefully
 during force_new_cluster recovery - db2: set reintegration when promotion is
 successful

  Resolves: RHEL-156709, RHEL-157146, RHEL-153158
---
 ...gration-when-promotion-is-successful.patch |  46 +++++++
 ...ering-which-node-has-higher-revision.patch |  40 ++++++
 ...ly-during-force_new_cluster-recovery.patch | 130 ++++++++++++++++++
 resource-agents.spec                          |  15 +-
 4 files changed, 230 insertions(+), 1 deletion(-)
 create mode 100644 RHEL-153158-db2-set-reintegration-when-promotion-is-successful.patch
 create mode 100644 RHEL-156709-podman-etcd-ignore-learners-when-considering-which-node-has-higher-revision.patch
 create mode 100644 RHEL-157146-podman-etcd-handle-existing-peer-URLs-gracefully-during-force_new_cluster-recovery.patch

diff --git a/RHEL-153158-db2-set-reintegration-when-promotion-is-successful.patch b/RHEL-153158-db2-set-reintegration-when-promotion-is-successful.patch
new file mode 100644
index 0000000..56bc252
--- /dev/null
+++ b/RHEL-153158-db2-set-reintegration-when-promotion-is-successful.patch
@@ -0,0 +1,46 @@
+From 66885ea0227e847b571608015b150d391a6234d7 Mon Sep 17 00:00:00 2001
+From: Oyvind Albrigtsen <oalbrigt@redhat.com>
+Date: Mon, 23 Feb 2026 13:35:58 +0100
+Subject: [PATCH] db2: set reintegration when promotion is successful
+
+---
+ heartbeat/db2 | 19 +++++++++++++++++++
+ 1 file changed, 19 insertions(+)
+
+diff --git a/heartbeat/db2 b/heartbeat/db2
+index 82f2f82c3..4420b9989 100755
+--- a/heartbeat/db2
++++ b/heartbeat/db2
+@@ -955,6 +955,16 @@ db2_promote() {
+             PRIMARY/PEER/*|PRIMARY/REMOTE_CATCHUP/*|PRIMARY/REMOTE_CATCHUP_PENDING/CONNECTED|Primary/Peer)
+             # nothing to do, only update pacemaker's view
+             echo MASTER > $STATE_FILE
++
++            if [ -n "$remote_host" ]; then
++                for db in $dblist
++                do
++                    reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
++                    ocf_log debug "Promotion succeeded, setting $reint_attr = 1"
++                    crm_attribute -n "$reint_attr" -N "$remote_host" -v "1" -l forever
++                done
++            fi
++
+             return $OCF_SUCCESS
+             ;;
+ 
+@@ -981,6 +991,15 @@ db2_promote() {
+             # update pacemaker's view
+             echo MASTER > $STATE_FILE
+ 
++            if [ -n "$remote_host" ]; then
++                for db in $dblist
++                do
++                    reint_attr="db2hadr-${inst1}_${inst2}_${db}_reint"
++                    ocf_log debug "Promotion succeeded, setting $reint_attr = 1"
++                    crm_attribute -n "$reint_attr" -N "$remote_host" -v "1" -l forever
++                done
++            fi
++
+             return $OCF_SUCCESS
+         fi
+ 
diff --git a/RHEL-156709-podman-etcd-ignore-learners-when-considering-which-node-has-higher-revision.patch b/RHEL-156709-podman-etcd-ignore-learners-when-considering-which-node-has-higher-revision.patch
new file mode 100644
index 0000000..280b14f
--- /dev/null
+++ b/RHEL-156709-podman-etcd-ignore-learners-when-considering-which-node-has-higher-revision.patch
@@ -0,0 +1,40 @@
+From 5890f47bc61703130cd27d767118367f03bca95f Mon Sep 17 00:00:00 2001
+From: Carlo Lobrano <c.lobrano@gmail.com>
+Date: Tue, 10 Mar 2026 17:26:04 +0100
+Subject: [PATCH] podman-etcd: Preserve standalone voter identity during
+ restart
+
+If the standalone voter restarts before the peer is added to the member
+list, the learner_node attribute may not be set yet. Without checking
+is_standalone, the voter incorrectly joins as a learner, causing both
+nodes to become learners and creating an unrecoverable deadlock.
+
+Check is_standalone to ensure the voter restarts in the same role it
+had before the restart.
+---
+ heartbeat/podman-etcd | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
+index 539ad33b2..2f8aa122f 100755
+--- a/heartbeat/podman-etcd
++++ b/heartbeat/podman-etcd
+@@ -2002,9 +2002,16 @@ podman_start()
+ 			ocf_log info "found '$active_resources_count' active etcd resources (active: '$OCF_RESKEY_CRM_meta_notify_active_resource', stop: '$OCF_RESKEY_CRM_meta_notify_stop_resource')"
+ 			case "$active_resources_count" in
+ 			1)
+-				if [ "$(attribute_learner_node get)" = "$(get_peer_node_name)" ]; then
+-					ocf_log info "peer active but in learner mode: start normally"
++				# is_standalone may return true here due to a restart: in the previous run,
++				# this agent was the sole voter and the peer had not yet joined the member
++				# list (learner_node unset). Since standalone_node was not cleared before
++				# the restart, start normally to recover the previous cluster state.
++				if is_standalone; then
++					ocf_log info "peer active but not a voter: start normally to recover"
++				elif [ "$(attribute_learner_node get)" = "$(get_peer_node_name)" ]; then
++					ocf_log info "peer active but in learner mode: start normally to recover"
+ 				else
++					# If (A) we must join the peer's new cluster
+ 					ocf_log info "peer is active standalone: joining as learner"
+ 					JOIN_AS_LEARNER=true
+ 				fi
diff --git a/RHEL-157146-podman-etcd-handle-existing-peer-URLs-gracefully-during-force_new_cluster-recovery.patch b/RHEL-157146-podman-etcd-handle-existing-peer-URLs-gracefully-during-force_new_cluster-recovery.patch
new file mode 100644
index 0000000..89b206e
--- /dev/null
+++ b/RHEL-157146-podman-etcd-handle-existing-peer-URLs-gracefully-during-force_new_cluster-recovery.patch
@@ -0,0 +1,130 @@
+From 83d16b59a354a20bf7679af45a9acfa9f344959a Mon Sep 17 00:00:00 2001
+From: Carlo Lobrano <c.lobrano@gmail.com>
+Date: Wed, 18 Mar 2026 11:30:31 +0100
+Subject: [PATCH] OCPBUGS-78482: podman-etcd: fix "Peer URLs already exists" in
+ add_member_as_learner (#2136)
+
+* podman-etcd: handle "Peer URLs already exists" in add_member_as_learner
+
+When etcdctl member add fails with "Peer URLs already exists", the stale
+member entry is removed and the add is retried.
+
+Without this fix, add_member_as_learner returns early without setting
+the learner_node attribute, causing the peer node to time out waiting
+for it.
+---
+ heartbeat/podman-etcd | 85 +++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 73 insertions(+), 12 deletions(-)
+
+diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
+index 2f8aa122f..860aca817 100755
+--- a/heartbeat/podman-etcd
++++ b/heartbeat/podman-etcd
+@@ -1072,6 +1072,49 @@ attribute_node_member_id()
+ 	esac
+ }
+ 
++# remove an etcd member identified by its IP from the member list
++remove_etcd_member_by_ip()
++{
++	local rc
++	local out
++	local member_ip
++	local endpoint_url
++	local member_list_json
++	local stale_member_id
++	local stale_member_id_hex
++
++	member_ip=$1
++	member_list_json=$(get_member_list_json)
++	rc=$?
++	if [ "$rc" -ne 0 ] ; then
++		ocf_log err "could not remove etcd member. Failed to get member list, error code: $rc"
++		return $OCF_ERR_GENERIC
++	fi
++
++	stale_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$member_ip\")) | any).ID")
++	if [ -z "$stale_member_id" ]; then
++		ocf_log err "could not remove etcd member. Failed to find member ID"
++		return $OCF_ERR_GENERIC
++	fi
++
++	# JSON member_id is decimal, while etcdctl command needs the hex version
++	if ! stale_member_id_hex=$(decimal_to_hex "$stale_member_id"); then
++		ocf_log err "could not remove etcd member. Failed to convert member_id '$stale_member_id' into hex format"
++		return $OCF_ERR_GENERIC
++	fi
++
++	endpoint_url=$(ip_url $(attribute_node_ip get))
++	out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" member remove "$stale_member_id_hex" 2>&1)
++	rc=$?
++	if [ "$rc" -ne 0 ] ; then
++		ocf_log err "could not remove etcd member. etcdctl member remove command failed, error code: $rc, output: $out"
++		return $OCF_ERR_GENERIC
++	fi
++
++	ocf_log info "$out"
++	return $OCF_SUCCESS
++}
++
+ add_member_as_learner()
+ {
+ 	local rc
+@@ -1079,18 +1122,36 @@ add_member_as_learner()
+ 	local member_ip=$2
+ 	local endpoint_url=$(ip_url $(attribute_node_ip get))
+ 	local peer_url=$(ip_url $member_ip)
++	local i
++	local max_retries
++	local out
+ 
+-	ocf_log info "add $member_name ($member_ip) to the member list as learner"
+-	out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" member add "$member_name" --peer-urls="$peer_url:2380" --learner 2>&1)
+-	rc=$?
+-	if [ $rc -ne 0 ]; then
+-		ocf_log err "could not add $member_name as learner, error code $rc, etcdctl output: $out"
+-		return $rc
+-	fi
+-	ocf_log info "$out"
++	i=0
++	max_retries=3
++	while [ "$i" -lt "$max_retries" ]; do
++		i=$((i + 1))
++		ocf_log info "adding $member_name ($member_ip) to the member list as learner (attempt $i of $max_retries)"
++		out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" \
++				member add "$member_name" \
++				--peer-urls="$peer_url:2380" \
++				--learner 2>&1)
++		rc=$?
++		if [ "$rc" -eq 0 ]; then
++			ocf_log info "$out"
++			attribute_learner_node update "$member_name"
++			return $OCF_SUCCESS
++		fi
++		if echo "$out" | grep -q "Peer URLs already exists"; then
++			# etcd may still hold a stale member entry for this peer URL: remove it, then retry the add
++			ocf_log warn "could not add member: Peer URLs already exists"
++			remove_etcd_member_by_ip "$member_ip"
++		else
++			ocf_log warn "could not add member: $out"
++		fi
++	done
+ 
+-	attribute_learner_node update "$member_name"
+-	return $?
++	ocf_log err "could not add $member_name as learner, error code $rc, etcdctl output: $out"
++	return $OCF_ERR_GENERIC
+ }
+ 
+ set_force_new_cluster()
+@@ -1454,8 +1515,8 @@ manage_peer_membership()
+ 	#       "https://<node IP>:2379"
+ 	#   ]
+ 	# }
+-	# NOTE that the "name" field is present in voting members only, while "isLearner"
+-	# field in learner members only and the value is always true (not a string) in that case.
++	# NOTE: voting members have a "name" field but no "isLearner" field, while learner
++	# members have "isLearner": true (a boolean) but no "name" field, so match members by peerURLs.
+ 	peer_member_id=$(printf "%s" "$member_list_json" | jq -r ".members[] | select( .peerURLs | map(test(\"$peer_member_ip\")) | any).ID")
+ 	if [ -z "$peer_member_id" ]; then
+ 		ocf_log info "$peer_member_name is not in the members list"
diff --git a/resource-agents.spec b/resource-agents.spec
index eb36a26..8a04283 100644
--- a/resource-agents.spec
+++ b/resource-agents.spec
@@ -45,7 +45,7 @@
 Name:		resource-agents
 Summary:	Open Source HA Reusable Cluster Resource Scripts
 Version:	4.10.0
-Release:	109%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
+Release:	110%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
 License:	GPLv2+ and LGPLv2+
 URL:		https://github.com/ClusterLabs/resource-agents
 Source0:	%{upstream_prefix}-%{upstream_version}.tar.gz
@@ -200,6 +200,9 @@ Patch147:	RHEL-143527-powervs-move-ip-powervs-subnet-fix-error-logging.patch
 Patch148:	RHEL-145628-podman-etcd-enhance-etcd-data-backup-with-snapshots-and-retention.patch
 Patch149:	RHEL-150700-podman-etcd-set-attributes-if-they-fail-during-force-new-cluster.patch
 Patch150:	RHEL-116151-4-portblock-check-inverse-action.patch
+Patch151:	RHEL-156709-podman-etcd-ignore-learners-when-considering-which-node-has-higher-revision.patch
+Patch152:	RHEL-157146-podman-etcd-handle-existing-peer-URLs-gracefully-during-force_new_cluster-recovery.patch
+Patch153:	RHEL-153158-db2-set-reintegration-when-promotion-is-successful.patch
 
 # bundled ha-cloud-support libs
 Patch500:	ha-cloud-support-aliyun.patch
@@ -495,6 +498,9 @@ exit 1
 %patch -p1 -P 148
 %patch -p1 -P 149
 %patch -p1 -P 150
+%patch -p1 -P 151
+%patch -p1 -P 152
+%patch -p1 -P 153
 
 # bundled ha-cloud-support libs
 %patch -p1 -P 500
@@ -827,6 +833,13 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
 %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
 
 %changelog
+* Thu Mar 19 2026 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-110
+- podman-etcd: ignore learners when considering which node has higher revision
+- podman-etcd: handle existing peer URLs gracefully during force_new_cluster recovery
+- db2: set reintegration when promotion is successful
+
+  Resolves: RHEL-156709, RHEL-157146, RHEL-153158
+
 * Fri Feb 27 2026 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.10.0-109
 - portblock: check inverse action state file for non-promotable
   resources to avoid issues when doing e.g. block followed by unblock