- podman-etcd: prevent last active member from leaving the etcd member
  list

  Resolves: RHEL-130576
This commit is contained in:
Oyvind Albrigtsen 2025-11-24 13:19:37 +01:00
parent 220cf2c062
commit 1768be9e4e
3 changed files with 214 additions and 1 deletions

View File

@ -0,0 +1,161 @@
From 578e6d982e5ab705dac216cecf85c50fe3842af5 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Sun, 16 Nov 2025 19:40:30 +0100
Subject: [PATCH] OCPBUGS-60098: podman-etcd: prevent last active member from
leaving the etcd member list
When stopping etcd instances, simultaneous member removal from both
nodes can corrupt the etcd Write-Ahead Log (WAL). This change implements
a two-part solution:
1. Concurrent stop protection: When multiple nodes are stopping, the
alphabetically second node delays its member removal by 10
seconds. This prevents simultaneous member list updates that can
corrupt WAL.
2. Last member detection: Checks active resource count after any
delay. If this is the last active member, skips member removal to
avoid leaving an empty cluster.
Additionally, reorders podman_stop() to clear the member_id attribute
after leaving the member list, ensuring the attribute reflects actual
cluster state during shutdown.
---
heartbeat/podman-etcd | 86 ++++++++++++++++++++++++++++++++++---------
1 file changed, 69 insertions(+), 17 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 7795130a6..7b6e08f11 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -1341,6 +1341,11 @@ container_health_check()
# recently (i.e. a failure), or not (fresh start)
monitor_cmd_exec
rc=$?
+ if [ "$rc" -ne 0 ]; then
+ ocf_log info "Container ${CONTAINER} not-running"
+ echo "not-running"
+ return
+ fi
if [ "$rc" -eq 0 ]; then
# Container is running - update state file with current epoch
local current_epoch
@@ -1639,7 +1644,7 @@ can_reuse_container() {
OCF_RESKEY_reuse=0
return "$OCF_SUCCESS"
fi
-
+
if ! filtered_original_pod_manifest=$(filter_pod_manifest "$OCF_RESKEY_pod_manifest"); then
return $OCF_ERR_GENERIC
fi
@@ -1866,7 +1871,7 @@ podman_start()
fi
if ocf_is_true "$JOIN_AS_LEARNER"; then
- local wait_timeout_sec=$((10*60))
+ local wait_timeout_sec=60
local poll_interval_sec=5
local retries=$(( wait_timeout_sec / poll_interval_sec ))
@@ -2021,6 +2026,64 @@ podman_start()
done
}
+# leave_etcd_member_list removes the current node from the etcd member list during
+# shutdown to ensure clean cluster state.
+#
+# Skips removal if this is the standalone (last) node. When both nodes are stopping
+# concurrently, delays the second node to prevent simultaneous member removal that
+# could corrupt the etcd WAL.
+leave_etcd_member_list()
+{
+ if ! member_id=$(attribute_node_member_id get); then
+ ocf_log err "error leaving members list: could not get member-id"
+ return
+ fi
+
+ if is_standalone; then
+ ocf_log info "last member. Not leaving the member list"
+ return
+ fi
+
+ local stopping_resources_count
+ stopping_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_stop_resource" | wc -w)
+ ocf_log info "found '$stopping_resources_count' stopping etcd resources (stop: '$OCF_RESKEY_CRM_meta_notify_stop_resource')"
+ if [ "$stopping_resources_count" -gt 1 ]; then
+ # Prevent WAL corruption by delaying the alphabetically second node's member
+ # removal when both nodes are stopping concurrently.
+ local delayed_node
+
+ node_names_sorted=$(echo "$OCF_RESKEY_node_ip_map" | sed 's/:[^;]*//g; s/;/ /g' | tr ' ' '\n' | sort | tr '\n' ' ')
+ delayed_node="$(echo "$node_names_sorted" | cut -d' ' -f2)"
+
+ if [ -z "$delayed_node" ]; then
+ ocf_log warn "could not determine node to be delayed: not leaving the member list"
+ return
+ fi
+
+ if [ "$NODENAME" = "$delayed_node" ]; then
+ ocf_log info "delaying stop for ${DELAY_SECOND_NODE_LEAVE_SEC}s to prevent simultaneous etcd member removal"
+ sleep $DELAY_SECOND_NODE_LEAVE_SEC
+ fi
+ fi
+
+ # Ensure we're not the last active resource before leaving. The `standalone_node` property
+ # may not be set if stop was called before monitor check, or after the delayed node waited.
+ local active_resources_count
+ active_resources_count=$(get_truly_active_resources_count)
+ if [ "$active_resources_count" -lt 1 ]; then
+ ocf_log info "last member. Not leaving the member list"
+ return
+ fi
+
+ ocf_log info "leaving members list as member with ID $member_id"
+ local endpoint
+ endpoint="$(ip_url $(attribute_node_ip get)):2379"
+ if ! ocf_run podman exec "$CONTAINER" etcdctl member remove "$member_id" --endpoints="$endpoint"; then
+ rc=$?
+ ocf_log err "error leaving members list, error code: $rc"
+ fi
+}
+
podman_stop()
{
local timeout=60
@@ -2039,24 +2102,12 @@ podman_stop()
podman_simple_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
ocf_log info "could not leave members list: etcd container not running"
+ attribute_node_member_id clear
return $OCF_SUCCESS
fi
- if ! member_id=$(attribute_node_member_id get); then
- ocf_log err "error leaving members list: could not get member-id"
- else
- # TODO: is it worth/possible to check the current status instead than relying on cached attributes?
- if is_standalone; then
- ocf_log info "last member. Not leaving the member list"
- else
- ocf_log info "leaving members list as member with ID $member_id"
- endpoint="$(ip_url $(attribute_node_ip get)):2379"
- if ! ocf_run podman exec "$CONTAINER" etcdctl member remove "$member_id" --endpoints="$endpoint"; then
- rc=$?
- ocf_log err "error leaving members list, error code: $rc"
- fi
- fi
- fi
+ leave_etcd_member_list
+ # clear node_member_id CIB attribute only after leaving the member list
attribute_node_member_id clear
if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
@@ -2197,6 +2248,7 @@ ETCD_CERTS_HASH_FILE="${OCF_RESKEY_config_location}/certs.hash"
# State file location: Uses HA_RSCTMP to ensure automatic cleanup on reboot.
# This is intentional - reboots are controlled stops, not failures requiring detection.
CONTAINER_HEARTBEAT_FILE=${HA_RSCTMP}/podman-container-last-running
+DELAY_SECOND_NODE_LEAVE_SEC=10
# Note: we currently monitor podman containers by with the "podman exec"
# command, so make sure that invocation is always valid by enforcing the

View File

@ -0,0 +1,42 @@
From 29df4255c5f65ea94fb6de997805dca65e31071c Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Mon, 24 Nov 2025 12:21:55 +0100
Subject: [PATCH] podman-etcd: remove test code (#2103)
---
heartbeat/podman-etcd | 8 +-------
1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 7b6e08f11..b1f52cd5c 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -1341,11 +1341,6 @@ container_health_check()
# recently (i.e. a failure), or not (fresh start)
monitor_cmd_exec
rc=$?
- if [ "$rc" -ne 0 ]; then
- ocf_log info "Container ${CONTAINER} not-running"
- echo "not-running"
- return
- fi
if [ "$rc" -eq 0 ]; then
# Container is running - update state file with current epoch
local current_epoch
@@ -1644,7 +1639,6 @@ can_reuse_container() {
OCF_RESKEY_reuse=0
return "$OCF_SUCCESS"
fi
-
if ! filtered_original_pod_manifest=$(filter_pod_manifest "$OCF_RESKEY_pod_manifest"); then
return $OCF_ERR_GENERIC
fi
@@ -1871,7 +1865,7 @@ podman_start()
fi
if ocf_is_true "$JOIN_AS_LEARNER"; then
- local wait_timeout_sec=60
+ local wait_timeout_sec=$((10*60))
local poll_interval_sec=5
local retries=$(( wait_timeout_sec / poll_interval_sec ))

View File

@ -45,7 +45,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.16.0
Release: 42%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
Release: 43%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPL-2.0-or-later AND LGPL-2.1-or-later
URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@ -106,6 +106,8 @@ Patch53: RHEL-126791-storage_mon-fix-handling-of-4k-block-devices.patch
Patch54: RHEL-127840-podman-etcd-exclude-stopping-resources-from-active-count.patch
Patch55: RHEL-126083-podman-etcd-add-container-crash-detection-with-coordinated-recovery.patch
Patch56: RHEL-112443-2-nginx-restore-selinux-context-for-pid-file-during-validate-all-action.patch
Patch57: RHEL-130576-1-podman-etcd-prevent-last-active-member-from-leaving.patch
Patch58: RHEL-130576-2-podman-etcd-remove-test-code.patch
# bundled ha-cloud-support libs
Patch500: ha-cloud-support-aliyun.patch
@ -333,6 +335,8 @@ exit 1
%patch -p1 -P 54
%patch -p1 -P 55 -F2
%patch -p1 -P 56
%patch -p1 -P 57
%patch -p1 -P 58
# bundled ha-cloud-support libs
%patch -p1 -P 500
@ -665,6 +669,12 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog
* Mon Nov 24 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-43
- podman-etcd: prevent last active member from leaving the etcd member
list
Resolves: RHEL-130576
* Tue Nov 18 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-42
- nginx: fix validate warnings, and restore SELinux context for
pid-file during validate-all action