resource-agents/RHEL-124206-podman-etcd-compute-dynamic-revision-bump-from-maxRaftIndex.patch
Oyvind Albrigtsen 8890b6688b - podman-etcd: add support for cert rotation
- podman-etcd: compute dynamic revision bump from maxRaftIndex

  Resolves: RHEL-124203, RHEL-124206
2025-10-28 13:07:35 +01:00

116 lines
4.4 KiB
Diff

From 6a5608f02a657cf006b6d44d31200342c4bd19b9 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Tue, 28 Oct 2025 12:47:10 +0100
Subject: [PATCH] podman-etcd: compute dynamic revision bump from maxRaftIndex
(#2087)
Replace hardcoded 1 billion revision bump with dynamic calculation based
on 20% of the last known maxRaftIndex from revision.json.
This aligns with the logic used by cluster-etcd-operator's
quorum-restore-pod utility and ensures the bump amount is proportional
to the cluster's actual revision state.
The implementation:
- Adds compute_bump_revision() function with safe fallback to 1bn
default
- Extracts magic values to named constants
(ETCD_REVISION_BUMP_PERCENTAGE, ETCD_BUMP_REV_DEFAULT,
ETCD_REVISION_JSON)
- Validates computed values (non-zero, not exceeding default)
- Logs computation results for debugging
Reference:
https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34
---
heartbeat/podman-etcd | 38 ++++++++++++++++++++++++++++++++++----
1 file changed, 34 insertions(+), 4 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index b8dfb2f9e..551d37a20 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -619,16 +619,43 @@ prepare_env() {
LISTEN_METRICS_URLS="0.0.0.0"
}
+compute_bump_revision() {
+	# Set the global BUMP_REV from maxRaftIndex in ETCD_REVISION_JSON, scaled by ETCD_REVISION_BUMP_PERCENTAGE.
+	# Same logic used by cluster-etcd-operator quorum-restore-pod utility, see https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34
+	# On any failure BUMP_REV keeps the default: 1bn would be an etcd running at 1000 writes/s for about eleven days.
+	local is_int=1
+	BUMP_REV=$ETCD_BUMP_REV_DEFAULT
+	if [ ! -f "${ETCD_REVISION_JSON}" ]; then
+		ocf_log err "could not compute bump revision: ${ETCD_REVISION_JSON} not found. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	fi
+	# "cmd || { ... }" instead of "if ! cmd": after "!" the value of $? is the negated status, so jq's real code would be lost.
+	COMPUTED_BUMP=$(jq -r "(.maxRaftIndex*${ETCD_REVISION_BUMP_PERCENTAGE}|floor)" "${ETCD_REVISION_JSON}") || {
+		ocf_log err "could not compute maxRaftIndex for bump revision, jq error code: $?. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	}
+	# Accept plain decimal digits only, no longer than the default: on other input "[ -le ]"/"[ -gt ]" fail with an error (not "true") and would let it through.
+	case "${COMPUTED_BUMP}" in ''|*[!0-9]*) is_int=0 ;; esac
+	if [ "${is_int}" -eq 0 ] || [ "${#COMPUTED_BUMP}" -gt "${#ETCD_BUMP_REV_DEFAULT}" ] || [ "${COMPUTED_BUMP}" -le 0 ] || [ "${COMPUTED_BUMP}" -gt "${ETCD_BUMP_REV_DEFAULT}" ]; then
+		ocf_log err "computed bump revision (${COMPUTED_BUMP}) is invalid. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	fi
+	BUMP_REV="${COMPUTED_BUMP}"
+	ocf_log info "bumping etcd revisions by ${BUMP_REV}"
+}
generate_etcd_configuration() {
if is_force_new_cluster; then
+ compute_bump_revision
# The embedded newline is required for correct YAML formatting.
FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: true
-force-new-cluster-bump-amount: 1000000000"
+force-new-cluster-bump-amount: $BUMP_REV"
else
FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: false"
fi
+ # the space indentation for client-transport-security and peer-transport-security
+ # is required for correct YAML formatting.
cat > "$ETCD_CONFIGURATION_FILE" << EOF
logger: zap
log-level: info
@@ -707,7 +734,7 @@ attribute_node_cluster_id()
{
local action="$1"
local value
- if ! value=$(jq -r ".clusterId" /var/lib/etcd/revision.json); then
+ if ! value=$(jq -r ".clusterId" "$ETCD_REVISION_JSON"); then
rc=$?
ocf_log err "could not get cluster_id, error code: $rc"
return "$rc"
@@ -745,7 +772,7 @@ attribute_node_revision()
local value
local attribute="revision"
- if ! value=$(jq -r ".maxRaftIndex" /var/lib/etcd/revision.json); then
+ if ! value=$(jq -r ".maxRaftIndex" "$ETCD_REVISION_JSON"); then
rc=$?
ocf_log err "could not get $attribute, error code: $rc"
return "$rc"
@@ -1456,7 +1483,7 @@ can_reuse_container() {
# If the container does not exist it cannot be reused
- if ! container_exists; then
+ if ! container_exists; then
OCF_RESKEY_reuse=0
return "$OCF_SUCCESS"
fi
@@ -2006,6 +2033,9 @@ CONTAINER=$OCF_RESKEY_name
POD_MANIFEST_COPY="${OCF_RESKEY_config_location}/pod.yaml"
ETCD_CONFIGURATION_FILE="${OCF_RESKEY_config_location}/config.yaml"
ETCD_BACKUP_FILE="${OCF_RESKEY_backup_location}/config-previous.tar.gz"
+ETCD_REVISION_JSON="/var/lib/etcd/revision.json"
+ETCD_REVISION_BUMP_PERCENTAGE=0.2
+ETCD_BUMP_REV_DEFAULT=1000000000
ETCD_CERTS_HASH_FILE="${OCF_RESKEY_config_location}/certs.hash"
# Note: we currently monitor podman containers by with the "podman exec"