- podman-etcd: compute dynamic revision bump from maxRaftIndex Resolves: RHEL-124203, RHEL-124206
116 lines
4.4 KiB
Diff
116 lines
4.4 KiB
Diff
From 6a5608f02a657cf006b6d44d31200342c4bd19b9 Mon Sep 17 00:00:00 2001
|
|
From: Carlo Lobrano <c.lobrano@gmail.com>
|
|
Date: Tue, 28 Oct 2025 12:47:10 +0100
|
|
Subject: [PATCH] podman-etcd: compute dynamic revision bump from maxRaftIndex
|
|
(#2087)
|
|
|
|
Replace hardcoded 1 billion revision bump with dynamic calculation based
|
|
on 20% of the last known maxRaftIndex from revision.json.
|
|
|
|
This aligns with the logic used by cluster-etcd-operator's
|
|
quorum-restore-pod utility and ensures the bump amount is proportional
|
|
to the cluster's actual revision state.
|
|
|
|
The implementation:
|
|
- Adds compute_bump_revision() function with safe fallback to 1bn
|
|
default
|
|
- Extracts magic values to named constants
|
|
(ETCD_REVISION_BUMP_PERCENTAGE, ETCD_BUMP_REV_DEFAULT,
|
|
ETCD_REVISION_JSON)
|
|
- Validates computed values (non-zero, not exceeding default)
|
|
- Logs computation results for debugging
|
|
|
|
Reference:
|
|
https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34
|
|
---
|
|
heartbeat/podman-etcd | 38 ++++++++++++++++++++++++++++++++++----
|
|
1 file changed, 34 insertions(+), 4 deletions(-)
|
|
|
|
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
|
|
index b8dfb2f9e..551d37a20 100755
|
|
--- a/heartbeat/podman-etcd
|
|
+++ b/heartbeat/podman-etcd
|
|
@@ -619,16 +619,43 @@ prepare_env() {
|
|
LISTEN_METRICS_URLS="0.0.0.0"
|
|
}
|
|
|
|
+compute_bump_revision() {
+	# Sets the global BUMP_REV to ETCD_REVISION_BUMP_PERCENTAGE (20%) of the last known maxRaftIndex,
+	# keeping ETCD_BUMP_REV_DEFAULT on any error: 1bn would be an etcd running at 1000 writes/s for about eleven days.
+	# Same logic used by cluster-etcd-operator quorum-restore-pod utility.
+	# see https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34
+	BUMP_REV=$ETCD_BUMP_REV_DEFAULT
+	if [ ! -f "${ETCD_REVISION_JSON}" ]; then
+		ocf_log err "could not compute bump revision: ${ETCD_REVISION_JSON} not found. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	fi
+	# Use "cmd || { ... }" instead of "if ! cmd": the "!" negation resets $? to 0 and would hide the jq exit code.
+	COMPUTED_BUMP=$(jq -r "(.maxRaftIndex*${ETCD_REVISION_BUMP_PERCENTAGE}|floor)" "${ETCD_REVISION_JSON}") || {
+		ocf_log err "could not compute maxRaftIndex for bump revision, jq error code: $?. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	}
+	# Empty, non-positive and non-integer values all fail the first test (its error output is silenced), so none can reach BUMP_REV.
+	if ! [ "${COMPUTED_BUMP}" -gt 0 ] 2>/dev/null || [ "${COMPUTED_BUMP}" -gt "${ETCD_BUMP_REV_DEFAULT}" ]; then
+		ocf_log err "computed bump revision (${COMPUTED_BUMP}) is invalid. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump"
+		return
+	fi
+
+	BUMP_REV="${COMPUTED_BUMP}"
+	ocf_log info "bumping etcd revisions by ${BUMP_REV}"
+}
|
|
|
|
generate_etcd_configuration() {
|
|
if is_force_new_cluster; then
|
|
+ compute_bump_revision
|
|
# The embedded newline is required for correct YAML formatting.
|
|
FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: true
|
|
-force-new-cluster-bump-amount: 1000000000"
|
|
+force-new-cluster-bump-amount: $BUMP_REV"
|
|
else
|
|
FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: false"
|
|
fi
|
|
|
|
+ # the space indentation for client-transport-security and peer-transport-security
|
|
+ # is required for correct YAML formatting.
|
|
cat > "$ETCD_CONFIGURATION_FILE" << EOF
|
|
logger: zap
|
|
log-level: info
|
|
@@ -707,7 +734,7 @@ attribute_node_cluster_id()
|
|
{
|
|
local action="$1"
|
|
local value
|
|
- if ! value=$(jq -r ".clusterId" /var/lib/etcd/revision.json); then
|
|
+ if ! value=$(jq -r ".clusterId" "$ETCD_REVISION_JSON"); then
|
|
rc=$?
|
|
ocf_log err "could not get cluster_id, error code: $rc"
|
|
return "$rc"
|
|
@@ -745,7 +772,7 @@ attribute_node_revision()
|
|
local value
|
|
local attribute="revision"
|
|
|
|
- if ! value=$(jq -r ".maxRaftIndex" /var/lib/etcd/revision.json); then
|
|
+ if ! value=$(jq -r ".maxRaftIndex" "$ETCD_REVISION_JSON"); then
|
|
rc=$?
|
|
ocf_log err "could not get $attribute, error code: $rc"
|
|
return "$rc"
|
|
@@ -1456,7 +1483,7 @@ can_reuse_container() {
|
|
|
|
|
|
# If the container does not exist it cannot be reused
|
|
- if ! container_exists; then
|
|
+ if ! container_exists; then
|
|
OCF_RESKEY_reuse=0
|
|
return "$OCF_SUCCESS"
|
|
fi
|
|
@@ -2006,6 +2033,9 @@ CONTAINER=$OCF_RESKEY_name
|
|
POD_MANIFEST_COPY="${OCF_RESKEY_config_location}/pod.yaml"
|
|
ETCD_CONFIGURATION_FILE="${OCF_RESKEY_config_location}/config.yaml"
|
|
ETCD_BACKUP_FILE="${OCF_RESKEY_backup_location}/config-previous.tar.gz"
|
|
+ETCD_REVISION_JSON="/var/lib/etcd/revision.json"
|
|
+ETCD_REVISION_BUMP_PERCENTAGE=0.2
|
|
+ETCD_BUMP_REV_DEFAULT=1000000000
|
|
ETCD_CERTS_HASH_FILE="${OCF_RESKEY_config_location}/certs.hash"
|
|
|
|
# Note: we currently monitor podman containers by with the "podman exec"
|