From 6a5608f02a657cf006b6d44d31200342c4bd19b9 Mon Sep 17 00:00:00 2001 From: Carlo Lobrano Date: Tue, 28 Oct 2025 12:47:10 +0100 Subject: [PATCH] podman-etcd: compute dynamic revision bump from maxRaftIndex (#2087) Replace hardcoded 1 billion revision bump with dynamic calculation based on 20% of the last known maxRaftIndex from revision.json. This aligns with the logic used by cluster-etcd-operator's quorum-restore-pod utility and ensures the bump amount is proportional to the cluster's actual revision state. The implementation: - Adds compute_bump_revision() function with safe fallback to 1bn default - Extracts magic values to named constants (ETCD_REVISION_BUMP_PERCENTAGE, ETCD_BUMP_REV_DEFAULT, ETCD_REVISION_JSON) - Validates computed values (non-zero, not exceeding default) - Logs computation results for debugging Reference: https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34 --- heartbeat/podman-etcd | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd index b8dfb2f9e..551d37a20 100755 --- a/heartbeat/podman-etcd +++ b/heartbeat/podman-etcd @@ -619,16 +619,43 @@ prepare_env() { LISTEN_METRICS_URLS="0.0.0.0" } +compute_bump_revision() { + # Same logic used by cluster-etcd-operator quorum-restore-pod utility. + # see https://github.com/openshift/cluster-etcd-operator/blob/215998939f5223da916622c71fd07d17656faf6b/bindata/etcd/quorum-restore-pod.yaml#L26-L34 + # set a default value: 1bn would be an etcd running at 1000 writes/s for about eleven days. + BUMP_REV=$ETCD_BUMP_REV_DEFAULT + if [ ! -f "${ETCD_REVISION_JSON}" ]; then + ocf_log err "could not compute bump revision: ${ETCD_REVISION_JSON} not found. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump" + return + fi + + # this will bump by the amount of 20% of the last known live revision. + if ! 
{ COMPUTED_BUMP=$(jq -r "(.maxRaftIndex*${ETCD_REVISION_BUMP_PERCENTAGE}|floor)" "${ETCD_REVISION_JSON}"); jq_rc=$?; [ "${jq_rc}" -eq 0 ]; }; then # "$?" is always 0 in the then-branch of "if ! cmd", so jq's exit status is captured inside the group + ocf_log err "could not compute maxRaftIndex for bump revision, jq error code: ${jq_rc}. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump" + return + fi + + if [ -z "${COMPUTED_BUMP}" ] || [ "${COMPUTED_BUMP}" -le 0 ] || [ "${COMPUTED_BUMP}" -gt "${ETCD_BUMP_REV_DEFAULT}" ]; then + ocf_log err "computed bump revision (${COMPUTED_BUMP}) is invalid. Defaulting to ${ETCD_BUMP_REV_DEFAULT} revision bump" + return + fi + + BUMP_REV="${COMPUTED_BUMP}" + ocf_log info "bumping etcd revisions by ${BUMP_REV}" +} generate_etcd_configuration() { if is_force_new_cluster; then + compute_bump_revision # The embedded newline is required for correct YAML formatting. FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: true -force-new-cluster-bump-amount: 1000000000" +force-new-cluster-bump-amount: $BUMP_REV" else FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: false" fi + # the space indentation for client-transport-security and peer-transport-security + # is required for correct YAML formatting. cat > "$ETCD_CONFIGURATION_FILE" << EOF logger: zap log-level: info @@ -707,7 +734,7 @@ attribute_node_cluster_id() { local action="$1" local value - if ! value=$(jq -r ".clusterId" /var/lib/etcd/revision.json); then + if ! value=$(jq -r ".clusterId" "$ETCD_REVISION_JSON"); then rc=$? ocf_log err "could not get cluster_id, error code: $rc" return "$rc" @@ -745,7 +772,7 @@ attribute_node_revision() local value local attribute="revision" - if ! value=$(jq -r ".maxRaftIndex" /var/lib/etcd/revision.json); then + if ! value=$(jq -r ".maxRaftIndex" "$ETCD_REVISION_JSON"); then rc=$? ocf_log err "could not get $attribute, error code: $rc" return "$rc" @@ -1456,7 +1483,7 @@ can_reuse_container() { # If the container does not exist it cannot be reused - if ! 
container_exists; then OCF_RESKEY_reuse=0 return "$OCF_SUCCESS" fi @@ -2006,6 +2033,9 @@ CONTAINER=$OCF_RESKEY_name POD_MANIFEST_COPY="${OCF_RESKEY_config_location}/pod.yaml" ETCD_CONFIGURATION_FILE="${OCF_RESKEY_config_location}/config.yaml" ETCD_BACKUP_FILE="${OCF_RESKEY_backup_location}/config-previous.tar.gz" +ETCD_REVISION_JSON="/var/lib/etcd/revision.json" +ETCD_REVISION_BUMP_PERCENTAGE=0.2 +ETCD_BUMP_REV_DEFAULT=1000000000 ETCD_CERTS_HASH_FILE="${OCF_RESKEY_config_location}/certs.hash" # Note: we currently monitor podman containers by with the "podman exec"