From 6e9200dc2ffc89382188794742361985309936b2 Mon Sep 17 00:00:00 2001
From: Carlo Lobrano
Date: Wed, 23 Jul 2025 09:34:13 +0200
Subject: [PATCH] podman-etcd: preserve containers for debugging

This change modifies the agent to keep stopped containers for log
inspection and debugging, with supporting changes to enable this
behavior.

* Conditionally reuse the existing container when its configuration is
  unchanged
* Move the inline etcd configuration flags to an external file to allow
  restarts without container recreation (mainly for the
  force-new-cluster flag)
* Archive the previous container by renaming it to *-previous, and its
  configuration files into the /var/lib/etcd/config-previous.tar.gz
  archive. The tar.gz archive consists of:
  * the pod manifest created by CEO, used to generate the etcd
    configuration file
  * the etcd configuration file
  * the auth json file
  Only one copy is maintained to limit disk usage.
* The configuration and backup file locations are configurable via two
  new input parameters.

Signed-off-by: Carlo Lobrano
---
 heartbeat/podman-etcd | 438 ++++++++++++++++++++++++++++++++----------
 1 file changed, 336 insertions(+), 102 deletions(-)

diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 4969fbaaf..33804414a 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -46,6 +46,8 @@ OCF_RESKEY_authfile_default="/var/lib/kubelet/config.json"
 OCF_RESKEY_allow_pull_default="1"
 OCF_RESKEY_reuse_default="0"
 OCF_RESKEY_oom_default="-997"
+OCF_RESKEY_config_location_default="/var/lib/etcd"
+OCF_RESKEY_backup_location_default="/var/lib/etcd"
 
 : ${OCF_RESKEY_image=${OCF_RESKEY_image_default}}
 : ${OCF_RESKEY_pod_manifest=${OCF_RESKEY_pod_manifest_default}}
@@ -55,6 +57,9 @@ OCF_RESKEY_oom_default="-997"
 : ${OCF_RESKEY_allow_pull=${OCF_RESKEY_allow_pull_default}}
 : ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}}
 : ${OCF_RESKEY_oom=${OCF_RESKEY_oom_default}}
+: ${OCF_RESKEY_config_location=${OCF_RESKEY_config_location_default}}
+: ${OCF_RESKEY_backup_location=${OCF_RESKEY_backup_location_default}}
+
 
 #######################################################################
 
@@ -242,6 +247,23 @@ https://kubernetes.io/docs/concepts/scheduling-eviction/node-pressure-eviction/#
 </longdesc>
 <shortdesc lang="en">OOM for container</shortdesc>
 <content type="string"/>
 </parameter>
+
+<parameter name="config_location">
+<longdesc lang="en">
+The directory where the resource agent stores its state files, such as the generated etcd configuration and a copy of the pod manifest.
+</longdesc>
+<shortdesc lang="en">Resource agent state directory</shortdesc>
+<content type="string" default="${OCF_RESKEY_config_location_default}"/>
+</parameter>
+
+<parameter name="backup_location">
+<longdesc lang="en">
+The directory where the resource agent stores its backups.
+</longdesc>
+<shortdesc lang="en">Resource agent backup directory</shortdesc>
+<content type="string" default="${OCF_RESKEY_backup_location_default}"/>
+</parameter>
+
@@ -309,42 +331,52 @@ container_exists()
     return 1
 }
 
-remove_container()
+# archive_current_container archives the current
+# podman etcd container and its configuration files.
+archive_current_container()
 {
-    local rc
-    local execids
+    # don't attempt to archive a container that doesn't exist
+    if ! container_exists; then
+        return
+    fi
 
-    if ocf_is_true "$OCF_RESKEY_reuse"; then
-        # never remove the container if we have reuse enabled.
-        return 0
+    # delete any container named "*-previous", or we won't be able to archive the current container.
+    if podman inspect "${CONTAINER}-previous" >/dev/null 2>&1; then
+        ocf_log info "removing old archived container '$CONTAINER-previous'"
+        if ! ocf_run podman rm --volumes --force "$CONTAINER-previous"; then
+            ocf_log warn "could not remove old archived container (podman rm failed, error code: $?); won't be able to archive the current container"
+            return
+        fi
     fi
 
-    if ! 
container_exists; then - # don't attempt to remove a container that doesn't exist - return 0 + ocf_log info "archiving '$CONTAINER' container as '$CONTAINER-previous' for debugging purposes" + if ! ocf_run podman rename "$CONTAINER" "$CONTAINER-previous"; then + ocf_log err "could not archive container '$CONTAINER', error code: $?" + return fi - ocf_log notice "Cleaning up inactive container, ${CONTAINER}." - ocf_run podman rm -v "$CONTAINER" - rc=$? - if [ $rc -ne 0 ]; then - if [ $rc -eq 2 ]; then - if podman inspect --format '{{.State.Status}}' "$CONTAINER" | grep -wq "stopping"; then - ocf_log err "Inactive container ${CONTAINER} is stuck in 'stopping' state. Force-remove it." - ocf_run podman rm -f "$CONTAINER" - rc=$? - fi - fi - # due to a podman bug (rhbz#1841485), sometimes a stopped - # container can still be associated with Exec sessions, in - # which case the "podman rm" has to be forced - execids=$(podman inspect "$CONTAINER" --format '{{len .ExecIDs}}') - if [ "$execids" -ne "0" ]; then - ocf_log warn "Inactive container ${CONTAINER} has lingering exec sessions. Force-remove it." - ocf_run podman rm -f "$CONTAINER" - rc=$? + + # archive corresponding etcd configuration files + local files_to_archive="" + for file in "$OCF_RESKEY_authfile" "$POD_MANIFEST_COPY" "$ETCD_CONFIGURATION_FILE"; do + if [ -f "$file" ]; then + files_to_archive="$files_to_archive $file" + else + ocf_log warn "file '$file' is missing and won't be archived" fi + done + + if [ -z "$files_to_archive" ]; then + ocf_log warn "could not find any file to archive." + return + fi + + # NOTE: tar will override any existing archive as wanted + # shellcheck disable=SC2086 + if ! ocf_run tar --create --verbose --gzip --file "$ETCD_BACKUP_FILE" $files_to_archive; then + ocf_log warn "container archived successfully, but configuration backup failed (error: $?). Container debugging available, but without matching configuration files" + else + ocf_log info "container configuration also archived in '$ETCD_BACKUP_FILE'" fi - return $rc } # Correctly wraps an ipv6 in [] for url otherwise use return normal ipv4 address. @@ -365,6 +397,7 @@ attribute_node_ip() local attribute="node_ip" local ip_addr name + # TODO: We can retrieve both the local and peer IP addresses from this map, which eliminates the need to use CIB to share them between nodes for node in $(echo "$OCF_RESKEY_node_ip_map" | sed "s/\s//g;s/;/ /g"); do name=$(echo "$node" | cut -d: -f1) # ignore other nodes @@ -375,7 +408,7 @@ attribute_node_ip() done if [ -z "$ip_addr" ]; then - ocf_log err "ip address was empty when querying (getent ahosts) for hostname: $(hostname -f)" + ocf_log err "could not get local ip address from node_ip_map: '$OCF_RESKEY_node_ip_map'" return 1 fi @@ -384,9 +417,9 @@ attribute_node_ip() echo "$ip_addr" ;; update) - if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$value"; then + if ! crm_attribute --type nodes --node "$NODENAME" --name "$attribute" --update "$ip_addr"; then rc="$?" - ocf_log err "could not set $attribute to $value, error code: $rc" + ocf_log err "could not set $attribute to $ip_addr, error code: $rc" return "$rc" fi ;; @@ -428,6 +461,48 @@ get_env_from_manifest() { echo "$env_var_value" } +# etcd configuration file expects duration to be expressed in nanoseconds +convert_duration_in_nanoseconds() { + local duration=$1 + local value unit nanoseconds + + if [ -z "$duration" ]; then + ocf_log err "convert_duration_in_nanoseconds: no duration provided" + return 1 + fi + + if ! 
echo "$duration" | grep -qE '^[0-9]+[numµ]?s$'; then + ocf_log err "convert_duration_in_nanoseconds: invalid duration format \"$duration\". Expected format: where unit is one of s, ms, us, µs, ns" + return 1 + fi + + # Extract numeric value and unit from duration string + value=$(echo "$duration" | sed 's/[^0-9]*$//') + unit=$(echo "$duration" | sed 's/^[0-9]*//') + + case "$unit" in + ns) + nanoseconds=$value + ;; + us|µs) + nanoseconds=$((value * 1000)) + ;; + ms) + nanoseconds=$((value * 1000000)) + ;; + s) + nanoseconds=$((value * 1000000000)) + ;; + *) + # this should not happen as the input is already validated + ocf_log err "convert_duration_in_nanoseconds: unknown duration unit \"$unit\"" + return 1 + ;; + esac + + echo "$nanoseconds" +} + prepare_env() { local name ip ipurl standalone_node @@ -457,9 +532,14 @@ prepare_env() { ETCDCTL_API=$(get_env_from_manifest "ETCDCTL_API") ETCD_CIPHER_SUITES=$(get_env_from_manifest "ETCD_CIPHER_SUITES") ETCD_DATA_DIR=$(get_env_from_manifest "ETCD_DATA_DIR") + if [ ! -d "$ETCD_DATA_DIR" ]; then + ocf_log err "could not find data-dir at path \"$ETCD_DATA_DIR\"" + return "$OCF_ERR_ARGS" + else + ocf_log info "using data-dir: $ETCD_DATA_DIR" + fi ETCD_ELECTION_TIMEOUT=$(get_env_from_manifest "ETCD_ELECTION_TIMEOUT") ETCD_ENABLE_PPROF=$(get_env_from_manifest "ETCD_ENABLE_PPROF") - ETCD_EXPERIMENTAL_MAX_LEARNERS=$(get_env_from_manifest "ETCD_EXPERIMENTAL_MAX_LEARNERS") ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=$(get_env_from_manifest "ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION") ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=$(get_env_from_manifest "ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL") ETCD_HEARTBEAT_INTERVAL=$(get_env_from_manifest "ETCD_HEARTBEAT_INTERVAL") @@ -475,6 +555,62 @@ prepare_env() { LISTEN_METRICS_URLS="0.0.0.0" } + +generate_etcd_configuration() { + if is_force_new_cluster; then + # The embedded newline is required for correct YAML formatting. 
+ FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: true +force-new-cluster-bump-amount: 1000000000" + else + FORCE_NEW_CLUSTER_CONFIG="force-new-cluster: false" + fi + + cat > "$ETCD_CONFIGURATION_FILE" << EOF +logger: zap +log-level: info +snapshot-count: 10000 +name: $NODENAME +data-dir: $ETCD_DATA_DIR +$FORCE_NEW_CLUSTER_CONFIG +socket-reuse-address: $ETCD_SOCKET_REUSE_ADDRESS +election-timeout: $ETCD_ELECTION_TIMEOUT +enable-pprof: $ETCD_ENABLE_PPROF +heartbeat-interval: $ETCD_HEARTBEAT_INTERVAL +quota-backend-bytes: $ETCD_QUOTA_BACKEND_BYTES +initial-advertise-peer-urls: "$NODEIPURL:2380" +listen-peer-urls: "$(ip_url ${LISTEN_PEER_URLS}):2380" +listen-client-urls: "$(ip_url ${LISTEN_CLIENT_URLS}):2379,unixs://${NODEIP}:0" +initial-cluster: $ETCD_INITIAL_CLUSTER +initial-cluster-state: $ETCD_INITIAL_CLUSTER_STATE +client-transport-security: + cert-file: /etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-${NODENAME}.crt + key-file: /etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-${NODENAME}.key + client-cert-auth: true + trusted-ca-file: $SERVER_CACERT +peer-transport-security: + cert-file: $ETCD_PEER_CERT + key-file: $ETCD_PEER_KEY + client-cert-auth: true + trusted-ca-file: $SERVER_CACERT +advertise-client-urls: "$NODEIPURL:2379" +listen-metrics-urls: "$(ip_url ${LISTEN_METRICS_URLS}):9978" +metrics: extensive +experimental-initial-corrupt-check: true +experimental-max-learners: 1 +experimental-warning-apply-duration: $(convert_duration_in_nanoseconds "$ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION") +experimental-watch-progress-notify-interval: $(convert_duration_in_nanoseconds "$ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL") +EOF + + { + if [ -n "$ETCD_CIPHER_SUITES" ]; then + echo "cipher-suites:" + echo "$ETCD_CIPHER_SUITES" | tr ',' '\n' | while read -r cipher; do + echo " - \"$cipher\"" + done + fi + } >> "$ETCD_CONFIGURATION_FILE" +} + archive_data_folder() { # TODO: use etcd snapshots @@ -634,7 +770,7 @@ add_member_as_learner() local endpoint_url=$(ip_url $(attribute_node_ip get)) local peer_url=$(ip_url $member_ip) - ocf_log info "add $member_name ($member_ip) to the member list as learner" + ocf_log info "add $member_name ($member_ip, $endpoint_url) to the member list as learner" out=$(podman exec "${CONTAINER}" etcdctl --endpoints="$endpoint_url:2379" member add "$member_name" --peer-urls="$peer_url:2380" --learner) rc=$? 
if [ $rc -ne 0 ]; then @@ -1104,18 +1240,18 @@ compare_revision() peer_revision=$(attribute_node_revision_peer) if [ "$revision" = "" ] || [ "$revision" = "null" ] || [ "$peer_revision" = "" ] || [ "$peer_revision" = "null" ]; then - ocf_log err "could not compare revisions: $NODENAME local revision: $revision, peer revision: $peer_revision" + ocf_log err "could not compare revisions: '$NODENAME' local revision='$revision', peer revision='$peer_revision'" return "$OCF_ERR_GENERIC" fi if [ "$revision" -gt "$peer_revision" ]; then - ocf_log info "$NODENAME revision: $revision is newer than peer revision: $peer_revision" + ocf_log info "$NODENAME revision: '$revision' is newer than peer revision: '$peer_revision'" echo "newer" elif [ "$revision" -eq "$peer_revision" ]; then - ocf_log info "$NODENAME revision: $revision is equal to peer revision: $peer_revision" + ocf_log info "$NODENAME revision: '$revision' is equal to peer revision: '$peer_revision'" echo "equal" else - ocf_log info "$NODENAME revision: $revision is older than peer revision: $peer_revision" + ocf_log info "$NODENAME revision: '$revision' is older than peer revision: '$peer_revision'" echo "older" fi return "$OCF_SUCCESS" @@ -1144,6 +1280,100 @@ ensure_pod_manifest_exists() return "$OCF_SUCCESS" } +filter_pod_manifest() { + # Remove pod-version related fields from POD manifest + local pod_manifest="$1" + local temporary_file + local jq_filter='del(.metadata.labels.revision) | .spec.containers[] |= ( .env |= map(select( .name != "ETCD_STATIC_POD_VERSION" ))) | .spec.volumes |= map( select( .name != "resource-dir" ))' + + if ! temporary_file=$(mktemp); then + ocf_log err "could not create temporary file for '$pod_manifest', error code: $?" + return $OCF_ERR_GENERIC + fi + if ! jq "$jq_filter" "$pod_manifest" > "$temporary_file"; then + ocf_log err "could not remove pod version related data from '$pod_manifest', error code: $?" + return $OCF_ERR_GENERIC + fi + echo "$temporary_file" +} + +can_reuse_container() { + # Decide whether to reuse the existing container or create a new one based on etcd pod manifest changes. + # NOTE: explicitly ignore POD version and POD version related data, as the content might be the same even if the revision number has changed. + local cp_rc + local diff_rc + local filtered_original_pod_manifest + local filtered_copy_pod_manifest + + + # If the container does not exist it cannot be reused + if ! container_exists; then + OCF_RESKEY_reuse=0 + return "$OCF_SUCCESS" + fi + + # If the manifest copy doesn't exist, we need a new container. + if [ ! -f "$POD_MANIFEST_COPY" ]; then + ocf_log info "a working copy of $OCF_RESKEY_pod_manifest was not found. A new etcd container will be created." + OCF_RESKEY_reuse=0 + return "$OCF_SUCCESS" + fi + + if ! filtered_original_pod_manifest=$(filter_pod_manifest "$OCF_RESKEY_pod_manifest"); then + return $OCF_ERR_GENERIC + fi + if ! filtered_copy_pod_manifest=$(filter_pod_manifest "$POD_MANIFEST_COPY"); then + return $OCF_ERR_GENERIC + fi + + ocf_log info "comparing $OCF_RESKEY_pod_manifest with local copy $POD_MANIFEST_COPY" + ocf_run diff -s "$filtered_original_pod_manifest" "$filtered_copy_pod_manifest" + diff_rc="$?" + # clean up temporary files + rm -f "$filtered_original_pod_manifest" "$filtered_copy_pod_manifest" + case "$diff_rc" in + 0) + ocf_log info "Reusing the existing etcd container" + OCF_RESKEY_reuse=1 + ;; + 1) + ocf_log info "Etcd pod manifest changes detected: creating a new etcd container to apply the changes" + if ! 
ocf_run cp -p "$OCF_RESKEY_pod_manifest" "$POD_MANIFEST_COPY"; then + cp_rc="$?" + ocf_log err "Could not create a working copy of $OCF_RESKEY_pod_manifest, rc: $cp_rc" + return "$OCF_ERR_GENERIC" + fi + ocf_log info "A working copy of $OCF_RESKEY_pod_manifest was created" + OCF_RESKEY_reuse=0 + ;; + *) + ocf_log err "Could not check if etcd pod manifest has changed, diff rc: $diff_rc" + return "$OCF_ERR_GENERIC" + ;; + esac + + return "$OCF_SUCCESS" +} + +ensure_pod_manifest_copy_exists() { + local cp_rc + + if [ -f "$POD_MANIFEST_COPY" ]; then + return "$OCF_SUCCESS" + fi + + # If the manifest copy doesn't exist, create it and ensure a new container. + if ! ocf_run cp -p "$OCF_RESKEY_pod_manifest" "$POD_MANIFEST_COPY"; then + cp_rc="$?" + ocf_log err "Could not create a working copy of $OCF_RESKEY_pod_manifest, rc: $cp_rc" + return "$OCF_ERR_GENERIC" + fi + + ocf_log info "a new working copy of $OCF_RESKEY_pod_manifest was created" + + return "$OCF_SUCCESS" +} + podman_start() { local cid @@ -1173,6 +1403,13 @@ podman_start() return $OCF_ERR_GENERIC fi + # check if the container has already started + podman_simple_status + if [ $? -eq $OCF_SUCCESS ]; then + ocf_log info "the '$CONTAINER' has already started. Nothing to do" + return "$OCF_SUCCESS" + fi + if ! ensure_pod_manifest_exists; then ocf_exit_reason "could not find etcd pod manifest ($OCF_RESKEY_pod_manifest)" return "$OCF_ERR_GENERIC" @@ -1186,8 +1423,9 @@ podman_start() ocf_log info "static pod was running: start normally" else if is_force_new_cluster; then - ocf_log notice "$NODENAME marked to force-new-cluster" + ocf_log notice "'$NODENAME' marked to force-new-cluster" else + ocf_log info "'$NODENAME' is not marked to force-new-cluster" # When the local agent starts, we can infer the cluster state by counting # how many agents are starting or already active: # - 1 active agent: it's the peer (we are just starting) @@ -1195,6 +1433,7 @@ podman_start() # - 0 active agents, 2 starting: both agents are starting simultaneously local active_resources_count active_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_active_resource" | wc -w) + ocf_log info "found '$active_resources_count' active etcd resources (meta notify environment variable: '$OCF_RESKEY_CRM_meta_notify_active_resource')" case "$active_resources_count" in 1) if [ "$(attribute_learner_node get)" = "$(get_peer_node_name)" ]; then @@ -1205,17 +1444,17 @@ podman_start() fi ;; 0) + # count how many agents are starting now + local start_resources_count + start_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_start_resource" | wc -w) + ocf_log info "found '$start_resources_count' starting etcd resources (meta notify environment variable: '$OCF_RESKEY_CRM_meta_notify_start_resource')" + # we need to compare the revisions in any of the following branches # so call the function only once here if ! revision_compare_result=$(compare_revision); then ocf_log err "could not compare revisions, error code: $?" return "$OCF_ERR_GENERIC" fi - - # count how many agents are starting now - local start_resources_count - start_resources_count=$(echo "$OCF_RESKEY_CRM_meta_notify_start_resource" | wc -w) - case "$start_resources_count" in 1) ocf_log debug "peer not starting: ensure we can start a new cluster" @@ -1231,6 +1470,7 @@ podman_start() fi ;; 2) + # TODO: can we start "normally", regardless the revisions, if the container-id is the same on both nodes? 
ocf_log info "peer starting" if [ "$revision_compare_result" = "newer" ]; then set_force_new_cluster @@ -1263,7 +1503,7 @@ podman_start() fi podman_create_mounts - local run_opts="--detach --name=${CONTAINER}" + local run_opts="--detach --name=${CONTAINER} --replace" run_opts="$run_opts --oom-score-adj=${OCF_RESKEY_oom}" @@ -1297,61 +1537,59 @@ podman_start() archive_data_folder fi - prepare_env + ocf_log info "check for changes in pod manifest to decide if the container should be reused or replaced" + if ! can_reuse_container ; then + rc="$?" + ocf_log err "could not determine etcd container reuse strategy, rc: $rc" + return "$rc" + fi + + # Archive current container and its configuration before creating + # new configuration files. + if ! ocf_is_true "$OCF_RESKEY_reuse"; then + # Log archive container failures but don't block, as the priority + # is ensuring the etcd container starts successfully. + archive_current_container + fi + + if ! ensure_pod_manifest_copy_exists; then + return $OCF_ERR_GENERIC + fi + + if ! prepare_env; then + ocf_log err "Could not prepare environment for podman, error code: $?" + return $OCF_ERR_GENERIC + fi + + if ! generate_etcd_configuration; then + ocf_log err "Could not generate etcd configuration, error code: $?" + return $OCF_ERR_GENERIC + fi - # add etcd-specific opts run_opts="$run_opts \ - --network=host \ - -v /etc/kubernetes/static-pod-resources/etcd-certs:/etc/kubernetes/static-pod-certs \ - -v /var/lib/etcd:/var/lib/etcd \ - --env ALL_ETCD_ENDPOINTS=$ALL_ETCD_ENDPOINTS \ - --env ETCD_CIPHER_SUITES=$ETCD_CIPHER_SUITES \ - --env ETCD_DATA_DIR=$ETCD_DATA_DIR \ - --env ETCD_ELECTION_TIMEOUT=$ETCD_ELECTION_TIMEOUT \ - --env ETCD_ENABLE_PPROF=$ETCD_ENABLE_PPROF \ - --env ETCD_EXPERIMENTAL_MAX_LEARNERS=$ETCD_EXPERIMENTAL_MAX_LEARNERS \ - --env ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION=$ETCD_EXPERIMENTAL_WARNING_APPLY_DURATION \ - --env ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL=$ETCD_EXPERIMENTAL_WATCH_PROGRESS_NOTIFY_INTERVAL \ - --env ETCD_HEARTBEAT_INTERVAL=$ETCD_HEARTBEAT_INTERVAL \ - --env ETCD_INITIAL_CLUSTER=$ETCD_INITIAL_CLUSTER \ - --env ETCD_INITIAL_CLUSTER_STATE=$ETCD_INITIAL_CLUSTER_STATE \ - --env ETCD_NAME=$NODENAME \ - --env ETCD_QUOTA_BACKEND_BYTES=$ETCD_QUOTA_BACKEND_BYTES \ - --env ETCD_SOCKET_REUSE_ADDRESS=$ETCD_SOCKET_REUSE_ADDRESS \ - --env ETCDCTL_API=$ETCDCTL_API \ - --env ETCDCTL_CACERT=$SERVER_CACERT \ - --env ETCDCTL_CERT=$ETCD_PEER_CERT \ - --env ETCDCTL_KEY=$ETCD_PEER_KEY \ - --authfile=$OCF_RESKEY_authfile \ - --security-opt label=disable" + --network=host \ + -v /etc/kubernetes/static-pod-resources/etcd-certs:/etc/kubernetes/static-pod-certs \ + -v /var/lib/etcd:/var/lib/etcd \ + --env ETCDCTL_API=$ETCDCTL_API \ + --env ETCDCTL_CACERT=$SERVER_CACERT \ + --env ETCDCTL_CERT=$ETCD_PEER_CERT \ + --env ETCDCTL_KEY=$ETCD_PEER_KEY \ + --authfile=$OCF_RESKEY_authfile \ + --security-opt label=disable" if [ -n "$OCF_RESKEY_run_opts" ]; then run_opts="$run_opts $OCF_RESKEY_run_opts" fi - OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --logger=zap \ - --log-level=info \ - --experimental-initial-corrupt-check=true \ - --snapshot-count=10000 \ - --initial-advertise-peer-urls=$NODEIPURL:2380 \ - --cert-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-${NODENAME}.crt \ - --key-file=/etc/kubernetes/static-pod-certs/secrets/etcd-all-certs/etcd-serving-${NODENAME}.key \ - --trusted-ca-file=$SERVER_CACERT \ - --client-cert-auth=true \ - --peer-cert-file=$ETCD_PEER_CERT \ - --peer-key-file=$ETCD_PEER_KEY \ - 
--peer-trusted-ca-file=$SERVER_CACERT \ - --peer-client-cert-auth=true \ - --advertise-client-urls=$NODEIPURL:2379 \ - --listen-client-urls=$(ip_url ${LISTEN_CLIENT_URLS}):2379,unixs://${NODEIP}:0 \ - --listen-peer-urls=$(ip_url ${LISTEN_PEER_URLS}):2380 \ - --metrics=extensive \ - --listen-metrics-urls=$(ip_url ${LISTEN_METRICS_URLS}):9978" - if [ -n "$OCF_RESKEY_run_cmd_opts" ]; then - OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd $OCF_RESKEY_run_cmd_opts" + if [ -f "$ETCD_CONFIGURATION_FILE" ]; then + ocf_log info "using etcd configuration file: $ETCD_CONFIGURATION_FILE" + else + ocf_log err "could not find $ETCD_CONFIGURATION_FILE" + return "$OCF_ERR_GENERIC" fi - if is_force_new_cluster; then - OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --force-new-cluster" + OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd --config-file=$ETCD_CONFIGURATION_FILE" + if [ -n "$OCF_RESKEY_run_cmd_opts" ]; then + OCF_RESKEY_run_cmd="$OCF_RESKEY_run_cmd $OCF_RESKEY_run_cmd_opts" fi if [ "$OCF_RESKEY_image" = "$OCF_RESKEY_image_default" ]; then @@ -1377,9 +1615,7 @@ podman_start() ocf_log info "starting existing container $CONTAINER." ocf_run podman start "$CONTAINER" else - # make sure any previous container matching our container name is cleaned up first. - # we already know at this point it wouldn't be running - remove_container + ocf_log info "starting new container $CONTAINER." run_new_container "$run_opts" "$OCF_RESKEY_image" "$OCF_RESKEY_run_cmd" if [ $? -eq 125 ]; then return $OCF_ERR_GENERIC @@ -1439,7 +1675,6 @@ podman_stop() local rc podman_simple_status if [ $? -eq $OCF_NOT_RUNNING ]; then - remove_container ocf_log info "could not leave members list: etcd container not running" return $OCF_SUCCESS fi @@ -1475,7 +1710,7 @@ podman_stop() ocf_run podman kill "$CONTAINER" rc=$? else - ocf_log debug "waiting $timeout second[s] before killing container" + ocf_log info "waiting $timeout second[s] before killing container" ocf_run podman stop -t="$timeout" "$CONTAINER" rc=$? # on stop, systemd will automatically delete any transient @@ -1496,11 +1731,6 @@ podman_stop() fi fi - if ! remove_container; then - ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}." - return $OCF_ERR_GENERIC - fi - return $OCF_SUCCESS } @@ -1532,6 +1762,7 @@ podman_validate() check_binary oc check_binary podman check_binary jq + check_binary tar if [ -z "$OCF_RESKEY_node_ip_map" ]; then ocf_exit_reason "'node_ip_map' option is required" @@ -1589,6 +1820,9 @@ else fi CONTAINER=$OCF_RESKEY_name +POD_MANIFEST_COPY="${OCF_RESKEY_config_location}/pod.yaml" +ETCD_CONFIGURATION_FILE="${OCF_RESKEY_config_location}/config.yaml" +ETCD_BACKUP_FILE="${OCF_RESKEY_backup_location}/config-previous.tar.gz" # Note: we currently monitor podman containers by with the "podman exec" # command, so make sure that invocation is always valid by enforcing the
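
A quick usage sketch of the new duration helper (illustrative values only; the behavior follows directly from the conversion table in the function added above):

    convert_duration_in_nanoseconds "300ms"   # prints 300000000
    convert_duration_in_nanoseconds "5s"      # prints 5000000000
    convert_duration_in_nanoseconds "10us"    # prints 10000
    convert_duration_in_nanoseconds "1h"      # fails the '^[0-9]+[numµ]?s$' check: logs an error, returns 1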
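
And a sketch of the debugging workflow this patch enables, assuming the default backup location and a container named "etcd" (both are deployment-specific assumptions; adjust to the actual resource name and backup_location):

    podman logs etcd-previous                        # logs of the archived, stopped container
    podman inspect etcd-previous                     # its preserved state and settings
    tar -tzf /var/lib/etcd/config-previous.tar.gz    # list the matching configuration snapshot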