- podman-etcd: new resource agent

- podman-etcd: add oom parameter to be able to tune the Out-Of-Memory (OOM)
  score for etcd containers

  Resolves: RHEL-88431, RHEL-113104
This commit is contained in:
Oyvind Albrigtsen 2025-09-09 11:36:34 +02:00
parent b44b82f9a9
commit c8ead1c160
5 changed files with 1811 additions and 1 deletions

View File

@ -0,0 +1,85 @@
From d08a7f74427ea2cf7d355a0f7f6d8f583e2d0cba Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Thu, 3 Jul 2025 12:22:12 +0200
Subject: [PATCH] OCPBUGS-58324: podman-etcd Add OOM score adjustment for etcd
containers
This change introduces a new `oom` parameter to the `podman-etcd` OCF
agent. This allows tuning the Out-Of-Memory (OOM) score adjustment for
the etcd container.
The `oom` parameter accepts integer values from -1000 to 1000,
defaulting to -997 (system-node-critical equivalent).
See https://kubernetes.io/docs/concepts/scheduling-eviction/node-pressure-eviction/#node-out-of-memory-behavior
Key changes:
- Added `OCF_RESKEY_oom` parameter to agent definition (`content type="integer"`).
- Integrated `--oom-score-adj` option into `podman_start()`.
- Implemented input validation for `oom` in `podman_validate()`,
ensuring values are within the [-1000:1000] range.
---
heartbeat/podman-etcd | 22 +++++++++++++++++++++-
1 file changed, 21 insertions(+), 1 deletion(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 6762112ec..884b7c579 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -45,6 +45,7 @@ OCF_RESKEY_nic_default="br-ex"
OCF_RESKEY_authfile_default="/var/lib/kubelet/config.json"
OCF_RESKEY_allow_pull_default="1"
OCF_RESKEY_reuse_default="0"
+OCF_RESKEY_oom_default="-997"
: ${OCF_RESKEY_image=${OCF_RESKEY_image_default}}
: ${OCF_RESKEY_pod_manifest=${OCF_RESKEY_pod_manifest_default}}
@@ -53,6 +54,7 @@ OCF_RESKEY_reuse_default="0"
: ${OCF_RESKEY_authfile=${OCF_RESKEY_authfile_default}}
: ${OCF_RESKEY_allow_pull=${OCF_RESKEY_allow_pull_default}}
: ${OCF_RESKEY_reuse=${OCF_RESKEY_reuse_default}}
+: ${OCF_RESKEY_oom=${OCF_RESKEY_oom_default}}
#######################################################################
@@ -230,6 +232,16 @@ to stop the container before pacemaker.
<shortdesc lang="en">drop-in dependency</shortdesc>
<content type="boolean"/>
</parameter>
+
+<parameter name="oom" required="0" unique="0">
+<longdesc lang="en">
+Tune the host's Out-Of-Memory (OOM) preferences for containers (accepts values from -1000 to 1000).
+Defaults to the same OOM score as system-node-critical. See:
+https://kubernetes.io/docs/concepts/scheduling-eviction/node-pressure-eviction/#node-out-of-memory-behavior
+</longdesc>
+<shortdesc lang="en">OOM for container</shortdesc>
+<content type="integer" default="${OCF_RESKEY_oom_default}"/>
+</parameter>
</parameters>
<actions>
@@ -1226,7 +1238,10 @@ podman_start()
fi
podman_create_mounts
- local run_opts="-d --name=${CONTAINER}"
+ local run_opts="--detach --name=${CONTAINER}"
+
+ run_opts="$run_opts --oom-score-adj=${OCF_RESKEY_oom}"
+
# check to see if the container has already started
podman_simple_status
if [ $? -eq $OCF_SUCCESS ]; then
@@ -1513,6 +1528,11 @@ podman_validate()
exit $OCF_ERR_CONFIGURED
fi
+ if [ "$OCF_RESKEY_oom" -lt -1000 ] || [ "$OCF_RESKEY_oom" -gt 1000 ]; then
+ ocf_exit_reason "'oom' value ${OCF_RESKEY_oom} is out of range [-1000:1000]"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
return $OCF_SUCCESS
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
From 6a3249aae260c081ccbcfd09444d5d85ebc4e3b3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Mon, 28 Apr 2025 15:48:29 +0200
Subject: [PATCH] podman-etcd: remove unused actions from metadata
---
heartbeat/podman-etcd | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 514dd2e5b..3a2323260 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -236,8 +236,6 @@ to stop the container before pacemaker.
<action name="start" timeout="600s" />
<action name="stop" timeout="90s" />
<action name="monitor" timeout="25s" interval="30s" depth="0" />
-<action name="promote" timeout="300s" />
-<action name="demote" timeout="120s" />
<action name="meta-data" timeout="5s" />
<action name="validate-all" timeout="30s" />
</actions>
@@ -251,7 +249,7 @@ REQUIRE_IMAGE_PULL=0
podman_usage()
{
cat <<END
-usage: $0 {start|stop|monitor|promote|demote|validate-all|meta-data}
+usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END

View File

@ -0,0 +1,36 @@
From 5f7b9b045d4713e8ff27a4fc8b2799669c1b823a Mon Sep 17 00:00:00 2001
From: Carlo Lobrano <c.lobrano@gmail.com>
Date: Tue, 20 May 2025 09:34:03 +0200
Subject: [PATCH] podman-etcd: fix listen-peer-urls binding (#2049)
This change makes etcd listen on all interfaces (0.0.0.0) for client, peer and
metrics connections even when it runs as a learner, resolving accessibility issues.
Fix: OCPBUGS-56447
---
heartbeat/podman-etcd | 12 +++---------
1 file changed, 3 insertions(+), 9 deletions(-)
diff --git a/heartbeat/podman-etcd b/heartbeat/podman-etcd
index 3a2323260..6762112ec 100755
--- a/heartbeat/podman-etcd
+++ b/heartbeat/podman-etcd
@@ -436,15 +436,9 @@ prepare_env() {
ETCD_PEER_CERT=$(get_env_from_manifest "ETCDCTL_CERT")
ETCD_PEER_KEY=$(get_env_from_manifest "ETCDCTL_KEY")
- if is_learner; then
- LISTEN_CLIENT_URLS="$NODEIP"
- LISTEN_PEER_URLS="$NODEIP"
- LISTEN_METRICS_URLS="$NODEIP"
- else
- LISTEN_CLIENT_URLS="0.0.0.0"
- LISTEN_PEER_URLS="0.0.0.0"
- LISTEN_METRICS_URLS="0.0.0.0"
- fi
+ LISTEN_CLIENT_URLS="0.0.0.0"
+ LISTEN_PEER_URLS="0.0.0.0"
+ LISTEN_METRICS_URLS="0.0.0.0"
}
archive_data_folder()

View File

@ -45,7 +45,7 @@
Name: resource-agents
Summary: Open Source HA Reusable Cluster Resource Scripts
Version: 4.16.0
Release: 22%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
Release: 23%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist}
License: GPL-2.0-or-later AND LGPL-2.1-or-later
URL: https://github.com/ClusterLabs/resource-agents
Source0: %{upstream_prefix}-%{upstream_version}.tar.gz
@ -77,6 +77,10 @@ Patch24: RHEL-85014-IPaddr2-add-link-status-DOWN-LOWERLAYERDOWN-check.patch
Patch25: RHEL-99743-Filesystem-remove-validate-all-fstype-check.patch
Patch26: RHEL-97216-Filesystem-fix-issue-with-Vormetric-mounts.patch
Patch27: RHEL-102728-ocf-shellfuncs-remove-extra-sleep-from-curl_retry.patch
Patch28: RHEL-88431-1-podman-etcd-new-ra.patch
Patch29: RHEL-88431-2-podman-etcd-remove-unused-actions-from-metadata.patch
Patch30: RHEL-88431-3-podman-etcd-fix-listen-peer-urls-binding.patch
Patch31: RHEL-113104-podman-etcd-add-oom-parameter.patch
# bundled ha-cloud-support libs
Patch500: ha-cloud-support-aliyun.patch
@ -263,6 +267,10 @@ exit 1
%patch -p1 -P 25
%patch -p1 -P 26
%patch -p1 -P 27
%patch -p1 -P 28 -F1
%patch -p1 -P 29
%patch -p1 -P 30
%patch -p1 -P 31
# bundled ha-cloud-support libs
%patch -p1 -P 500
@ -593,6 +601,13 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents
%{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm
%changelog
* Tue Sep 9 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-23
- podman-etcd: new resource agent
- podman-etcd: add oom parameter to be able to tune the Out-Of-Memory (OOM)
score for etcd containers
Resolves: RHEL-88431, RHEL-113104
* Tue Jul 15 2025 Oyvind Albrigtsen <oalbrigt@redhat.com> - 4.16.0-22
- ocf-shellfuncs/AWS agents: don't sleep after the final try in
curl_retry()