From 4800c63bd00a3acb46b2e8b1404fead82d0f96ea Mon Sep 17 00:00:00 2001 From: Oyvind Albrigtsen Date: Tue, 4 Nov 2025 13:31:59 +0100 Subject: [PATCH] - pgsqlms: fix validate warnings - nginx: fix validate warnings - Filesystem: speed up get PIDs Resolves: RHEL-102779, RHEL-112443, RHEL-121985 --- ...102779-pgsqlms-fix-validate-warnings.patch | 181 ++++++++++++++++++ RHEL-112443-nginx-fix-validate-warnings.patch | 66 +++++++ ...-121985-Filesystem-speed-up-get-PIDs.patch | 135 +++++++++++++ resource-agents.spec | 16 +- 4 files changed, 397 insertions(+), 1 deletion(-) create mode 100644 RHEL-102779-pgsqlms-fix-validate-warnings.patch create mode 100644 RHEL-112443-nginx-fix-validate-warnings.patch create mode 100644 RHEL-121985-Filesystem-speed-up-get-PIDs.patch diff --git a/RHEL-102779-pgsqlms-fix-validate-warnings.patch b/RHEL-102779-pgsqlms-fix-validate-warnings.patch new file mode 100644 index 0000000..a1ab16a --- /dev/null +++ b/RHEL-102779-pgsqlms-fix-validate-warnings.patch @@ -0,0 +1,181 @@ +From 443841ea27d61a2eedff4a0c4f18bb5771fb8d5e Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 8 Jul 2025 15:19:09 +0200 +Subject: [PATCH] pgsqlms: improvements and fixes + +- add support for promotable variables +- dont fail during validate-all action if notify != true (to avoid + error and future fails during `pcs resource create`) +- report NOT_RUNNING during probe-action when no database has been + created or postgresql is not installed +--- + script/pgsqlms | 74 +++++++++++++++++++++++++++++++++----------------- + 1 file changed, 49 insertions(+), 25 deletions(-) + +diff --git a/heartbeat/pgsqlms b/heartbeat/pgsqlms +index 5ddd67a..1abffeb 100755 +--- a/heartbeat/pgsqlms ++++ b/heartbeat/pgsqlms +@@ -485,7 +485,7 @@ sub _pg_isready { + # Add 60s to the timeout or use a 24h timeout fallback to make sure + # Pacemaker will give up before us and take decisions + my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; +- my $rc = _runas( $PGISREADY, '-h', 
$pghost, '-p', $pgport, '-d', 'postgres', '-t', $timeout ); ++ my $rc = _runas( $PGISREADY, '-q', '-h', $pghost, '-p', $pgport, '-d', 'postgres', '-t', $timeout ); + + # Possible error codes: + # 1: ping rejected (usually when instance is in startup, in crash +@@ -624,14 +624,18 @@ sub _get_controldata { + and defined $controldata{'redo'} + and defined $controldata{'wal_level'}; + +- ocf_exit_reason( 'Could not read all datas from controldata file for "%s"', +- $datadir ); ++ if ( ! ocf_is_probe() ) { ++ ocf_exit_reason( 'Could not read all datas from controldata file for "%s"', ++ $datadir ); + +- ocf_log( 'debug', +- "_get_controldata: controldata file: %s", +- Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump, $ans ); ++ ocf_log( 'debug', ++ "_get_controldata: controldata file: %s", ++ Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump, $ans ); + +- exit $OCF_ERR_ARGS; ++ exit $OCF_ERR_ARGS; ++ } ++ ++ return (); + } + + # Pead major version from datadir/PG_VERSION and return it as numeric version +@@ -642,8 +646,12 @@ sub _get_pg_version { + + # check PG_VERSION + if ( ! -s "$datadir/PG_VERSION" ) { +- ocf_exit_reason( 'PG_VERSION does not exist in "%s"', $datadir ); +- exit $OCF_ERR_ARGS; ++ if ( ! 
ocf_is_probe() ) { ++ ocf_exit_reason( 'PG_VERSION does not exist in "%s"', $datadir ); ++ exit $OCF_ERR_ARGS; ++ } else { ++ return -1; ++ } + } + + unless ( open( $fh, '<', "$datadir/PG_VERSION" ) ) { +@@ -1324,22 +1332,34 @@ sub pgsql_validate_all { + } + + # check notify=true +- unless ( defined $ENV{'OCF_RESKEY_CRM_meta_notify'} +- and lc($ENV{'OCF_RESKEY_CRM_meta_notify'}) =~ /^true$|^on$|^yes$|^y$|^1$/ ) { ++ unless ( $__OCF_ACTION eq 'validate-all' ++ or ( defined $ENV{'OCF_RESKEY_CRM_meta_notify'} ++ and lc($ENV{'OCF_RESKEY_CRM_meta_notify'}) =~ /^true$|^on$|^yes$|^y$|^1$/ ) ) { + ocf_exit_reason( + 'You must set meta parameter notify=true for your "master" resource' + ); + return $OCF_ERR_INSTALLED; + } + +- # check master-max=1 ++ # check promoted_max=1/master-max=1 + unless ( +- defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} +- and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1' ++ $__OCF_ACTION eq 'validate-all' ++ or ++ ( defined $ENV{'OCF_RESKEY_CRM_meta_promoted_max'} ++ and $ENV{'OCF_RESKEY_CRM_meta_promoted_max'} eq '1' ) ++ or ++ (defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} ++ and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1') + ) { +- ocf_exit_reason( +- 'You must set meta parameter master-max=1 for your "master" resource' +- ); ++ if ( ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.1.0' ) =~ /^[21]$/ ) { ++ ocf_exit_reason( ++ 'You must set meta parameter promoted_max=1 for your "promotable" resource' ++ ); ++ } else { ++ ocf_exit_reason( ++ 'You must set meta parameter master-max=1 for your "master" resource' ++ ); ++ } + return $OCF_ERR_INSTALLED; + } + +@@ -1366,14 +1386,14 @@ sub pgsql_validate_all { + } + + $guc = qx{ $POSTGRES -C primary_conninfo -D "$pgdata" $start_opts}; +- unless ($guc =~ /\bapplication_name='?$nodename'?\b/) { ++ unless ($guc =~ /\bapplication_name='?$nodename'?\b/ or $__OCF_ACTION eq 'validate-all') { + ocf_exit_reason( + q{Parameter "primary_conninfo" MUST contain 'application_name=%s'. }. 
+ q{It is currently set to '%s'}, $nodename, $guc ); + return $OCF_ERR_ARGS; + } + } +- else { ++ elsif ($PGVERNUM > -1 ) { + my @content; + + # check recovery template +@@ -1428,14 +1448,14 @@ sub pgsql_validate_all { + } + + # require 9.3 minimum +- if ( $PGVERNUM < $PGVER_93 ) { ++ if ( $PGVERNUM < $PGVER_93 && $PGVERNUM > -1 ) { + ocf_exit_reason( "Require 9.3 and more" ); + return $OCF_ERR_INSTALLED; + } + + # check binaries +- unless ( -x $PGCTL and -x $PGPSQL and -x $PGCTRLDATA and -x $PGISREADY +- and ( -x $PGWALDUMP or -x "$bindir/pg_xlogdump") ++ unless ( ( -x $PGCTL and -x $PGPSQL and -x $PGCTRLDATA and -x $PGISREADY ++ and ( -x $PGWALDUMP or -x "$bindir/pg_xlogdump") ) or ocf_is_probe() + ) { + ocf_exit_reason( + "Missing one or more binary. Check following path: %s, %s, %s, %s, %s or %s", +@@ -1445,7 +1465,7 @@ sub pgsql_validate_all { + + # require wal_level >= hot_standby + %cdata = _get_controldata(); +- unless ( $cdata{'wal_level'} =~ m{hot_standby|logical|replica} ) { ++ unless ( (defined $cdata{'wal_level'} and $cdata{'wal_level'} =~ m{hot_standby|logical|replica}) or ocf_is_probe() ) { + ocf_exit_reason( + 'wal_level must be one of "hot_standby", "logical" or "replica"' ); + return $OCF_ERR_ARGS; +@@ -1599,6 +1619,10 @@ sub pgsql_monitor { + return _confirm_role(); + } + ++ if ( ocf_is_probe() ) { ++ return $OCF_NOT_RUNNING; ++ } ++ + if ( $pgisready_rc == 1 ) { + # The attempt was rejected. + # This could happen in several cases: +@@ -2254,13 +2278,13 @@ chdir File::Spec->tmpdir(); + + # mandatory sanity checks + # check pgdata +-if ( ! -d $pgdata ) { ++if ( ! -d $pgdata and ! ocf_is_probe() ) { + ocf_exit_reason( 'PGDATA "%s" does not exist', $pgdata ); + exit $OCF_ERR_ARGS; + } + + # check datadir +-if ( ! -d $datadir ) { ++if ( ! -d $datadir and ! 
ocf_is_probe() ) { + ocf_exit_reason( 'data_directory "%s" does not exist', $datadir ); + exit $OCF_ERR_ARGS; + } diff --git a/RHEL-112443-nginx-fix-validate-warnings.patch b/RHEL-112443-nginx-fix-validate-warnings.patch new file mode 100644 index 0000000..7f7c889 --- /dev/null +++ b/RHEL-112443-nginx-fix-validate-warnings.patch @@ -0,0 +1,66 @@ +From 10d61eb3d8d8adcd0356fd855cbba4589027bfcb Mon Sep 17 00:00:00 2001 +From: Oyvind Albrigtsen +Date: Tue, 4 Nov 2025 12:58:18 +0100 +Subject: [PATCH] nginx: fix ls-redirection, mute non-errors in validate-all, + and set unique intervals for monitor actions + +--- + heartbeat/nginx | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/heartbeat/nginx b/heartbeat/nginx +index cb1c6ec27a..0f856175de 100755 +--- a/heartbeat/nginx ++++ b/heartbeat/nginx +@@ -251,7 +251,7 @@ nginxcat() { + close(cmd); + } + function listfiles(pattern, cmd,f) { +- cmd="ls "pattern" 2>/dev/null"; ++ cmd="ls "pattern; + while( ( cmd | getline f ) > 0 ) { + printfile(f); + } +@@ -271,7 +271,7 @@ nginxcat() { + return !system("test -d \""s"\""); + } + { procline(); } +- ' $1 | ++ ' $1 2> /dev/null | + sed 's/#.*//;s/[[:blank:]]*$//;s/^[[:blank:]]*//' | + grep -v '^$' + } +@@ -800,8 +800,8 @@ Extra options to apply when starting nginx. + + + +- +- ++ ++ + + + +@@ -847,11 +847,11 @@ validate_all_nginx() { + exit $OCF_ERR_CONFIGURED + fi + if +- ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE ++ ocf_run $NGINXD $OPTIONS -q -t -c $CONFIGFILE + then + : Cool $NGINXD likes $CONFIGFILE + else +- ocf_exit_reason "$NGINXD $OPTIONS -t -c $CONFIGFILE reported a configuration error." ++ ocf_exit_reason "$NGINXD $OPTIONS -q -t -c $CONFIGFILE reported a configuration error." + return $OCF_ERR_CONFIGURED + fi + return $OCF_SUCCESS +@@ -908,7 +908,7 @@ then + if + [ ! 
-z "$OCF_RESKEY_httpd" ] + then +- ocf_log info "Using $NGINXD as nginx" ++ ocf_log debug "Using $NGINXD as nginx" + fi + fi + diff --git a/RHEL-121985-Filesystem-speed-up-get-PIDs.patch b/RHEL-121985-Filesystem-speed-up-get-PIDs.patch new file mode 100644 index 0000000..60f31c8 --- /dev/null +++ b/RHEL-121985-Filesystem-speed-up-get-PIDs.patch @@ -0,0 +1,135 @@ +From 93729d83fa5bf15f4ec694e08e9777bde858fb41 Mon Sep 17 00:00:00 2001 +From: Lars Ellenberg +Date: Thu, 16 Oct 2025 10:58:37 +0200 +Subject: [PATCH 1/2] Filesystem: speed up get_pids + +With force_umount=safe, we "manually" scan the /proc/ file system. + +We look for symlinks pointing into the path we are interested in. +Specifically, we are interested in + /proc/<pid>/{root,exe,cwd} + /proc/<pid>/fd/<fd> +We also look for relevant memory mappings in /proc/<pid>/maps + +All these are per process, not per "task" or "thread". +see procfs(5) and pthreads(7). +Still, we currently also scan /proc/<pid>/task/<tid>/ +for all the same things. + +With a large system with many heavily threaded processes, +this can significantly slow down this scanning, +without gaining new information. + +Adding -maxdepth to the find command line avoids this useless work, +potentially reducing the scanning time by orders of magnitude +on systems with many heavily threaded processes. + +We could also write a dedicated helper in C to do the very same thing, +with the option to "short circuit" and proceed with the next pid +as soon as the first "match" is found for the currently inspected pid. + +That could further reduce the scanning time +by about an additional factor of 10. +--- + heartbeat/Filesystem | 25 +++++++++++++++++++++---- + 1 file changed, 21 insertions(+), 4 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index 6d3960162..f76339fd6 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -680,14 +680,31 @@ get_pids() + # -path "/proc/[!0-9]*" -prune -o ... + # -path "/proc/[0-9]*" -a ... 
+ # the latter seemd to be significantly faster for this one in my naive test. ++ ++ # root, cwd, exe, maps, fd: all per process, not per task ("thread"). ++ # -maxdepth to avoid repeatedly scanning the same thing ++ # for all threads of a heavily threaded process. ++ # ++ # Adding -maxdepth reduced scanning from > 16 seconds to < 2 seconds ++ # on a mostly idle system that happened to run a few java processes. ++ # ++ # We can also add a dedicated helper in C to do what is done below, ++ # which would reduce the scanning time by an ++ # additional factor of 10 again. ++ # ++ # Or trust that fuser (above) learned something in the last 15 years ++ # and avoids blocking operations meanwhile? + procs=$(exec 2>/dev/null; +- find /proc -path "/proc/[0-9]*" -type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print | ++ find /proc -mindepth 1 -maxdepth 3 \ ++ -path "/proc/[0-9]*" \ ++ -type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print | + awk -F/ '{print $3}' | uniq) + +- # This finds both /proc//maps and /proc//task//maps; +- # if you don't want the latter, add -maxdepth. ++ # memory mappings are also per process, not per task. ++ # This finds only /proc//maps, and not /proc//task//maps; ++ # if you also want the latter, drop -maxdepth. + mmap_procs=$(exec 2>/dev/null; +- find /proc -path "/proc/[0-9]*/maps" -print | ++ find /proc -mindepth 2 -maxdepth 2 -path "/proc/[0-9]*/maps" -print | + xargs -r grep -l " ${dir}/" | awk -F/ '{print $3}' | uniq) + printf "${procs}\n${mmap_procs}" | sort -u + fi + +From 3d34db0c60a125126361b45ff8303358b6275298 Mon Sep 17 00:00:00 2001 +From: Lars Ellenberg +Date: Thu, 16 Oct 2025 11:31:00 +0200 +Subject: [PATCH 2/2] Filesystem: further speed up get_pids + +If we have /proc/<pid>/map_files/* symlinks, +we don't need to additionally grep /proc/<pid>/maps. + +Also don't first collect output of commands into variables +just to pipe them to sort -u later, +just pipe the output of the commands through sort -u directly. 
+--- + heartbeat/Filesystem | 31 +++++++++++++++++++------------ + 1 file changed, 19 insertions(+), 12 deletions(-) + +diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem +index f76339fd6..7021f13da 100755 +--- a/heartbeat/Filesystem ++++ b/heartbeat/Filesystem +@@ -694,19 +694,26 @@ get_pids() + # + # Or trust that fuser (above) learned something in the last 15 years + # and avoids blocking operations meanwhile? +- procs=$(exec 2>/dev/null; +- find /proc -mindepth 1 -maxdepth 3 \ +- -path "/proc/[0-9]*" \ +- -type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print | +- awk -F/ '{print $3}' | uniq) +- +- # memory mappings are also per process, not per task. +- # This finds only /proc//maps, and not /proc//task//maps; +- # if you also want the latter, drop -maxdepth. +- mmap_procs=$(exec 2>/dev/null; ++ ( ++ # If you want to debug this, drop this redirection. ++ # But it produces too much "No such file" noise for kernel ++ # threads or due to races with exiting processes or closing fds. ++ exec 2>/dev/null; ++ find /proc -mindepth 1 -maxdepth 3 \ ++ -path "/proc/[0-9]*" \ ++ -type l \( -lname "${dir}/*" -o -lname "${dir}" \) -print | ++ awk -F/ '{print $3}' | uniq ++ ++ # If we have "map_files/", "find" above already found the ++ # relevant symlinks, and we don't need to grep "maps" below. ++ # Available since kernel 3.3, respectively 4.3. ++ test -d /proc/$$/map_files || ++ # memory mappings are also per process, not per task. ++ # This finds only /proc//maps, and not /proc//task//maps; ++ # if you also want the latter, drop -maxdepth. 
+ find /proc -mindepth 2 -maxdepth 2 -path "/proc/[0-9]*/maps" -print | +- xargs -r grep -l " ${dir}/" | awk -F/ '{print $3}' | uniq) +- printf "${procs}\n${mmap_procs}" | sort -u ++ xargs -r grep -l " ${dir}/" | awk -F/ '{print $3}' | uniq ++ ) | sort -u + fi + } + diff --git a/resource-agents.spec b/resource-agents.spec index 0d92cc3..0394734 100644 --- a/resource-agents.spec +++ b/resource-agents.spec @@ -45,7 +45,7 @@ Name: resource-agents Summary: Open Source HA Reusable Cluster Resource Scripts Version: 4.16.0 -Release: 38%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} +Release: 39%{?rcver:%{rcver}}%{?numcomm:.%{numcomm}}%{?alphatag:.%{alphatag}}%{?dirty:.%{dirty}}%{?dist} License: GPL-2.0-or-later AND LGPL-2.1-or-later URL: https://github.com/ClusterLabs/resource-agents Source0: %{upstream_prefix}-%{upstream_version}.tar.gz @@ -99,6 +99,9 @@ Patch46: RHEL-124881-oracle-improve-monpassword-description.patch Patch47: RHEL-109486-1-nfsserver-support-non-clustered-kerberized-mounts.patch Patch48: RHEL-109486-2-nfsserver-fix-error-message.patch Patch49: RHEL-109013-2-powervs-move-ip-add-iflabel-parameter.patch +Patch50: RHEL-102779-pgsqlms-fix-validate-warnings.patch +Patch51: RHEL-112443-nginx-fix-validate-warnings.patch +Patch52: RHEL-121985-Filesystem-speed-up-get-PIDs.patch # bundled ha-cloud-support libs Patch500: ha-cloud-support-aliyun.patch @@ -319,6 +322,9 @@ exit 1 %patch -p1 -P 47 %patch -p1 -P 48 %patch -p1 -P 49 +%patch -p1 -P 50 +%patch -p1 -P 51 +%patch -p1 -P 52 # bundled ha-cloud-support libs %patch -p1 -P 500 @@ -651,6 +657,14 @@ rm -rf %{buildroot}/usr/share/doc/resource-agents %{_usr}/lib/ocf/lib/heartbeat/OCF_*.pm %changelog +* Tue Nov 4 2025 Oyvind Albrigtsen - 4.16.0-39 +- pgsqlms: fix validate warnings +- nginx: fix validate warnings +- Filesystem: speed up get PIDs + + Resolves: RHEL-102779, RHEL-112443, RHEL-121985 + + * Mon Nov 3 2025 Oyvind Albrigtsen - 4.16.0-38 - powervs-move-ip: new 
resource agent