Bug fix pcp-5.3.5-3 update resolving QE-detected issues

Resolves: rhbz#2030123
Resolves: rhbz#2030122
Resolves: rhbz#2030138
Resolves: rhbz#2030141
Nathan Scott 2021-12-09 13:04:11 +11:00
parent faadd7c7e2
commit af3b85f31e
5 changed files with 975 additions and 28 deletions


@@ -1,6 +1,6 @@
Name: pcp
Version: 5.3.5
-Release: 2%{?dist}
+Release: 3%{?dist}
Summary: System-level performance monitoring and performance management
License: GPLv2+ and LGPLv2+ and CC-BY
URL: https://pcp.io
@@ -9,6 +9,10 @@ URL: https://pcp.io
Source0: %{artifactory}/pcp-source-release/pcp-%{version}.src.tar.gz
Patch0: redhat-bugzilla-2017632.patch
+Patch1: redhat-bugzilla-2029301.patch
+Patch2: redhat-bugzilla-2030121.patch
+Patch3: redhat-bugzilla-2027753.patch
+Patch4: redhat-bugzilla-2030140.patch

%if 0%{?fedora} >= 26 || 0%{?rhel} > 7
%global __python2 python2
@@ -2278,6 +2282,10 @@ updated policy package.
%prep
%setup -q
%patch0 -p1
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1

%build
# the buildsubdir macro gets defined in %setup and is apparently only available in the next step (i.e. the %build step)
@@ -2961,11 +2969,13 @@ then
%systemd_preun pmie.service
%systemd_preun pmie_farm.service
%systemd_preun pmproxy.service
+%systemd_preun pmfind.service
%systemd_preun pmcd.service
-%systemd_preun pmie_daily.timer
%systemd_preun pmlogger_daily.timer
%systemd_preun pmlogger_check.timer
%systemd_preun pmlogger_farm_check.timer
+%systemd_preun pmie_daily.timer
+%systemd_preun pmie_check.timer
%systemd_preun pmie_farm_check.timer

systemctl stop pmlogger.service >/dev/null 2>&1
@@ -2973,6 +2983,7 @@ then
systemctl stop pmie.service >/dev/null 2>&1
systemctl stop pmie_farm.service >/dev/null 2>&1
systemctl stop pmproxy.service >/dev/null 2>&1
+systemctl stop pmfind.service >/dev/null 2>&1
systemctl stop pmcd.service >/dev/null 2>&1
%else
/sbin/service pmlogger stop >/dev/null 2>&1
@@ -3012,25 +3023,17 @@ pmieconf -c enable dmthin
%if !%{disable_systemd}
systemctl restart pmcd >/dev/null 2>&1
systemctl restart pmlogger >/dev/null 2>&1
-systemctl restart pmlogger_farm >/dev/null 2>&1
systemctl restart pmie >/dev/null 2>&1
-systemctl restart pmie_farm >/dev/null 2>&1
systemctl enable pmcd >/dev/null 2>&1
systemctl enable pmlogger >/dev/null 2>&1
-systemctl enable pmlogger_farm >/dev/null 2>&1
systemctl enable pmie >/dev/null 2>&1
-systemctl enable pmie_farm >/dev/null 2>&1
%else
/sbin/chkconfig --add pmcd >/dev/null 2>&1
/sbin/chkconfig --add pmlogger >/dev/null 2>&1
-/sbin/chkconfig --add pmlogger_farm >/dev/null 2>&1
/sbin/chkconfig --add pmie >/dev/null 2>&1
-/sbin/chkconfig --add pmie_farm >/dev/null 2>&1
/sbin/service pmcd condrestart
/sbin/service pmlogger condrestart
-/sbin/service pmlogger_farm condrestart
/sbin/service pmie condrestart
-/sbin/service pmie_farm condrestart
%endif
%endif
@@ -3054,30 +3057,15 @@ PCP_LOG_DIR=%{_logsdir}
# clean up any stale symlinks for deprecated pm*-poll services
rm -f %{_sysconfdir}/systemd/system/pm*.requires/pm*-poll.* >/dev/null 2>&1 || true

-# pmlogger_farm service inherits the same initial state as pmlogger service
-if systemctl is-enabled pmlogger.service >/dev/null; then
-systemctl enable pmlogger_farm.service pmlogger_farm_check.service
-systemctl start pmlogger_farm.service pmlogger_farm_check.service
-fi
-# pmie_farm service inherits the same initial state as pmie service
-if systemctl is-enabled pmie.service >/dev/null; then
-systemctl enable pmie_farm.service pmie_farm_check.service
-systemctl start pmie_farm.service pmie_farm_check.service
-fi
%systemd_postun_with_restart pmcd.service
%systemd_post pmcd.service
%systemd_postun_with_restart pmlogger.service
%systemd_post pmlogger.service
-%systemd_postun_with_restart pmlogger_farm.service
-%systemd_post pmlogger_farm.service
-%systemd_post pmlogger_farm_check.service
%systemd_postun_with_restart pmie.service
%systemd_post pmie.service
-%systemd_postun_with_restart pmie_farm.service
-%systemd_post pmie_farm.service
-%systemd_post pmie_farm_check.service
-systemctl condrestart pmproxy.service >/dev/null 2>&1
+%systemd_postun_with_restart pmproxy.service
+%systemd_post pmproxy.service
+%systemd_post pmfind.service
%else
/sbin/chkconfig --add pmcd >/dev/null 2>&1
/sbin/service pmcd condrestart
@@ -3388,6 +3376,12 @@ PCP_LOG_DIR=%{_logsdir}
%files zeroconf -f pcp-zeroconf-files.rpm

%changelog
+* Thu Dec 09 2021 Nathan Scott <nathans@redhat.com> - 5.3.5-3
+- Resolve failure in the Nvidia metrics agent (BZ 2030123)
+- PMDA indom cache loading performance improvements (BZ 2030122)
+- Consistent user experience for new farm services (BZ 2030138)
+- Resilience improvements for the pmproxy service (BZ 2030141)
+
* Fri Nov 26 2021 Nathan Scott <nathans@redhat.com> - 5.3.5-2
- Updates to pmlogconf persistance changes (BZ 2017632)


@ -0,0 +1,123 @@
commit b076a10d6901338707cb5e5d503fc25e2f36ba94
Author: Nathan Scott <nathans@redhat.com>
Date: Wed Dec 8 15:24:49 2021 +1100
Resolve inconsistencies in new 'farm' and other systemd units
Most importantly, this change introduces the Wants= line that Mark
(and Jan earlier, indirectly) proposed, so that pmlogger_farm
handling behaves as end users expect when they manipulate
pmlogger.service. Ditto for pmie.
There are also several cleanups of things that were inconsistent,
plain wrong, or missing, particularly in the spec files.
This supersedes PR #1492 and PR #1489.
This resolves Red Hat BZ #2027753.
diff --git a/src/pmie/pmie.service.in b/src/pmie/pmie.service.in
index d234c8a5e5..bf4e64980a 100644
--- a/src/pmie/pmie.service.in
+++ b/src/pmie/pmie.service.in
@@ -4,7 +4,7 @@ Documentation=man:pmie(1)
After=network-online.target pmcd.service
Before=pmie_check.timer pmie_daily.timer
BindsTo=pmie_check.timer pmie_daily.timer
-Wants=pmcd.service
+Wants=pmcd.service pmie_farm.service
[Service]
Type=notify
diff --git a/src/pmie/pmie_farm.service.in b/src/pmie/pmie_farm.service.in
index 6679e48ba1..5459adb310 100644
--- a/src/pmie/pmie_farm.service.in
+++ b/src/pmie/pmie_farm.service.in
@@ -22,6 +22,3 @@ User=@PCP_USER@
[Install]
WantedBy=multi-user.target
-
-# This dependency will be removed in PCPv6.
-WantedBy=pmie.service
diff --git a/src/pmlogger/pmlogger.service.in b/src/pmlogger/pmlogger.service.in
index de0df29db1..59299ac15d 100644
--- a/src/pmlogger/pmlogger.service.in
+++ b/src/pmlogger/pmlogger.service.in
@@ -4,7 +4,7 @@ Documentation=man:pmlogger(1)
After=network-online.target pmcd.service
Before=pmlogger_check.timer pmlogger_daily.timer
BindsTo=pmlogger_check.timer pmlogger_daily.timer
-Wants=pmcd.service
+Wants=pmcd.service pmlogger_farm.service
[Service]
Type=notify
diff --git a/src/pmlogger/pmlogger_farm.service.in b/src/pmlogger/pmlogger_farm.service.in
index fe753afdf6..3bfa2e7098 100644
--- a/src/pmlogger/pmlogger_farm.service.in
+++ b/src/pmlogger/pmlogger_farm.service.in
@@ -22,6 +22,3 @@ User=@PCP_USER@
[Install]
WantedBy=multi-user.target
-
-# This dependency will be removed in PCPv6.
-WantedBy=pmlogger.service
commit cc2dddfb7a04d98f97bdf759f057bae2727260ff
Author: Nathan Scott <nathans@redhat.com>
Date: Thu Dec 9 10:41:22 2021 +1100
Resolve inconsistencies in new 'farm' systemd timers
When the farm systemd timers were introduced, the check interval
was drastically reduced from half-hourly to every five minutes.
There was no discussion of the rationale for this, and it is now
inconsistent with (does not dovetail at all with) the primary
pmlogger and pmie services. If startup takes a long time (large
farms or slow networks) these checks will likely overlap constantly,
so the timing should be chosen with the primary services in mind.
Reset these checks to half-hourly, and let's revisit this in the
new year when the other systemd changes are proposed.
Related to https://github.com/performancecopilot/pcp/pull/1495
diff --git a/src/pmie/pmie_farm_check.timer b/src/pmie/pmie_farm_check.timer
index ee7aa21242..97dc061af2 100644
--- a/src/pmie/pmie_farm_check.timer
+++ b/src/pmie/pmie_farm_check.timer
@@ -1,10 +1,11 @@
[Unit]
-Description=5 minute check of pmie farm instances
+Description=Half-hourly check of pmie farm instances
[Timer]
-# if enabled, runs 1m after boot and every 5 mins
+# if enabled, runs 1m after boot and every half hour
OnBootSec=1min
-OnCalendar=*:00/5
+OnCalendar=*-*-* *:28:10
+OnCalendar=*-*-* *:58:10
[Install]
WantedBy=timers.target
diff --git a/src/pmlogger/pmlogger_farm_check.timer b/src/pmlogger/pmlogger_farm_check.timer
index 094fb4505d..f234ef7839 100644
--- a/src/pmlogger/pmlogger_farm_check.timer
+++ b/src/pmlogger/pmlogger_farm_check.timer
@@ -1,10 +1,11 @@
[Unit]
-Description=5 minute check of pmlogger farm instances
+Description=Half-hourly check of pmlogger farm instances
[Timer]
-# if enabled, runs 1m after boot and every 5 mins
+# if enabled, runs 1m after boot and every half hour
OnBootSec=1min
-OnCalendar=*:00/5
+OnCalendar=*-*-* *:25:10
+OnCalendar=*-*-* *:55:10
[Install]
WantedBy=timers.target


@ -0,0 +1,38 @@
commit 9d9adc9d6c8eb24a6884da81c18b927ea706a68e
Author: Nathan Scott <nathans@redhat.com>
Date: Tue Dec 7 11:18:11 2021 +1100
pmdanvidia: fix mishandling of zero-byte size passed to realloc
Picked up during QA of recent nvidia changes: some hardware lacks
support for per-process metrics, or the GPU has not yet been
accessed by a process using its resources, which had the side
effect that a zero-byte size argument was passed to realloc. In
turn, realloc passes back something that can be freed, and an issue
in the logic meant this would happen on subsequent calls as well.
Resolves the QA failure and Red Hat BZ #2029301.
diff --git a/src/pmdas/nvidia/nvidia.c b/src/pmdas/nvidia/nvidia.c
index f1c12f2275..dc5bb93a0d 100644
--- a/src/pmdas/nvidia/nvidia.c
+++ b/src/pmdas/nvidia/nvidia.c
@@ -617,11 +617,16 @@ refresh(pcp_nvinfo_t *nvinfo, int need_processes)
/* update indoms, cull old entries that remain inactive */
if (need_processes) {
pmdaIndom *proc_indomp = &indomtab[PROC_INDOM];
- pmdaInstid *it_set = proc_indomp->it_set;
+ pmdaInstid *it_set = NULL;
size_t bytes = nproc * sizeof(pmdaInstid);
- if ((it_set = (pmdaInstid *)realloc(it_set, bytes)) == NULL)
+ if (bytes > 0) {
+ it_set = (pmdaInstid *)realloc(proc_indomp->it_set, bytes);
+ if (it_set == NULL)
+ free(proc_indomp->it_set);
+ } else if (proc_indomp->it_set != NULL) {
free(proc_indomp->it_set);
+ }
if ((proc_indomp->it_set = it_set) != NULL) {
for (i = j = 0; i < processes.hsize && j < nproc; i++) {
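
The hazard addressed here is a general C one: realloc(ptr, 0) is
implementation-defined (and undefined as of C23) - it may free the old block
and return NULL, or return a new pointer that must itself be freed - so
treating a NULL return as a plain failure can leak or double-free. A minimal
standalone sketch of the safer pattern, with illustrative names only (this is
not code from the patch):

#include <stdlib.h>

/*
 * Resize *setp to hold 'count' items of 'size' bytes each.
 * The count == 0 case is handled explicitly so realloc(ptr, 0) is never
 * relied upon, and the original pointer is never lost on failure.
 * Returns 0 on success, -1 on allocation failure (original left intact).
 */
static int
resize_set(void **setp, size_t count, size_t size)
{
    void   *tmp;
    size_t  bytes = count * size;

    if (bytes == 0) {           /* shrink-to-empty: free and clear */
        free(*setp);
        *setp = NULL;
        return 0;
    }
    if ((tmp = realloc(*setp, bytes)) == NULL)
        return -1;              /* *setp is still valid, caller frees later */
    *setp = tmp;
    return 0;
}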


@ -0,0 +1,50 @@
commit df7b7bf64eb354114e6c519e3e03ffc446afa8ba
Author: Nathan Scott <nathans@redhat.com>
Date: Fri Nov 26 09:17:23 2021 +1100
libpcp_pmda: add indom cache fast-paths for inst lookup beyond max
We encountered a situation where indom cache loading consumed vast
CPU resources for an indom of ~150k instances. Profiling identified
the insert loop that keeps the inst linked list within the cache
hash tables sorted - cache loading becomes O(N^2) because we
potentially walk this list from the start on every insert during a
load. Because cache loading happens from a sorted file, the
worst-case scenario happened every time - each new instance is
inserted beyond the current maximum. Fortunately we maintain a
last-entry pointer, so the new fast path checks that first and
falls back to the original behaviour for an out-of-order insertion.
A second opportunity for the same optimization was identified when
auditing the rest of cache.c - in the find_inst() routine for inst
identifier lookups beyond the current maximum observed instance.
Resolves Red Hat BZ #2024648
diff --git a/src/libpcp_pmda/src/cache.c b/src/libpcp_pmda/src/cache.c
index 0e66506d74..196ffc1da9 100644
--- a/src/libpcp_pmda/src/cache.c
+++ b/src/libpcp_pmda/src/cache.c
@@ -328,6 +328,9 @@ find_inst(hdr_t *h, int inst)
{
entry_t *e;
+ if ((e = h->last) != NULL && e->inst < inst)
+ return NULL;
+
for (e = h->first; e != NULL; e = e->next) {
if (e->inst == inst && e->state != PMDA_CACHE_EMPTY)
break;
@@ -621,7 +624,11 @@ insert_cache(hdr_t *h, const char *name, int inst, int *sts)
*sts = PM_ERR_INST;
return e;
}
- for (e = h->first; e != NULL; e = e->next) {
+ /* if this entry is beyond the (sorted) list end, avoid linear scan */
+ if ((e = h->last) == NULL || e->inst > inst)
+ e = h->first;
+ /* linear search over linked list, starting at either first or last */
+ for (; e != NULL; e = e->next) {
if (e->inst < inst)
last_e = e;
else if (e->inst > inst)
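
The optimization described above, in isolation: when keys arrive in ascending
order (as they do while loading a sorted cache file), a tail pointer turns the
common append case into O(1) instead of a scan from the head, while
out-of-order keys still fall back to the linear search. A minimal sketch with
illustrative types and names (not the libpcp_pmda data structures):

#include <stdlib.h>

typedef struct node {
    int          inst;          /* sort key */
    struct node *next;
} node_t;

typedef struct list {
    node_t *first;
    node_t *last;               /* tail pointer enabling the O(1) append path */
} list_t;

/* Insert 'inst' keeping the list sorted; returns the new node, or NULL on OOM. */
static node_t *
sorted_insert(list_t *l, int inst)
{
    node_t *n, *prev = NULL, *cur;

    if ((n = calloc(1, sizeof(*n))) == NULL)
        return NULL;
    n->inst = inst;

    /* fast path: key is beyond the current maximum, append at the tail */
    if (l->last == NULL || l->last->inst < inst) {
        if (l->last == NULL)
            l->first = n;
        else
            l->last->next = n;
        l->last = n;
        return n;
    }

    /* slow path: out-of-order key, linear scan from the head */
    for (cur = l->first; cur != NULL && cur->inst < inst; cur = cur->next)
        prev = cur;
    n->next = cur;
    if (prev == NULL)
        l->first = n;
    else
        prev->next = n;
    return n;
}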


@ -0,0 +1,742 @@
diff -Naurp pcp-5.3.5.orig/qa/1458 pcp-5.3.5/qa/1458
--- pcp-5.3.5.orig/qa/1458 1970-01-01 10:00:00.000000000 +1000
+++ pcp-5.3.5/qa/1458 2021-12-09 11:25:01.973327231 +1100
@@ -0,0 +1,219 @@
+#!/bin/sh
+# PCP QA Test No. 1458
+# Exercise access pmproxy with secure.enabled = false
+#
+# The main purpose of this is to test that the component works correctly
+# when secure.enabled = false; we can expect the https URLs to fail.
+#
+# See https://github.com/performancecopilot/pcp/issues/1490
+
+# Copyright (c) 2019,2021 Red Hat
+# Modified by Netflix, Inc.
+#
+
+seq=`basename $0`
+echo "QA output created by $seq"
+
+# get standard environment, filters and checks
+. ./common.product
+. ./common.filter
+. ./common.check
+
+_check_series # pmseries availability means libuv is in use
+_check_valgrind
+openssl help 2>/dev/null || _notrun "No openssl binary found"
+
+if [ -f /etc/lsb-release ]
+then
+ . /etc/lsb-release
+ if [ "$DISTRIB_ID" = Ubuntu ]
+ then
+ # This test fails for Ubuntu 19.10 with a myriad of errors involving
+ # the use of uninitialized values. The code paths vary, but typically
+ # involve libuv -> libssl -> libcrypto
+ #
+ case "$DISTRIB_RELEASE"
+ in
+ 19.10)
+ _notrun "problems with libuv, libssl, libcrypto and valgrind on Ubuntu $DISTRIB_RELEASE"
+ ;;
+ esac
+ fi
+fi
+
+_cleanup()
+{
+ cd $here
+ if $need_restore
+ then
+ need_restore=false
+ _restore_config $PCP_SYSCONF_DIR/labels
+ _sighup_pmcd
+ fi
+ $sudo rm -rf $tmp $tmp.*
+}
+
+status=1 # failure is the default!
+need_restore=false
+username=`id -u -n`
+$sudo rm -rf $tmp $tmp.* $seq.full
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_check_empty()
+{
+ tee -a $seq.full > $tmp.unfiltered
+ if [ -s $tmp.unfiltered ]
+ then
+ echo "Botch: got output from curl"
+ else
+ echo "Good!, empty output from curl"
+ fi
+}
+
+_filter_json()
+{
+ tee -a $seq.full > $tmp.unfiltered
+ if [ -s $tmp.unfiltered ]
+ then
+ pmjson < $tmp.unfiltered > $tmp.filtered
+ status=$?
+ if [ $status -eq 0 ]; then
+ cat $tmp.filtered | \
+ sed \
+ -e '/"machineid": .*/d' \
+ -e 's,"series": .*,"series": "SERIES",g' \
+ -e 's,"context": .*,"context": "CONTEXT",g' \
+ -e 's,"hostname": .*,"hostname": "HOSTNAME",g' \
+ -e 's,"domainname": .*,"domainname": "DOMAINNAME",g' \
+ #end
+ else
+ echo "Invalid JSON: $status"
+ cat $tmp.unfiltered
+ rm -f $tmp.context
+ fi
+ else
+ echo "Botch: no output from curl"
+ fi
+}
+
+_filter_port()
+{
+ sed \
+ -e '/ ipv6 /d' \
+ -e "s/ $port / PORT /g" \
+ #end
+}
+
+# real QA test starts here
+_save_config $PCP_SYSCONF_DIR/labels
+need_restore=true
+
+$sudo rm -rf $PCP_SYSCONF_DIR/labels/*
+_sighup_pmcd
+
+openssl req \
+ -new -newkey rsa:4096 -days 365 -nodes -x509 \
+ -subj "/C=US/ST=Denial/L=Springfield/O=Dis/CN=www.pcpqa.com" \
+ -keyout $tmp.key -out $tmp.cert >$seq.full 2>&1
+# creates a self-signed (insecure) certificate, so for testing only
+
+echo "[pmproxy]" >> $tmp.conf
+echo "pcp.enabled = true" >> $tmp.conf
+echo "http.enabled = true" >> $tmp.conf
+echo "redis.enabled = false" >> $tmp.conf
+echo "secure.enabled = false" >> $tmp.conf
+
+port=`_find_free_port`
+mkdir -p $tmp.pmproxy/pmproxy
+export PCP_RUN_DIR=$tmp.pmproxy
+export PCP_TMP_DIR=$tmp.pmproxy
+
+$_valgrind_clean_assert pmproxy -f -l- --timeseries \
+ -c $tmp.conf -p $port -U $username \
+ >$tmp.valout 2>$tmp.valerr &
+pid=$!
+
+echo "valgrind pid: $pid" >>$seq.full
+echo "pmproxy port: $port" >>$seq.full
+
+# valgrind takes awhile to fire up
+i=0
+while [ $i -lt 40 ]
+do
+ $PCP_BINADM_DIR/telnet-probe -c localhost $port && break
+ sleep 1
+ i=`expr $i + 1`
+done
+if $PCP_BINADM_DIR/telnet-probe -c localhost $port
+then
+ echo "Startup took $i secs" >>$seq.full
+else
+ echo "Arrgh: valgrind failed start pmproxy and get port $port ready after 30 secs"
+ exit
+fi
+
+date >>$seq.full
+echo "=== checking serial http operation ===" | tee -a $seq.full
+for i in 1 2 3 4; do
+ curl -Gs "http://localhost:$port/pmapi/metric?name=sample.long.ten" 2>$tmp.err$i >$tmp.out$i
+done
+for i in 1 2 3 4; do
+echo === out$i === | tee -a $seq.full
+_filter_json < $tmp.out$i
+done
+
+date >>$seq.full
+echo "=== checking parallel http operation ===" | tee -a $seq.full
+for i in 1 2 3 4; do
+ curl -Gs "http://localhost:$port/pmapi/metric?name=sample.long.ten" 2>$tmp.err$i >$tmp.out$i & 2>/dev/null eval pid$i=$!
+done
+wait $pid1 $pid2 $pid3 $pid4
+for i in 1 2 3 4; do
+echo === out$i === | tee -a $seq.full
+_filter_json < $tmp.out$i
+done
+
+date >>$seq.full
+echo "=== checking serial https/TLS operation ===" | tee -a $seq.full
+for i in 1 2 3 4; do
+ curl -k -Gs "https://localhost:$port/pmapi/metric?name=sample.long.ten" 2>$tmp.err$i >$tmp.out$i
+done
+for i in 1 2 3 4; do
+echo === out$i === | tee -a $seq.full
+_check_empty < $tmp.out$i
+done
+
+date >>$seq.full
+echo "=== checking parallel https/TLS operation ===" | tee -a $seq.full
+for i in 1 2 3 4; do
+ curl -k -Gs "https://localhost:$port/pmapi/metric?name=sample.long.ten" 2>$tmp.err$i >$tmp.out$i & 2>/dev/null eval pid$i=$!
+done
+wait $pid1 $pid2 $pid3 $pid4
+for i in 1 2 3 4; do
+echo === out$i === | tee -a $seq.full
+_check_empty < $tmp.out$i
+done
+
+echo "=== check pmproxy is running ==="
+pminfo -v -h localhost@localhost:$port hinv.ncpu
+if [ $? -eq 0 ]; then
+ echo "pmproxy check passed"
+else
+ echo "pmproxy check failed"
+fi
+
+# valgrind takes awhile to shutdown too
+pmsignal $pid >/dev/null 2>&1
+pmsleep 3.5
+echo "=== valgrind stdout ===" | tee -a $seq.full
+cat $tmp.valout | _filter_valgrind
+
+echo "=== valgrind stderr ===" | tee -a $seq.full
+cat $tmp.valerr | _filter_pmproxy_log | _filter_port
+
+# final kill if it's spinning
+$sudo kill -9 $pid >/dev/null 2>&1
+
+# success, all done
+status=0
+exit
diff -Naurp pcp-5.3.5.orig/qa/1458.out pcp-5.3.5/qa/1458.out
--- pcp-5.3.5.orig/qa/1458.out 1970-01-01 10:00:00.000000000 +1000
+++ pcp-5.3.5/qa/1458.out 2021-12-09 11:25:01.973327231 +1100
@@ -0,0 +1,221 @@
+QA output created by 1458
+=== checking serial http operation ===
+=== out1 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out2 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out3 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out4 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== checking parallel http operation ===
+=== out1 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out2 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out3 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== out4 ===
+{
+ "context": "CONTEXT"
+ "metrics": [
+ {
+ "name": "sample.long.ten",
+ "series": "SERIES"
+ "pmid": "29.0.11",
+ "type": "32",
+ "sem": "instant",
+ "units": "none",
+ "labels": {
+ "agent": "sample",
+ "cluster": "zero",
+ "domainname": "DOMAINNAME"
+ "hostname": "HOSTNAME"
+ "role": "testing"
+ },
+ "text-oneline": "10 as a 32-bit integer",
+ "text-help": "10 as a 32-bit integer"
+ }
+ ]
+}
+=== checking serial https/TLS operation ===
+=== out1 ===
+Good!, empty output from curl
+=== out2 ===
+Good!, empty output from curl
+=== out3 ===
+Good!, empty output from curl
+=== out4 ===
+Good!, empty output from curl
+=== checking parallel https/TLS operation ===
+=== out1 ===
+Good!, empty output from curl
+=== out2 ===
+Good!, empty output from curl
+=== out3 ===
+Good!, empty output from curl
+=== out4 ===
+Good!, empty output from curl
+=== check pmproxy is running ===
+pmproxy check passed
+=== valgrind stdout ===
+=== valgrind stderr ===
+Log for pmproxy on HOST started DATE
+
+pmproxy: PID = PID
+pmproxy request port(s):
+ sts fd port family address
+ === ==== ===== ====== =======
+ok FD unix UNIX_DOMAIN_SOCKET
+ok FD PORT inet INADDR_ANY
+[DATE] pmproxy(PID) Info: pmproxy caught SIGTERM
+[DATE] pmproxy(PID) Info: pmproxy Shutdown
+
+Log finished DATE
diff -Naurp pcp-5.3.5.orig/src/pmproxy/src/pcp.c pcp-5.3.5/src/pmproxy/src/pcp.c
--- pcp-5.3.5.orig/src/pmproxy/src/pcp.c 2021-09-24 09:33:06.000000000 +1000
+++ pcp-5.3.5/src/pmproxy/src/pcp.c 2021-12-09 11:22:09.829321418 +1100
@@ -1,6 +1,6 @@
/*
- * Copyright (c) 2018-2019 Red Hat.
- *
+ * Copyright (c) 2018-2019,2021 Red Hat.
+ *
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
@@ -19,22 +19,13 @@
#define PDU_MAXLENGTH (MAXHOSTNAMELEN + HEADER_LENGTH + sizeof("65536")-1)
static void
-client_free(struct client *client)
-{
- if (client->u.pcp.hostname)
- sdsfree(client->u.pcp.hostname);
- if (client->buffer)
- sdsfree(client->buffer);
-}
-
-static void
on_server_close(uv_handle_t *handle)
{
struct client *client = (struct client *)handle;
if (pmDebugOptions.pdu)
fprintf(stderr, "client %p pmcd connection closed\n", client);
- client_free(client);
+ client_put(client);
}
static void
@@ -92,12 +83,11 @@ on_server_read(uv_stream_t *stream, ssiz
void
on_pcp_client_close(struct client *client)
{
- if (client->u.pcp.connected) {
+ if (client->u.pcp.connected)
uv_close((uv_handle_t *)&client->u.pcp.socket, on_server_close);
- memset(&client->u.pcp, 0, sizeof(client->u.pcp));
- } else {
- client_free(client);
- }
+ if (client->u.pcp.hostname)
+ sdsfree(client->u.pcp.hostname);
+ memset(&client->u.pcp, 0, sizeof(client->u.pcp));
}
static void
@@ -118,6 +108,8 @@ on_pcp_client_connect(uv_connect_t *conn
/* socket connection to pmcd successfully established */
client->u.pcp.state = PCP_PROXY_SETUP;
+ client->u.pcp.connected = 1;
+ client_get(client);
/* if we have already received PDUs, send them on now */
if ((buffer = client->buffer) != NULL) {
diff -Naurp pcp-5.3.5.orig/src/pmproxy/src/secure.c pcp-5.3.5/src/pmproxy/src/secure.c
--- pcp-5.3.5.orig/src/pmproxy/src/secure.c 2021-11-01 13:02:26.000000000 +1100
+++ pcp-5.3.5/src/pmproxy/src/secure.c 2021-12-09 11:22:09.831321384 +1100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 Red Hat.
+ * Copyright (c) 2019,2021 Red Hat.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -16,7 +16,7 @@
#include <openssl/opensslv.h>
#include <openssl/ssl.h>
-/* called with proxy->mutex locked */
+/* called with proxy->write_mutex locked */
static void
remove_connection_from_queue(struct client *client)
{
@@ -44,9 +44,9 @@ on_secure_client_close(struct client *cl
if (pmDebugOptions.auth || pmDebugOptions.http)
fprintf(stderr, "%s: client %p\n", "on_secure_client_close", client);
- uv_mutex_lock(&client->proxy->mutex);
+ uv_mutex_lock(&client->proxy->write_mutex);
remove_connection_from_queue(client);
- uv_mutex_unlock(&client->proxy->mutex);
+ uv_mutex_unlock(&client->proxy->write_mutex);
/* client->read and client->write freed by SSL_free */
SSL_free(client->secure.ssl);
}
@@ -63,7 +63,7 @@ maybe_flush_ssl(struct proxy *proxy, str
client->secure.pending.writes_count > 0)
return;
- uv_mutex_lock(&proxy->mutex);
+ uv_mutex_lock(&proxy->write_mutex);
if (proxy->pending_writes == NULL) {
proxy->pending_writes = client;
client->secure.pending.prev = client->secure.pending.next = NULL;
@@ -75,7 +75,7 @@ maybe_flush_ssl(struct proxy *proxy, str
client->secure.pending.prev = c;
}
client->secure.pending.queued = 1;
- uv_mutex_unlock(&proxy->mutex);
+ uv_mutex_unlock(&proxy->write_mutex);
}
static void
@@ -161,7 +161,7 @@ flush_secure_module(struct proxy *proxy)
size_t i, used;
int sts;
- uv_mutex_lock(&proxy->mutex);
+ uv_mutex_lock(&proxy->write_mutex);
head = &proxy->pending_writes;
while ((client = *head) != NULL) {
flush_ssl_buffer(client);
@@ -212,7 +212,7 @@ flush_secure_module(struct proxy *proxy)
sizeof(uv_buf_t) * client->secure.pending.writes_count);
}
}
- uv_mutex_unlock(&proxy->mutex);
+ uv_mutex_unlock(&proxy->write_mutex);
}
void
@@ -221,7 +221,8 @@ secure_client_write(struct client *clien
struct proxy *proxy = client->proxy;
uv_buf_t *dup;
size_t count, bytes;
- int i, sts, defer = 0, maybe = 0;
+ unsigned int i;
+ int sts, defer = 0, maybe = 0;
for (i = 0; i < request->nbuffers; i++) {
if (defer == 0) {
diff -Naurp pcp-5.3.5.orig/src/pmproxy/src/server.c pcp-5.3.5/src/pmproxy/src/server.c
--- pcp-5.3.5.orig/src/pmproxy/src/server.c 2021-11-01 13:02:26.000000000 +1100
+++ pcp-5.3.5/src/pmproxy/src/server.c 2021-12-09 11:22:09.831321384 +1100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Red Hat.
+ * Copyright (c) 2018-2019,2021 Red Hat.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -125,7 +125,7 @@ server_init(int portcount, const char *l
pmGetProgname());
return NULL;
}
- uv_mutex_init(&proxy->mutex);
+ uv_mutex_init(&proxy->write_mutex);
count = portcount + (*localpath ? 1 : 0);
if (count) {
@@ -229,7 +229,6 @@ void
client_put(struct client *client)
{
unsigned int refcount;
- struct proxy *proxy = client->proxy;
uv_mutex_lock(&client->mutex);
assert(client->refcount);
@@ -237,22 +236,16 @@ client_put(struct client *client)
uv_mutex_unlock(&client->mutex);
if (refcount == 0) {
- /* remove client from the doubly-linked list */
- uv_mutex_lock(&proxy->mutex);
- if (client->next != NULL)
- client->next->prev = client->prev;
- *client->prev = client->next;
- uv_mutex_unlock(&proxy->mutex);
-
if (client->protocol & STREAM_PCP)
on_pcp_client_close(client);
if (client->protocol & STREAM_HTTP)
on_http_client_close(client);
if (client->protocol & STREAM_REDIS)
on_redis_client_close(client);
- if (client->protocol & STREAM_SECURE)
+ if ((client->protocol & STREAM_SECURE) && client->stream.secure)
on_secure_client_close(client);
-
+ if (client->buffer)
+ sdsfree(client->buffer);
memset(client, 0, sizeof(*client));
free(client);
}
@@ -284,7 +277,7 @@ on_client_write(uv_write_t *writer, int
"on_client_write", status, client);
if (status == 0) {
- if (client->protocol & STREAM_SECURE)
+ if ((client->protocol & STREAM_SECURE) && client->stream.secure)
on_secure_client_write(client);
if (client->protocol & STREAM_PCP)
on_pcp_client_write(client);
@@ -434,10 +427,16 @@ on_client_read(uv_stream_t *stream, ssiz
if (nread > 0) {
if (client->protocol == STREAM_UNKNOWN)
client->protocol |= client_protocol(*buf->base);
- if (client->protocol & STREAM_SECURE)
+
+#ifdef HAVE_OPENSSL
+ if ((client->protocol & STREAM_SECURE) && (proxy->ssl != NULL))
on_secure_client_read(proxy, client, nread, buf);
else
on_protocol_read(stream, nread, buf);
+#else
+ on_protocol_read(stream, nread, buf);
+#endif
+
} else if (nread < 0) {
if (pmDebugOptions.af)
fprintf(stderr, "%s: read error %ld "
@@ -494,14 +493,6 @@ on_client_connection(uv_stream_t *stream
handle->data = (void *)proxy;
client->proxy = proxy;
- /* insert client into doubly-linked list at the head */
- uv_mutex_lock(&proxy->mutex);
- if ((client->next = proxy->first) != NULL)
- proxy->first->prev = &client->next;
- proxy->first = client;
- client->prev = &proxy->first;
- uv_mutex_unlock(&proxy->mutex);
-
status = uv_read_start((uv_stream_t *)&client->stream.u.tcp,
on_buffer_alloc, on_client_read);
if (status != 0) {
@@ -719,7 +710,7 @@ shutdown_ports(void *arg)
struct proxy *proxy = (struct proxy *)arg;
struct server *server;
struct stream *stream;
- int i;
+ unsigned int i;
for (i = 0; i < proxy->nservers; i++) {
server = &proxy->servers[i];
@@ -756,7 +747,8 @@ dump_request_ports(FILE *output, void *a
struct proxy *proxy = (struct proxy *)arg;
struct stream *stream;
uv_os_fd_t uv_fd;
- int i, fd;
+ unsigned int i;
+ int fd;
fprintf(output, "%s request port(s):\n"
" sts fd port family address\n"
diff -Naurp pcp-5.3.5.orig/src/pmproxy/src/server.h pcp-5.3.5/src/pmproxy/src/server.h
--- pcp-5.3.5.orig/src/pmproxy/src/server.h 2021-09-24 09:33:06.000000000 +1000
+++ pcp-5.3.5/src/pmproxy/src/server.h 2021-12-09 11:22:09.830321401 +1100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Red Hat.
+ * Copyright (c) 2018-2019,2021 Red Hat.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
@@ -97,11 +97,11 @@ typedef struct http_client {
typedef struct pcp_client {
pcp_proxy_state_t state;
- sds hostname;
unsigned int port : 16;
unsigned int certreq : 1;
unsigned int connected : 1;
unsigned int pad : 14;
+ sds hostname;
uv_connect_t pmcd;
uv_tcp_t socket;
} pcp_client_t;
@@ -136,8 +136,6 @@ typedef struct client {
pcp_client_t pcp;
} u;
struct proxy *proxy;
- struct client *next;
- struct client **prev;
sds buffer;
} client_t;
@@ -161,7 +159,7 @@ typedef struct proxy {
struct dict *config; /* configuration dictionary */
uv_loop_t *events; /* global, async event loop */
uv_callback_t write_callbacks;
- uv_mutex_t mutex; /* protects client lists and pending writes */
+ uv_mutex_t write_mutex; /* protects pending writes */
} proxy_t;
extern void proxylog(pmLogLevel, sds, void *);