Resolves: RHEL-101745

Resolves: RHEL-106772
Resolves: RHEL-79767
This commit is contained in:
lmchilton 2025-10-15 16:18:59 -04:00
parent b1a11c7d71
commit f5f1fcd82a
7 changed files with 5142 additions and 94 deletions

4574
atop-cpu-utilization.patch Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,6 @@
Name: pcp
Version: 6.3.7
Release: 5%{?dist}
Release: 6%{?dist}
Summary: System-level performance monitoring and performance management
License: GPL-2.0-or-later AND LGPL-2.1-or-later AND CC-BY-3.0
URL: https://pcp.io
@ -18,6 +18,11 @@ Patch6: pmapi-header-multilib-fix.patch
Patch7: python-pmda-wrapper-list-fix.patch
Patch8: systemd-tmpfiles.d-directories.patch
Patch9: fix-pmdabpf-noarch-man-page-build-failure.patch
Patch10: pmda-openmetrics_qa1976.patch
Patch11: pmda-openmetrics_removal.patch
Patch12: pcp2openmetrics_hang.patch
Patch13: atop-cpu-utilization.patch
Patch14: pmda-openmetrics-performance.patch
%if 0%{?fedora} >= 40 || 0%{?rhel} >= 10
ExcludeArch: %{ix86}
@ -3628,6 +3633,11 @@ fi
%files zeroconf -f pcp-zeroconf-files.rpm
%changelog
* Wed Nov 19 2025 Lauren Chilton <lchilton@redhat.com> - 6.3.7-6
- Backport fix for pmdaopenmetrics metric removal (RHEL-101745)
- Backport fix for pcp2openmetrics metric hanging (RHEL-106772)
- Backport fix for atop cpu utilization bug (RHEL-79767)
* Fri Jun 27 2025 Nathan Scott <nathans@redhat.com> - 6.3.7-5
- Backport some more fixes to the OpenMetrics PMDA (RHEL-54039)
- Fix a multilib regression in PCP header files (RHEL-93186)

View File

@ -0,0 +1,42 @@
commit 50967b7d1ca7c6f1d440236e6faadd1e768e75b1
Author: lmchilton <lauren.chilton26@gmail.com>
Date: Wed Jul 30 12:48:18 2025 -0400
pcp2openmetrics tool causing hanging
A bug was found that caused the tool to hang,
failing to output more than one metric.
Changing where the timezone is set to UTC
fixed the issue: the timezone is now set at
the top of the program, before command line
processing and before defaults are set,
rather than on every write. Setting it this
early is a possible explanation for why the
hang no longer occurs.
diff --git a/src/pcp2openmetrics/pcp2openmetrics.py b/src/pcp2openmetrics/pcp2openmetrics.py
index 8d8b62066..a228aaf34 100755
--- a/src/pcp2openmetrics/pcp2openmetrics.py
+++ b/src/pcp2openmetrics/pcp2openmetrics.py
@@ -39,11 +39,14 @@ CONFVER = 1
INDENT = 2
TIMEFMT = "%Y-%m-%d %H:%M:%S"
TIMEOUT = 2.5 # seconds
+os.environ["TZ"] = "UTC"
+time.tzset()
class PCP2OPENMETRICS(object):
""" PCP to OPENMETRICS """
def __init__(self):
""" Construct object, prepare for command line handling """
+
self.context = None
self.daemonize = 0
self.pmconfig = pmconfig.pmConfig(self)
@@ -390,7 +393,6 @@ class PCP2OPENMETRICS(object):
# Silent goodbye, close in finalize()
return
- self.context.pmNewZone("UTC")
ts = self.context.datetime_to_secs(self.pmfg_ts(), PM_TIME_SEC)
if self.prev_ts is None:

View File

@ -0,0 +1,28 @@
commit c671c1269fdf7d1832b72ec22394c2c5957d7fee
Merge: 7b4f81bb70 db214c8409
Author: Nathan Scott <nathans@redhat.com>
Date: Sat Nov 8 10:42:55 2025 +1100
Merge branch 'rhel-124769' of github.com:lmchilton/pcp into lmchilton-rhel-124769
diff --git a/src/pmdas/openmetrics/pmdaopenmetrics.python b/src/pmdas/openmetrics/pmdaopenmetrics.python
index 352f6565f2..927ab0cd94 100755
--- a/src/pmdas/openmetrics/pmdaopenmetrics.python
+++ b/src/pmdas/openmetrics/pmdaopenmetrics.python
@@ -565,6 +565,7 @@ class Source(object):
self.document = None
self.refresh_time = 0 # "never"
+
if not is_scripted:
# source is a URL. Create a session for it and initialize a few things
self.requests = self.pmda.requests # allow persistent connections etc.
@@ -601,7 +602,7 @@ class Source(object):
'''
now = time.time()
last_try_age = now - self.refresh_time
- return len(self.metrics_by_name) == 0 or last_try_age > self.pmda.refresh_timeout
+ return last_try_age > self.pmda.refresh_timeout
def check_filter(self, name, entrytype):
'''

View File

@ -1,6 +1,6 @@
diff -Naurp pcp-6.3.7.orig/qa/1976 pcp-6.3.7/qa/1976
--- pcp-6.3.7.orig/qa/1976 1970-01-01 10:00:00.000000000 +1000
+++ pcp-6.3.7/qa/1976 2025-06-26 18:02:39.313416612 +1000
--- pcp-6.3.7.orig/qa/1976 1969-12-31 19:00:00.000000000 -0500
+++ pcp-6.3.7/qa/1976 2025-10-14 09:47:42.874408447 -0400
@@ -0,0 +1,85 @@
+#!/bin/sh
+# PCP QA Test No. 1976
@ -88,8 +88,8 @@ diff -Naurp pcp-6.3.7.orig/qa/1976 pcp-6.3.7/qa/1976
+status=0
+exit
diff -Naurp pcp-6.3.7.orig/qa/1976.out pcp-6.3.7/qa/1976.out
--- pcp-6.3.7.orig/qa/1976.out 1970-01-01 10:00:00.000000000 +1000
+++ pcp-6.3.7/qa/1976.out 2025-06-26 18:02:01.879861640 +1000
--- pcp-6.3.7.orig/qa/1976.out 1969-12-31 19:00:00.000000000 -0500
+++ pcp-6.3.7/qa/1976.out 2025-10-14 09:47:42.874575552 -0400
@@ -0,0 +1,17 @@
+QA output created by 1976
+
@ -109,8 +109,8 @@ diff -Naurp pcp-6.3.7.orig/qa/1976.out pcp-6.3.7/qa/1976.out
+openmetrics.simple_metric.metric1
+
diff -Naurp pcp-6.3.7.orig/qa/group pcp-6.3.7/qa/group
--- pcp-6.3.7.orig/qa/group 2025-06-26 17:55:37.790462638 +1000
+++ pcp-6.3.7/qa/group 2025-06-26 17:58:18.996389423 +1000
--- pcp-6.3.7.orig/qa/group 2025-03-30 21:41:26.000000000 -0400
+++ pcp-6.3.7/qa/group 2025-10-14 09:47:42.874918504 -0400
@@ -2205,6 +2205,7 @@ pmcd.pdu
1963 pmda.linux local
1970 pmda.bpf local
@ -120,8 +120,8 @@ diff -Naurp pcp-6.3.7.orig/qa/group pcp-6.3.7/qa/group
1984 pmlogconf pmda.redis local
1985 pmfind local valgrind
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh pcp-6.3.7/qa/openmetrics/scripts/curl/script.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl/script.sh 2025-06-26 18:02:50.155355973 +1000
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh 2023-11-16 01:51:39.000000000 -0500
+++ pcp-6.3.7/qa/openmetrics/scripts/curl/script.sh 2025-10-14 09:47:42.875376079 -0400
@@ -1,4 +1,4 @@
#! /bin/sh
@ -129,8 +129,8 @@ diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh pcp-6.3.7/qa/op
-curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
+curl -Gqs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh pcp-6.3.7/qa/openmetrics/scripts/curl_filtered.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_filtered.sh 2025-06-26 18:02:50.155694515 +1000
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh 2023-11-16 01:51:39.000000000 -0500
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_filtered.sh 2025-10-14 09:47:42.875590832 -0400
@@ -7,6 +7,6 @@
. /etc/pcp.conf
@ -141,8 +141,8 @@ diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh pcp-6.3.7/qa/
+curl -Gqs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt) \
| sed -e '/metric2/d'
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh pcp-6.3.7/qa/openmetrics/scripts/curl_hostname_label.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_hostname_label.sh 2025-06-26 18:02:50.155942641 +1000
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh 2023-11-16 01:51:39.000000000 -0500
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_hostname_label.sh 2025-10-14 09:47:42.875772077 -0400
@@ -12,5 +12,5 @@
# here for QA purposes we're fetching from a local file
@ -151,8 +151,8 @@ diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh pcp-6.3
+curl -Gqs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt 2>/dev/null | \
sed -e 's/[a-z0-9]*=/hostname="remotehost",&/'
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh pcp-6.3.7/qa/openmetrics/scripts/curl_scripted.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_scripted.sh 2025-06-26 18:02:50.156229975 +1000
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh 2023-11-16 01:51:39.000000000 -0500
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_scripted.sh 2025-10-14 09:47:42.875946730 -0400
@@ -1,5 +1,5 @@
#! /bin/sh
@ -162,19 +162,18 @@ diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh pcp-6.3.7/qa/
+curl -Gqs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
+curl -Gqs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt
diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.7/src/pmdas/openmetrics/pmdaopenmetrics.python
--- pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-06-26 17:55:37.790973098 +1000
+++ pcp-6.3.7/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-06-26 18:02:26.571388227 +1000
@@ -32,7 +32,8 @@ import subprocess
--- pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-03-27 02:01:59.000000000 -0400
+++ pcp-6.3.7/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-10-14 09:51:21.874137509 -0400
@@ -32,7 +32,7 @@ import subprocess
import sys
from ctypes import c_int
from socket import gethostname
-from stat import ST_MODE, S_IXUSR, ST_CTIME
+from urllib.parse import urlparse
+from stat import ST_MODE, S_IXUSR
import requests
from pcp.pmapi import pmUnits, pmContext
@@ -127,15 +128,9 @@ class Metric(object):
@@ -127,15 +127,9 @@ class Metric(object):
(name, pmContext.pmIDStr(self.pmid), self.mtype, self.msem, self.singular, self.mindom, self.labels))
self.obj = pmdaMetric(self.pmid, self.mtype, self.mindom, self.msem, self.munits)
@ -192,7 +191,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
try:
self.source.pmda.add_metric(self.mname, self.obj, help_oneline, help_text)
@@ -565,7 +560,7 @@ class Source(object):
@@ -565,7 +559,7 @@ class Source(object):
self.path = path # pathname to .url or executable file
self.url = None
self.parse_error = False
@ -201,7 +200,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
self.is_scripted = is_scripted
self.pmda = thispmda # the shared pmda
self.requests = None
@@ -588,6 +583,16 @@ class Source(object):
@@ -588,6 +582,16 @@ class Source(object):
self.metrics_by_name = {} # name -> Metric
self.metrics_by_num = {} # number (last component of pmid) -> Metric
@ -218,7 +217,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
def old_enough_for_refresh(self):
'''But what is "old"? If it is empty (no metrics), then it
@@ -685,6 +690,23 @@ class Source(object):
@@ -685,6 +689,23 @@ class Source(object):
self.pmda.debug("included_labels '%s'" % (included_labels)) if self.pmda.dbg else None
self.pmda.debug("optional_labels '%s'" % (optional_labels)) if self.pmda.dbg else None
if sp.name in self.metrics_by_name:
@ -242,7 +241,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
m = self.metrics_by_name[sp.name]
assert self.metrics_by_num[m.metricnum] == m
if m.singular:
@@ -693,6 +715,7 @@ class Source(object):
@@ -693,6 +714,7 @@ class Source(object):
else:
m.store_inst(naming_labels, sp.value)
self.pmda.debug("naming_labels '%s'" % (naming_labels)) if self.pmda.dbg else None
@ -250,7 +249,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
else:
# check metric is not excluded by filters
fullname = "openmetrics.%s.%s" % (self.name, sp.name)
@@ -800,9 +823,9 @@ class Source(object):
@@ -800,9 +822,9 @@ class Source(object):
return num_metrics
@ -262,7 +261,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
Remaining lines are prefixed with a keyword. Supported keywords
include '#' for a comment, 'HEADER:' to add to the header passed
to the headers dict parameter to the get() call. Note the ':' are
@@ -900,12 +923,12 @@ class Source(object):
@@ -900,12 +922,12 @@ class Source(object):
if not s[ST_MODE] & S_IXUSR:
self.pmda.err("cannot execute script '%s'" % self.path)
return
@ -279,7 +278,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
return
# fetch the document
@@ -1001,7 +1024,7 @@ class OpenMetricsPMDA(PMDA):
@@ -1001,7 +1023,7 @@ class OpenMetricsPMDA(PMDA):
# now everything else may take time
self.pmda_name = pmda_name
self.config_dir = os.path.normpath(config)
@ -288,7 +287,7 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
self.timeout = timeout
# a single central Session that all our sources can concurrently reuse
@@ -1014,6 +1037,11 @@ class OpenMetricsPMDA(PMDA):
@@ -1014,6 +1036,10 @@ class OpenMetricsPMDA(PMDA):
reserved_cluster = self.cluster_table.intern_lookup_value("control")
assert reserved_cluster == 0
self.source_by_cluster = {}
@ -296,11 +295,21 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
+ self.all_metrics = {}
+ # keep track of removed metrics, in case of re-addition
+ self.removed_metrics = {}
+ self.controls = {0:0}
# compiled regex cache
self.regex_cache = {}
@@ -1106,26 +1134,53 @@ class OpenMetricsPMDA(PMDA):
@@ -1053,7 +1079,9 @@ class OpenMetricsPMDA(PMDA):
self.add_metric('%s.control.debug' % self.pmda_name, pmdaMetric(self.pmid(0, 4),
c_api.PM_TYPE_U32, c_api.PM_INDOM_NULL, c_api.PM_SEM_DISCRETE,
pmUnits(0, 0, 0, 0, 0, 0)),
- 'debug flag to enable verbose log messages, to enable: pmstore %s.control.debug 1' % self.pmda_name)
+ 'debug flag to enable verbose log messages',
+ 'Debugging flag to enable verbose log messages, to enable:\n'
+ '> pmstore %s.control.debug 1' % self.pmda_name)
# response status string, per-source end-point
self.stats_status = {0:"none"} # status string, keyed by cluster number
@@ -1106,26 +1134,26 @@ class OpenMetricsPMDA(PMDA):
assert s == self.source_by_name[s.name]
@ -326,14 +335,6 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
+ if mtime is None or m > mtime:
+ mtime = m
ret.append(fname)
+ fname = os.path.join(path, f)
+ with open(fname, 'r') as name:
+ f_path = name.readline().strip()
+ if f_path.startswith("file:///"):
+ parsed = urlparse(f_path)
+ m = os.path.getmtime(parsed.path)
+ if mtime is None or m > mtime:
+ mtime = m
for d in subdirs:
- m, _ = self.traverse(os.path.join(path, d), ctime)
- if ctime is None or m > ctime:
@ -343,29 +344,10 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
+ if mtime is None or m > mtime:
+ mtime = m
+ return mtime, ret
+
+ def initialize_controls(self, cluster):
+ # initialize statistics
+ self.stats_fetch_calls[cluster] = 0
+ self.stats_fetch_time[cluster] = 0
+ self.stats_parse_time[cluster] = 0
+ self.stats_status[cluster] = "unknown"
+ self.stats_status_code[cluster] = 0
+
+ self.controls[cluster] = 1
+
+ def delete_controls(self, cluster):
+ del self.stats_fetch_calls[cluster]
+ del self.stats_fetch_time[cluster]
+ del self.stats_parse_time[cluster]
+ del self.stats_status[cluster]
+ del self.stats_status_code[cluster]
+
+ self.controls[cluster] = 0
def rescan_confdir(self):
'''Scan the configuration directories for any new .url files
@@ -1138,19 +1193,56 @@ class OpenMetricsPMDA(PMDA):
@@ -1138,18 +1166,50 @@ class OpenMetricsPMDA(PMDA):
'''
traverse_time = time.time()
@ -405,9 +387,6 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
+ try:
+ remove_name = key
+ remove_obj = value
+ cluster = self.cluster_table.intern_lookup_value(split_name[1])
+ if self.controls[cluster] == 1:
+ self.delete_controls(cluster)
+ self.remove_metric(remove_name, remove_obj)
+ self.removed_metrics[remove_name] = remove_obj
+ self.debug("removed metric name: %s" % remove_name) if self.dbg else None
@ -421,45 +400,22 @@ diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.
- # TODO: maybe nuke sources related to removed files
save_cluster_table = False
+ cluster_for_refresh_names = []
+ cluster_for_refresh = []
if sort_conf_list:
# sorted for indom cluster consistency
conf_filelist = sorted(conf_filelist)
@@ -1177,6 +1269,13 @@ class OpenMetricsPMDA(PMDA):
@@ -1177,6 +1237,16 @@ class OpenMetricsPMDA(PMDA):
if name in self.source_by_name:
# this source is already known
self.assert_source_invariants(name=name)
+ s = self.source_by_name[name]
+ for key, value in self.source_by_cluster.items():
+ if value == s:
+ cluster_for_refresh.append(key)
+ cluster_for_refresh_names.append(name)
+ if name in self.re_add_list:
+ self.initialize_controls(key)
+ cluster_for_refresh = []
+ cluster_for_refresh_names = []
+ if name in self.re_add_list:
+ for key,value in self.source_by_cluster.items():
+ if value == s:
+ cluster_for_refresh.append(key)
+ cluster_for_refresh_names.append(name)
+ self.debug("refreshing cluster list: %s" % cluster_for_refresh_names) if self.dbg else None
+ self.refresh_some_clusters_for_fetch(cluster_for_refresh)
else:
try:
path = file
@@ -1185,17 +1284,16 @@ class OpenMetricsPMDA(PMDA):
self.source_by_name[source.name] = source
self.source_by_cluster[source.cluster] = source
- # initialize statistics
- self.stats_fetch_calls[cluster] = 0
- self.stats_fetch_time[cluster] = 0
- self.stats_parse_time[cluster] = 0
- self.stats_status[cluster] = "unknown"
- self.stats_status_code[cluster] = 0
+ self.initialize_controls(cluster)
save_cluster_table = True
self.log("Found source %s cluster %d" % (name, cluster))
except Exception as e:
self.err("Error allocating new cluster/source %s (%s)" % (name, e))
+
+ self.debug("refreshing cluster list: %s" % cluster_for_refresh_names) if self.dbg else None
+ self.refresh_some_clusters_for_fetch(cluster_for_refresh)
+
if save_cluster_table:
self.cluster_table.save()
self.set_notify_change()

View File

@ -0,0 +1,24 @@
commit 401a5d5d87754af00a1c3f38c036042fb4f50d63
Author: Nathan Scott <nathans@redhat.com>
Date: Mon Aug 11 12:44:06 2025 +1000
qa: simplify my previous additional case within test qa/1976
diff --git a/qa/1976 b/qa/1976
index 54a219e36..9700d9934 100755
--- a/qa/1976
+++ b/qa/1976
@@ -71,10 +71,9 @@ pminfo openmetrics.simple_metric
echo
echo "-- source re-addition with epoch timestamp --"
-txtpath=$here/openmetrics/samples/simple_metric.txt
-urlfile=$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
-echo 'file:///'$txtpath > $urlfile
-$sudo touch -t 197001010000 $urlfile
+cp $here/openmetrics/samples/simple_metric.txt $tmp.simple_metric.txt
+echo 'file:///'$tmp.simple_metric.txt > $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
+$sudo touch -t 197001010000 $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
pminfo openmetrics.simple_metric
echo

View File

@ -0,0 +1,414 @@
commit 137de0c80681723845b880cea42d4ce7d9f0007e
Author: lmchilton <lauren.chilton26@gmail.com>
Date: Fri Sep 19 15:40:45 2025 -0400
pmdaopenmetrics: update metric removal mechanism
update metric removal mechanism to remove metrics
deleted from existing sources. New mechanism first
marks all metrics as true (to be removed) then as
metrics are seen they are marked as false (do not
remove). At the end of the refresh any existing
metrics marked true are removed. Added code for
control initialization and removal. All QA in
pmda.openmetrics group is passing. Edited
function old_enough_for_refresh() to allow
refreshes. In the previous state:
len(self.metrics_by_name) is never 0 after
the first pass, so the function would
always return False.
diff --git a/qa/1976 b/qa/1976
index 9700d9934..88405387a 100755
--- a/qa/1976
+++ b/qa/1976
@@ -2,7 +2,7 @@
# PCP QA Test No. 1976
# Test pmdaopenmetrics metric removal
#
-# Copyright (c) 2017, 2025 Red Hat. All Rights Reserved.
+# Copyright (c) 2025 Red Hat. All Rights Reserved.
#
seq=`basename $0`
echo "QA output created by $seq"
@@ -45,6 +45,15 @@ find $PCP_PMDAS_DIR/openmetrics/config.d -name GNU\* -exec rm -f {} ";"
_pmdaopenmetrics_install
+iam=openmetrics
+# append -R option to pmcd config
+sed < $PCP_PMCDCONF_PATH \
+ -e "/^$iam.*/s/$/ -R 1/" \
+ > $tmp.conf
+$sudo cp $tmp.conf $PCP_PMCDCONF_PATH
+$sudo systemctl restart pmcd
+sleep 2
+
if ! _pmdaopenmetrics_wait_for_metric openmetrics.thermostat
then
status=1
@@ -56,15 +65,23 @@ $sudo rm $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
pminfo openmetrics.simple_metric
echo
+echo "-- check control metrics disappeared --"
+pminfo -dfmt openmetrics.control.status_code
+echo
+
echo "-- source re-addition --"
# same access controls logic as above, user $PCP_USER needs to be
# able to read the file at the end of the URL
#
cp $here/openmetrics/samples/simple_metric.txt $tmp.simple_metric.txt
-echo 'file:///'$tmp.simple_metric.txt >$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
+echo 'file:///'$tmp.simple_metric.txt > $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
pminfo openmetrics.simple_metric
echo
+echo "-- check control metrics reappeared --"
+pminfo -dfmt openmetrics.control.status_code
+echo
+
echo "-- metric removal of recognized source/metric --"
$sudo rm $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url
pminfo openmetrics.simple_metric
@@ -77,6 +94,16 @@ $sudo touch -t 197001010000 $PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.ur
pminfo openmetrics.simple_metric
echo
+echo "-- metric removal by modifying source file, source persists --"
+$sudo sed -i -e "/metric2/d" $tmp'.simple_metric.txt'
+
+echo "-- sleep to allow for old_enough_for_refresh() --"
+sleep 2
+echo
+
+echo "-- metric2 removed --"
+pminfo openmetrics.simple_metric
+
_pmdaopenmetrics_remove >/dev/null 2>&1
# success, all done
diff --git a/qa/1976.out b/qa/1976.out
index e72b1984c..a0dd60721 100644
--- a/qa/1976.out
+++ b/qa/1976.out
@@ -4,10 +4,85 @@ QA output created by 1976
-- metric removal of new source/metric --
Error: openmetrics.simple_metric: Unknown metric name
+-- check control metrics disappeared --
+
+openmetrics.control.status_code PMID: 144.0.6 [per-end-point source URL response status code after the most recent fetch]
+ Data Type: 32-bit int InDom: 144.0 0x24000000
+ Semantics: discrete Units: none
+ inst [0 or "control"] value 0
+ inst [1 or "awk_scripted"] value 0
+ inst [2 or "bad_summary_nometa"] value 0
+ inst [3 or "collectd_sample"] value 0
+ inst [4 or "curl.script"] value 0
+ inst [5 or "curl_filtered"] value 0
+ inst [6 or "curl_hostname_label"] value 0
+ inst [7 or "curl_scripted"] value 0
+ inst [8 or "duplicate_hostname_label"] value 0
+ inst [9 or "good_summary_nometa"] value 0
+ inst [10 or "jenkins_monitoring_javamelody"] value 0
+ inst [11 or "jenkins_prometheus_plugin"] value 0
+ inst [12 or "labelfiltering"] value 0
+ inst [13 or "multiple.namespace.levels"] value 0
+ inst [14 or "multiple.namespace.more"] value 0
+ inst [15 or "pmwebd_3_12_2"] value 0
+ inst [16 or "problematic_strings"] value 0
+ inst [17 or "prom_exposition_formats_example1"] value 0
+ inst [18 or "python_sample_client_server"] value 0
+ inst [19 or "python_scripted"] value 0
+ inst [20 or "reordered_labels"] value 0
+ inst [21 or "sample_pmda_3_12_2"] value 0
+ inst [22 or "sample_pmda_instname_5_0_0"] value 0
+ inst [23 or "sample_pmda_pcp5_metadata"] value 0
+ inst [24 or "sample_prometheus_metrics"] value 0
+ inst [25 or "script_failed"] value 0
+ inst [26 or "sh_script_no_suffix"] value 0
+ inst [27 or "sh_scripted"] value 0
+ inst [29 or "stderr_check"] value 0
+ inst [30 or "thermostat"] value 0
+ inst [31 or "vmware_exporter"] value 0
+
-- source re-addition --
openmetrics.simple_metric.metric2
openmetrics.simple_metric.metric1
+-- check control metrics reappeared --
+
+openmetrics.control.status_code PMID: 144.0.6 [per-end-point source URL response status code after the most recent fetch]
+ Data Type: 32-bit int InDom: 144.0 0x24000000
+ Semantics: discrete Units: none
+ inst [0 or "control"] value 0
+ inst [1 or "awk_scripted"] value 0
+ inst [2 or "bad_summary_nometa"] value 0
+ inst [3 or "collectd_sample"] value 0
+ inst [4 or "curl.script"] value 0
+ inst [5 or "curl_filtered"] value 0
+ inst [6 or "curl_hostname_label"] value 0
+ inst [7 or "curl_scripted"] value 0
+ inst [8 or "duplicate_hostname_label"] value 0
+ inst [9 or "good_summary_nometa"] value 0
+ inst [10 or "jenkins_monitoring_javamelody"] value 0
+ inst [11 or "jenkins_prometheus_plugin"] value 0
+ inst [12 or "labelfiltering"] value 0
+ inst [13 or "multiple.namespace.levels"] value 0
+ inst [14 or "multiple.namespace.more"] value 0
+ inst [15 or "pmwebd_3_12_2"] value 0
+ inst [16 or "problematic_strings"] value 0
+ inst [17 or "prom_exposition_formats_example1"] value 0
+ inst [18 or "python_sample_client_server"] value 0
+ inst [19 or "python_scripted"] value 0
+ inst [20 or "reordered_labels"] value 0
+ inst [21 or "sample_pmda_3_12_2"] value 0
+ inst [22 or "sample_pmda_instname_5_0_0"] value 0
+ inst [23 or "sample_pmda_pcp5_metadata"] value 0
+ inst [24 or "sample_prometheus_metrics"] value 0
+ inst [25 or "script_failed"] value 0
+ inst [26 or "sh_script_no_suffix"] value 0
+ inst [27 or "sh_scripted"] value 0
+ inst [28 or "simple_metric"] value 0
+ inst [29 or "stderr_check"] value 0
+ inst [30 or "thermostat"] value 0
+ inst [31 or "vmware_exporter"] value 0
+
-- metric removal of recognized source/metric --
Error: openmetrics.simple_metric: Unknown metric name
@@ -15,3 +90,8 @@ Error: openmetrics.simple_metric: Unknown metric name
openmetrics.simple_metric.metric2
openmetrics.simple_metric.metric1
+-- metric removal by modifying source file, source persists --
+-- sleep to allow for old_enough_for_refresh() --
+
+-- metric2 removed --
+openmetrics.simple_metric.metric1
diff --git a/src/pmdas/openmetrics/pmdaopenmetrics.1 b/src/pmdas/openmetrics/pmdaopenmetrics.1
index 29370d1cb..e696b2f70 100644
--- a/src/pmdas/openmetrics/pmdaopenmetrics.1
+++ b/src/pmdas/openmetrics/pmdaopenmetrics.1
@@ -2,6 +2,7 @@
.\"
.\" Copyright (c) 2017-2019 Red Hat.
.\" Copyright (c) 2017 Ronak Jain.
+.\" Copyright (c) 2025 Lauren Chilton.
.\"
.\" This program is free software; you can redistribute it and/or modify it
.\" under the terms of the GNU General Public License as published by the
@@ -25,6 +26,7 @@
[\f3\-c\f1 \f2config\f1]
[\f3\-d\f1 \f2domain\f1]
[\f3\-l\f1 \f2logfile\f1]
+[\f3\-R\f1 \f2refresh_timeout\f1]
[\f3\-r\f1 \f2root\f1]
[\f3\-t\f1 \f2timeout\f1]
[\f3\-u\f1 \f2user\f1]
@@ -138,6 +140,13 @@ Use of the
.B \-r
option may also change the defaults for some other command line options,
e.g. the default log file name and the default configuration directory.
+.PP
+The
+.B \-R
+option allows the user to configure the \fItimeout\fR,
+in seconds, between cluster refreshes. The default value is
+.B 10
+seconds.
.SH "CONFIGURATION SOURCES"
As it runs,
.B pmdaopenmetrics
diff --git a/src/pmdas/openmetrics/pmdaopenmetrics.python b/src/pmdas/openmetrics/pmdaopenmetrics.python
index 383c7c899..352f6565f 100755
--- a/src/pmdas/openmetrics/pmdaopenmetrics.python
+++ b/src/pmdas/openmetrics/pmdaopenmetrics.python
@@ -51,10 +51,6 @@ else:
# and/or scripts. See the --nosort option to turn it off.
sort_conf_list = True
-# Number of seconds to wait between poll attempts on a source that
-# we've never been able to connect to & collect a list of metrics from.
-empty_source_pmns_poll = 10.0
-
MAX_CLUSTER = 0xfff # ~ max. number of openmetrics sources
MAX_METRIC = 0x3ff # ~ max. number of metrics per source
MAX_INDOM = 0x7fffffff # coincidentally, ~ product of above
@@ -581,6 +577,8 @@ class Source(object):
self.metrics_by_name = {} # name -> Metric
self.metrics_by_num = {} # number (last component of pmid) -> Metric
+ self.metric_removal_flags = {}
+ self.metric_fullnames = {}
def helptext(self, helpline):
if helpline: # it could be None!
@@ -603,7 +601,7 @@ class Source(object):
'''
now = time.time()
last_try_age = now - self.refresh_time
- return len(self.metrics_by_name) == 0 and last_try_age > empty_source_pmns_poll
+ return len(self.metrics_by_name) == 0 or last_try_age > self.pmda.refresh_timeout
def check_filter(self, name, entrytype):
'''
@@ -689,13 +687,15 @@ class Source(object):
self.pmda.debug("included_labels '%s'" % (included_labels)) if self.pmda.dbg else None
self.pmda.debug("optional_labels '%s'" % (optional_labels)) if self.pmda.dbg else None
if sp.name in self.metrics_by_name:
- if ("openmetrics.%s.%s" % (self.name, sp.name)) not in self.pmda.all_metrics and self.name in self.pmda.re_add_list:
+ self.metric_removal_flags[sp.name] = False
+ if pcpline:
+ split = pcpline.split(" ")
+ fullname = "openmetrics.%s.%s" % (self.name, split[1])
+ else:
+ fullname = "openmetrics.%s.%s" % (self.name, sp.name.replace(":", "."))
+ self.metric_fullnames[sp.name] = fullname
+ if ("openmetrics.%s.%s" % (self.name, sp.name)) not in self.pmda.all_metrics:
# re-add metric to namespace
- if pcpline:
- split = pcpline.split(" ")
- fullname = "openmetrics.%s.%s" % (self.name, split[1])
- else:
- fullname = "openmetrics.%s.%s" % (self.name, sp.name.replace(":", "."))
help_oneline, help_text = self.helptext(helpline)
try:
obj = self.pmda.removed_metrics[fullname]
@@ -967,6 +967,9 @@ class Source(object):
if self.document is None: # error during fetch?
return
+ for metric in self.metrics_by_name:
+ self.metric_removal_flags[metric] = True
+
# parse and handle the openmetrics formatted metric data
parse_time = time.time()
s = self.parse_lines(self.document)
@@ -976,6 +979,19 @@ class Source(object):
self.pmda.stats_parse_time[self.cluster] += incr
self.pmda.stats_parse_time[0] += incr # total
+ for metric, value in self.metric_removal_flags.items():
+ remove_name = self.metric_fullnames[metric]
+ if value is True and remove_name in self.pmda.all_metrics:
+ self.pmda.debug("removing metric from existing source: %s" % metric) if self.pmda.dbg else None
+ try:
+ remove_object = self.pmda.all_metrics[remove_name]
+ self.pmda.remove_metric(remove_name, remove_object)
+ self.pmda.set_need_refresh()
+ del self.pmda.all_metrics[remove_name]
+ self.pmda.removed_metrics[remove_name] = remove_object
+ except Exception as e:
+ self.pmda.debug("cannot remove metric from existing source, see error: %s" % e) if self.pmda.dbg else None
+
# save metric & indom lookup tables changes, if any
for _, m in self.metrics_by_name.items():
try: # NB: must process whole list even if exceptions escape
@@ -1000,7 +1016,7 @@ class Source(object):
return [c_api.PM_ERR_AGAIN, 0]
class OpenMetricsPMDA(PMDA):
- def __init__(self, pmda_name, domain, config, timeout, user, debugflag, logfile):
+ def __init__(self, pmda_name, domain, config, timeout, refresh_timeout, user, debugflag, logfile):
'''
Initialize the PMDA. This can take a while for large configurations.
The openmetrics entry in pmcd.conf specifies to start up in "notready"
@@ -1020,6 +1036,9 @@ class OpenMetricsPMDA(PMDA):
# and the storable metric $(pmda_name).control.debug
self.dbg = debugflag
+ # Number of seconds to wait between poll attempts on a source
+ self.refresh_timeout = refresh_timeout
+
# now everything else may take time
self.pmda_name = pmda_name
self.config_dir = os.path.normpath(config)
@@ -1040,6 +1059,7 @@ class OpenMetricsPMDA(PMDA):
self.all_metrics = {}
# keep track of removed metrics, in case of re-addition
self.removed_metrics = {}
+ self.controls = {0:0}
# compiled regex cache
self.regex_cache = {}
@@ -1155,6 +1175,24 @@ class OpenMetricsPMDA(PMDA):
mtime = m
return mtime, ret
+ def initialize_controls(self, cluster):
+ self.stats_fetch_calls[cluster] = 0
+ self.stats_fetch_time[cluster] = 0
+ self.stats_parse_time[cluster] = 0
+ self.stats_status[cluster] = "unknown"
+ self.stats_status_code[cluster] = 0
+
+ self.controls[cluster] = 1
+
+ def delete_controls(self, cluster):
+ del self.stats_fetch_calls[cluster]
+ del self.stats_fetch_time[cluster]
+ del self.stats_parse_time[cluster]
+ del self.stats_status[cluster]
+ del self.stats_status_code[cluster]
+
+ self.controls[cluster] = 0
+
def rescan_confdir(self):
'''Scan the configuration directories for any new .url files
or scripts. Ensure there is a Source registered in the
@@ -1199,6 +1237,9 @@ class OpenMetricsPMDA(PMDA):
try:
remove_name = key
remove_obj = value
+ cluster = self.cluster_table.intern_lookup_value(split_name[1])
+ if self.controls[cluster] == 1:
+ self.delete_controls(cluster)
self.remove_metric(remove_name, remove_obj)
self.removed_metrics[remove_name] = remove_obj
self.debug("removed metric name: %s" % remove_name) if self.dbg else None
@@ -1245,6 +1286,8 @@ class OpenMetricsPMDA(PMDA):
if value == s:
cluster_for_refresh.append(key)
cluster_for_refresh_names.append(name)
+ if self.controls[key] == 0:
+ self.initialize_controls(key)
self.debug("refreshing cluster list: %s" % cluster_for_refresh_names) if self.dbg else None
self.refresh_some_clusters_for_fetch(cluster_for_refresh)
else:
@@ -1256,11 +1299,7 @@ class OpenMetricsPMDA(PMDA):
self.source_by_cluster[source.cluster] = source
# initialize statistics
- self.stats_fetch_calls[cluster] = 0
- self.stats_fetch_time[cluster] = 0
- self.stats_parse_time[cluster] = 0
- self.stats_status[cluster] = "unknown"
- self.stats_status_code[cluster] = 0
+ self.initialize_controls(cluster)
save_cluster_table = True
self.log("Found source %s cluster %d" % (name, cluster))
@@ -1583,6 +1622,11 @@ if __name__ == '__main__':
type=int,
default=2,
help='HTTP GET timeout for each end-point URL (default 2 seconds)')
+ parser.add_argument(
+ '-R', '--refresh',
+ type=int,
+ default=10,
+ help='timeout between cluster refreshes (default 10 seconds)')
parser.add_argument(
'-u', '--user',
type=str,
@@ -1602,7 +1646,7 @@ if __name__ == '__main__':
# the IPC protocol is ipc_prot="binary notready". See also pmcd(1) man page.
# The "binary notready" setting can also be manually configured in pmcd.conf.
# Default domain number is PMDA(144), see -d option.
- pmda = OpenMetricsPMDA(args.root, args.domain, args.config, args.timeout, args.user, args.debug, args.log)
+ pmda = OpenMetricsPMDA(args.root, args.domain, args.config, args.timeout, args.refresh, args.user, args.debug, args.log)
# Uncomment to force -D or use: pmstore openmetrics.control.debug 1
# pmda.dbg = True