pcp/SOURCES/pmda-openmetrics-rollup.patch

466 lines
20 KiB
Diff

diff -Naurp pcp-6.3.7.orig/qa/1976 pcp-6.3.7/qa/1976
--- pcp-6.3.7.orig/qa/1976 1970-01-01 10:00:00.000000000 +1000
+++ pcp-6.3.7/qa/1976 2025-06-26 18:02:39.313416612 +1000
@@ -0,0 +1,85 @@
+#!/bin/sh
+# PCP QA Test No. 1976
+# Exercise pmdaopenmetrics removing and re-adding the metrics of a source
+#
+# Copyright (c) 2017, 2025 Red Hat. All Rights Reserved.
+#
+seq=$(basename "$0")
+echo "QA output created by $seq"
+
+# pull in the shared openmetrics QA environment, filters and checks
+. ./common.openmetrics
+
+_pmdaopenmetrics_check || _notrun "openmetrics pmda not installed"
+
+status=1	# assume failure until the very end
+
+_cleanup()
+{
+    cd "$here"
+    _pmdaopenmetrics_cleanup
+    $sudo rm -rf "$tmp" "$tmp".*
+}
+
+_prepare_pmda openmetrics
+trap "_cleanup; exit \$status" 0 1 2 3 15
+_stop_auto_restart pmcd
+
+_pmdaopenmetrics_save_config
+
+# Register every sample text file as a file:// URL source.  Each URL has
+# to point somewhere the user $PCP_USER (pmcd) can read.
+#
+( cd "$here/openmetrics/samples"; ls -1 *.txt ) | sort | while read file
+do
+    cp "$here/openmetrics/samples/$file" "$tmp.$file"
+    urlbase=$(basename "$file" .txt | tr .- _)
+    echo "file://$tmp.$file" >"$tmp.tmp"
+    $sudo cp "$tmp.tmp" "$PCP_PMDAS_DIR/openmetrics/config.d/$urlbase.url"
+done
+ls -l "$PCP_PMDAS_DIR/openmetrics/config.d" >>"$seq_full"
+
+# install the sample scripts as well, then drop any files named GNU*
+cp -a "$here"/openmetrics/scripts/* "$PCP_PMDAS_DIR/openmetrics/config.d"
+find "$PCP_PMDAS_DIR/openmetrics/config.d" -name 'GNU*' -exec rm -f {} \;
+
+_pmdaopenmetrics_install
+
+_pmdaopenmetrics_wait_for_metric openmetrics.thermostat || {
+    # the wait helper reported failure, so give up with a failing status
+    status=1
+    exit
+}
+
+echo "-- metric removal of new source/metric --"
+$sudo rm "$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url"
+pminfo openmetrics.simple_metric
+echo
+
+echo "-- source re-addition --"
+# as with the initial setup, the file at the end of the URL has to be
+# readable by user $PCP_USER
+#
+cp "$here/openmetrics/samples/simple_metric.txt" "$tmp.simple_metric.txt"
+echo "file:///$tmp.simple_metric.txt" >"$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url"
+pminfo openmetrics.simple_metric
+echo
+
+echo "-- metric removal of recognized source/metric --"
+$sudo rm "$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url"
+pminfo openmetrics.simple_metric
+echo
+
+echo "-- source re-addition with epoch timestamp --"
+sample_txt="$here/openmetrics/samples/simple_metric.txt"
+url_conf="$PCP_PMDAS_DIR/openmetrics/config.d/simple_metric.url"
+echo "file:///$sample_txt" >"$url_conf"
+$sudo touch -t 197001010000 "$url_conf"
+pminfo openmetrics.simple_metric
+echo
+
+_pmdaopenmetrics_remove >/dev/null 2>&1
+
+# every step ran, so report success
+status=0
+exit
diff -Naurp pcp-6.3.7.orig/qa/1976.out pcp-6.3.7/qa/1976.out
--- pcp-6.3.7.orig/qa/1976.out 1970-01-01 10:00:00.000000000 +1000
+++ pcp-6.3.7/qa/1976.out 2025-06-26 18:02:01.879861640 +1000
@@ -0,0 +1,17 @@
+QA output created by 1976
+
+=== openmetrics agent installation ===
+-- metric removal of new source/metric --
+Error: openmetrics.simple_metric: Unknown metric name
+
+-- source re-addition --
+openmetrics.simple_metric.metric2
+openmetrics.simple_metric.metric1
+
+-- metric removal of recognized source/metric --
+Error: openmetrics.simple_metric: Unknown metric name
+
+-- source re-addition with epoch timestamp --
+openmetrics.simple_metric.metric2
+openmetrics.simple_metric.metric1
+
diff -Naurp pcp-6.3.7.orig/qa/group pcp-6.3.7/qa/group
--- pcp-6.3.7.orig/qa/group 2025-06-26 17:55:37.790462638 +1000
+++ pcp-6.3.7/qa/group 2025-06-26 17:58:18.996389423 +1000
@@ -2205,6 +2205,7 @@ pmcd.pdu
1963 pmda.linux local
1970 pmda.bpf local
1973 pcp zoneinfo python local
+1976 pmdaopenmetrics python local
1978 atop local pmlogrewrite
1984 pmlogconf pmda.redis local
1985 pmfind local valgrind
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh pcp-6.3.7/qa/openmetrics/scripts/curl/script.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl/script.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl/script.sh 2025-06-26 18:02:50.155355973 +1000
@@ -1,4 +1,5 @@
#! /bin/sh
. /etc/pcp.conf
-curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
+# curl honours -q (skip curlrc) only at the start of its first argument
+curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh pcp-6.3.7/qa/openmetrics/scripts/curl_filtered.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_filtered.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_filtered.sh 2025-06-26 18:02:50.155694515 +1000
@@ -7,6 +7,7 @@
. /etc/pcp.conf
-( curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt ; \
-curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt) \
+# curl honours -q (skip curlrc) only at the start of its first argument
+( curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt ; \
+curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt) \
| sed -e '/metric2/d'
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh pcp-6.3.7/qa/openmetrics/scripts/curl_hostname_label.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_hostname_label.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_hostname_label.sh 2025-06-26 18:02:50.155942641 +1000
@@ -12,5 +12,6 @@
# here for QA purposes we're fetching from a local file
# and just pretending it came from a remote host.
-curl -q -G file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt 2>/dev/null | \
+# curl honours -q (skip curlrc) only at the start of its first argument
+curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt 2>/dev/null | \
sed -e 's/[a-z0-9]*=/hostname="remotehost",&/'
diff -Naurp pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh pcp-6.3.7/qa/openmetrics/scripts/curl_scripted.sh
--- pcp-6.3.7.orig/qa/openmetrics/scripts/curl_scripted.sh 2023-11-16 17:51:39.000000000 +1100
+++ pcp-6.3.7/qa/openmetrics/scripts/curl_scripted.sh 2025-06-26 18:02:50.156229975 +1000
@@ -1,5 +1,6 @@
#! /bin/sh
. /etc/pcp.conf
-curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
-curl -Gq file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt
+# curl honours -q (skip curlrc) only at the start of its first argument
+curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_metric.txt
+curl -qGs file://$PCP_PMDAS_DIR/openmetrics/config.d/some_other_metric.txt
diff -Naurp pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python pcp-6.3.7/src/pmdas/openmetrics/pmdaopenmetrics.python
--- pcp-6.3.7.orig/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-06-26 17:55:37.790973098 +1000
+++ pcp-6.3.7/src/pmdas/openmetrics/pmdaopenmetrics.python 2025-06-26 18:02:26.571388227 +1000
@@ -32,7 +32,8 @@ import subprocess
import sys
from ctypes import c_int
from socket import gethostname
-from stat import ST_MODE, S_IXUSR, ST_CTIME
+from urllib.parse import urlparse
+from stat import ST_MODE, S_IXUSR
import requests
from pcp.pmapi import pmUnits, pmContext
@@ -127,15 +128,9 @@ class Metric(object):
(name, pmContext.pmIDStr(self.pmid), self.mtype, self.msem, self.singular, self.mindom, self.labels))
self.obj = pmdaMetric(self.pmid, self.mtype, self.mindom, self.msem, self.munits)
+ self.source.pmda.all_metrics[self.mname] = self.obj
- if helpline: # it could be None!
- unescaped = helpline.replace('\\\\', '\\').replace('\\n', '\n')
- split = unescaped.split('\n')
- help_oneline = split[0] # must have at least one entry
- help_text = '\n'.join(split[1:]) # may have other entries
- else:
- help_oneline = ''
- help_text = ''
+ help_text, help_oneline = self.source.helptext(helpline)
try:
self.source.pmda.add_metric(self.mname, self.obj, help_oneline, help_text)
@@ -565,7 +560,7 @@ class Source(object):
self.path = path # pathname to .url or executable file
self.url = None
self.parse_error = False
- self.parse_url_time = 0 # timestamp of config file when it was last parsed
+ self.parse_time = None # last time config file was parsed
self.is_scripted = is_scripted
self.pmda = thispmda # the shared pmda
self.requests = None
@@ -588,6 +583,16 @@ class Source(object):
self.metrics_by_name = {} # name -> Metric
self.metrics_by_num = {} # number (last component of pmid) -> Metric
+    def helptext(self, helpline):
+        '''Split an escaped help line at its first newline.  Returns the
+        pair (help_text, help_oneline) - note the order - and two empty
+        strings when helpline is None or empty.'''
+        if not helpline:
+            return '', ''
+        expanded = helpline.replace('\\\\', '\\').replace('\\n', '\n')
+        # up to the first newline is the one-liner, the rest the long text
+        help_oneline, _, help_text = expanded.partition('\n')
+        return help_text, help_oneline
def old_enough_for_refresh(self):
'''But what is "old"? If it is empty (no metrics), then it
@@ -685,6 +690,23 @@ class Source(object):
self.pmda.debug("included_labels '%s'" % (included_labels)) if self.pmda.dbg else None
self.pmda.debug("optional_labels '%s'" % (optional_labels)) if self.pmda.dbg else None
if sp.name in self.metrics_by_name:
+ if ("openmetrics.%s.%s" % (self.name, sp.name)) not in self.pmda.all_metrics and self.name in self.pmda.re_add_list:
+ # known metric of a re-added source: put it back in the namespace
+ if pcpline:
+ split = pcpline.split(" ")
+ fullname = "openmetrics.%s.%s" % (self.name, split[1])
+ else:
+ fullname = "openmetrics.%s.%s" % (self.name, sp.name.replace(":", "."))
+ help_text, help_oneline = self.helptext(helpline) # helptext() returns (text, oneline)
+ try:
+ obj = self.pmda.removed_metrics[fullname]
+ self.pmda.add_metric(fullname, obj, help_oneline, help_text)
+ self.pmda.debug("re-adding metric: %s to namespace" % fullname) if self.pmda.dbg else None
+ self.pmda.all_metrics[fullname] = obj
+ del self.pmda.removed_metrics[fullname]
+ self.pmda.set_need_refresh()
+ except Exception as e:
+ self.pmda.debug("Can't re-add metric: %s, see error: %s" % (fullname, e)) if self.pmda.dbg else None
m = self.metrics_by_name[sp.name]
assert self.metrics_by_num[m.metricnum] == m
if m.singular:
@@ -693,6 +715,7 @@ class Source(object):
else:
m.store_inst(naming_labels, sp.value)
self.pmda.debug("naming_labels '%s'" % (naming_labels)) if self.pmda.dbg else None
+ # new metric case
else:
# check metric is not excluded by filters
fullname = "openmetrics.%s.%s" % (self.name, sp.name)
@@ -800,9 +823,9 @@ class Source(object):
return num_metrics
- def parse_url_config(self, filepath):
+ def parse_config(self, filepath):
'''
- Parse a URL config file. The first line is always the URL.
+ Parse a configuration file. The first line is always the URL.
Remaining lines are prefixed with a keyword. Supported keywords
include '#' for a comment, 'HEADER:' to add to the header passed
to the headers dict parameter to the get() call. Note the ':' are
@@ -900,12 +923,12 @@ class Source(object):
if not s[ST_MODE] & S_IXUSR:
self.pmda.err("cannot execute script '%s'" % self.path)
return
- elif self.parse_url_time < s[ST_CTIME]:
+ elif self.parse_time is None or self.parse_time < s.st_mtime_ns:
# (re)parse the URL from given file
- self.parse_url_config(self.path)
- self.parse_url_time = s[ST_CTIME]
+ self.parse_config(self.path)
+ self.parse_time = s.st_mtime_ns
except Exception as e:
- self.pmda.err("cannot read %s: %s" % (self.path, e))
+ self.pmda.err("cannot stat %s: %s" % (self.path, e))
return
# fetch the document
@@ -1001,7 +1024,7 @@ class OpenMetricsPMDA(PMDA):
# now everything else may take time
self.pmda_name = pmda_name
self.config_dir = os.path.normpath(config)
- self.config_dir_ctime = None
+ self.config_dir_mtime = 0.0
self.timeout = timeout
# a single central Session that all our sources can concurrently reuse
@@ -1014,6 +1037,11 @@ class OpenMetricsPMDA(PMDA):
reserved_cluster = self.cluster_table.intern_lookup_value("control")
assert reserved_cluster == 0
self.source_by_cluster = {}
+ # all metrics added, to be used for removal
+ self.all_metrics = {}
+ # keep track of removed metrics, in case of re-addition
+ self.removed_metrics = {}
+ self.controls = {0:0}
# compiled regex cache
self.regex_cache = {}
@@ -1106,26 +1134,53 @@ class OpenMetricsPMDA(PMDA):
assert s == self.source_by_name[s.name]
- def traverse(self, directory, ctime):
+ def traverse(self, directory, mtime): # mtime: newest modification time seen so far; the updated value is returned
''' Return list of files below dir, recursively '''
ret = []
- m = os.path.getctime(directory)
- if ctime is None or m > ctime:
- ctime = m
+ m = os.path.getmtime(directory) # start with the directory's own modification time
+ if mtime is None or m > mtime:
+ mtime = m
for path, subdirs, files in os.walk(directory):
for f in files:
if not f.startswith("."):
fname = os.path.join(path, f)
- m = os.path.getctime(fname)
- if ctime is None or m > ctime:
- ctime = m
+ m = os.path.getmtime(fname)
+ if mtime is None or m > mtime:
+ mtime = m
ret.append(fname)
+ fname = os.path.join(path, f) # NOTE(review): same expression as the fname built above - redundant unless this sits outside the dotfile check; confirm
+ with open(fname, 'r') as name: # NOTE(review): opens the config entry as text; an unreadable or non-text entry would raise here - confirm the caller copes
+ f_path = name.readline().strip() # only the first line of the entry is inspected
+ if f_path.startswith("file:///"): # a local file URL: its target's mtime is folded in too
+ parsed = urlparse(f_path)
+ m = os.path.getmtime(parsed.path) # NOTE(review): raises OSError when the URL's target file does not exist - confirm
+ if mtime is None or m > mtime:
+ mtime = m
for d in subdirs:
- m, _ = self.traverse(os.path.join(path, d), ctime)
- if ctime is None or m > ctime:
- ctime = m
- return ctime, ret
+ m, _ = self.traverse(os.path.join(path, d), mtime) # only the time is kept from the recursive call; its file list is discarded
+ if mtime is None or m > mtime:
+ mtime = m
+ return mtime, ret
+
+    def initialize_controls(self, cluster):
+        '''Start the statistics kept for cluster afresh and flag them live.'''
+        for counter in (self.stats_fetch_calls, self.stats_fetch_time,
+                        self.stats_parse_time, self.stats_status_code):
+            counter[cluster] = 0
+        self.stats_status[cluster] = "unknown"
+
+        # 1 records that the statistics entries above exist for this cluster
+        self.controls[cluster] = 1
+
+    def delete_controls(self, cluster):
+        '''Drop every statistic kept for cluster, then flag them as gone.'''
+        for table in (self.stats_fetch_calls, self.stats_fetch_time,
+                      self.stats_parse_time, self.stats_status,
+                      self.stats_status_code):
+            del table[cluster]
+
+        self.controls[cluster] = 0
def rescan_confdir(self):
'''Scan the configuration directories for any new .url files
@@ -1138,19 +1193,56 @@ class OpenMetricsPMDA(PMDA):
'''
traverse_time = time.time()
- dir_ctime, conf_filelist = self.traverse(self.config_dir, self.config_dir_ctime)
+ dir_mtime, conf_filelist = self.traverse(self.config_dir, self.config_dir_mtime)
traverse_time = time.time() - traverse_time
- if self.config_dir_ctime is None or self.config_dir_ctime < dir_ctime:
- self.config_dir_ctime = dir_ctime
+ if self.config_dir_mtime < dir_mtime:
+ self.config_dir_mtime = dir_mtime
else: # no new or changed conf files, don't rescan directory
return
self.log("Config change detected, traversed %d config entries in %.04fs, rescanning ..." % (len(conf_filelist), traverse_time))
nickname_regexp = self.lookup_regex(r"^[A-Za-z][A-Za-z0-9_.]*$")
+ self.re_add_list = []
+
+ # calculate config entry nicknames
+ nicknames = []
+ for file in conf_filelist:
+ file_split = os.path.splitext(file)
+ name = file_split[0].replace(self.config_dir + "/", "").replace("/", ".")
+ nicknames.append(name)
+
+ # check if config change adds a previously removed source
+ for key in self.removed_metrics:
+ split_name = key.split(".")
+ if split_name[1] in nicknames:
+ self.re_add_list.append(split_name[1])
+
+ # if source is not in config directory, remove the metric
+ for key, value in self.all_metrics.items():
+ split_name = key.split(".")
+ if split_name[1] in nicknames:
+ continue
+ try:
+ remove_name = key
+ remove_obj = value
+ cluster = self.cluster_table.intern_lookup_value(split_name[1])
+ if self.controls[cluster] == 1:
+ self.delete_controls(cluster)
+ self.remove_metric(remove_name, remove_obj)
+ self.removed_metrics[remove_name] = remove_obj
+ self.debug("removed metric name: %s" % remove_name) if self.dbg else None
+ self.set_need_refresh()
+ except Exception as e:
+ self.debug("can't remove metric: %s, see error: %s" % (key, e)) if self.dbg else None
+
+ for key in self.removed_metrics:
+ if key in self.all_metrics:
+ del self.all_metrics[key]
- # TODO: maybe nuke sources related to removed files
save_cluster_table = False
+ cluster_for_refresh_names = []
+ cluster_for_refresh = []
if sort_conf_list:
# sorted for indom cluster consistency
conf_filelist = sorted(conf_filelist)
@@ -1177,6 +1269,13 @@ class OpenMetricsPMDA(PMDA):
if name in self.source_by_name:
# this source is already known
self.assert_source_invariants(name=name)
+ s = self.source_by_name[name]
+ for key, value in self.source_by_cluster.items():
+ if value == s:
+ cluster_for_refresh.append(key)
+ cluster_for_refresh_names.append(name)
+ if name in self.re_add_list:
+ self.initialize_controls(key)
else:
try:
path = file
@@ -1185,17 +1284,16 @@ class OpenMetricsPMDA(PMDA):
self.source_by_name[source.name] = source
self.source_by_cluster[source.cluster] = source
- # initialize statistics
- self.stats_fetch_calls[cluster] = 0
- self.stats_fetch_time[cluster] = 0
- self.stats_parse_time[cluster] = 0
- self.stats_status[cluster] = "unknown"
- self.stats_status_code[cluster] = 0
+ self.initialize_controls(cluster)
save_cluster_table = True
self.log("Found source %s cluster %d" % (name, cluster))
except Exception as e:
self.err("Error allocating new cluster/source %s (%s)" % (name, e))
+
+ self.debug("refreshing cluster list: %s" % cluster_for_refresh_names) if self.dbg else None
+ self.refresh_some_clusters_for_fetch(cluster_for_refresh)
+
if save_cluster_table:
self.cluster_table.save()
self.set_notify_change()