rteval: Add --cyclictest-threshold feature

Add --cyclictest-threshold feature
Resolves: rhbz#1995195

Signed-off-by: John Kacur <jkacur@redhat.com>
This commit is contained in:
John Kacur 2022-01-12 15:44:40 -05:00
parent ea90bc0f71
commit bbb443541c
6 changed files with 300 additions and 1 deletions

View File

@ -0,0 +1,60 @@
From b902c41fe1688cb767974a5cc6ca337e5ec420e0 Mon Sep 17 00:00:00 2001
From: John Kacur <jkacur@redhat.com>
Date: Wed, 12 Jan 2022 11:01:59 -0500
Subject: [PATCH] rteval: Add --cyclictest-threshold=USEC
Add --cyclictest-threshold=USEC
This option causes rteval to exit if latency is greater than USEC.
This is similar to --cyclictest-breaktrace=USEC
and also uses the --breaktrace option of cyclictest.
The difference is that --cyclictest-threshold does NOT write a tracemark
when the latency is exceeded.
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/cyclictest.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
index c094df499403..cc74b467913d 100644
--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
@@ -295,9 +295,12 @@ class Cyclictest(rtevalModulePrototype):
if 'threads' in self.__cfg and self.__cfg.threads:
self.__cmd.append("-t%d" % int(self.__cfg.threads))
+ # Should have either breaktrace or threshold, not both
if 'breaktrace' in self.__cfg and self.__cfg.breaktrace:
self.__cmd.append("-b%d" % int(self.__cfg.breaktrace))
self.__cmd.append("--tracemark")
+ elif 'threshold' in self.__cfg and self.__cfg.threshold:
+ self.__cmd.append("-b%d" % int(self.__cfg.threshold))
# Buffer for cyclictest data written to stdout
self.__cyclicoutput = tempfile.SpooledTemporaryFile(mode='w+b')
@@ -411,7 +414,7 @@ class Cyclictest(rtevalModulePrototype):
if self.__breaktraceval:
abrt_n.newProp('reason', 'breaktrace')
btv_n = abrt_n.newChild(None, 'breaktrace', None)
- btv_n.newProp('latency_threshold', str(self.__cfg.breaktrace))
+ btv_n.newProp('latency_threshold', str(self.__cfg.breaktrace) if self.__cfg.breaktrace else str(self.__cfg.threshold))
btv_n.newProp('measured_latency', str(self.__breaktraceval))
abrt = True
@@ -454,7 +457,10 @@ def ModuleParameters():
"metavar": "PRIO"},
"breaktrace": {"descr": "Send a break trace command when latency > USEC",
"default": None,
- "metavar": "USEC"}
+ "metavar": "USEC"},
+ "threshold": {"descr": "Exit rteval if latency > USEC",
+ "default": None,
+ "metavar": "USEC"}
}
--
2.31.1

View File

@ -0,0 +1,28 @@
From b22d7905d1588a0e96c70e000837ecee4872415f Mon Sep 17 00:00:00 2001
From: Atsushi Nemoto <atsushi.nemoto@sord.co.jp>
Date: Wed, 28 Jul 2021 20:20:15 +0900
Subject: [PATCH] rteval: cyclictest.py: Do not pass obsolete --notrace option
The notrace option was removed from cyclictest on rt-tests v1.4 in 2019.
Signed-off-by: Atsushi Nemoto <atsushi.nemoto@sord.co.jp>
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/cyclictest.py | 1 -
1 file changed, 1 deletion(-)
diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
index ae91dbb7c043..b1755d4f4421 100644
--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
@@ -286,7 +286,6 @@ class Cyclictest(rtevalModulePrototype):
if 'breaktrace' in self.__cfg and self.__cfg.breaktrace:
self.__cmd.append("-b%d" % int(self.__cfg.breaktrace))
self.__cmd.append("--tracemark")
- self.__cmd.append("--notrace")
# Buffer for cyclictest data written to stdout
self.__cyclicoutput = tempfile.SpooledTemporaryFile(mode='w+b')
--
2.31.1

View File

@ -0,0 +1,90 @@
From e528354ac4b9a82b12ee283808d3254944cfbf9e Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Date: Mon, 18 Oct 2021 14:57:34 +0900
Subject: [PATCH 1/6] rteval: cyclictest.py Parse max latencies from cyclictest
output
When collecting a histogram of latencies, "cyclictest" reports the
maximum latency encountered on each core even if they fall outside the
configured no. of buckets. This can be useful to understand the worst
case latencies for the run as well as right sizing the number of
buckets for the histogram.
While processing the output of cyclictest, rteval skips the reported
max latencies and calculates them by capping to the no. of buckets.
Fix rteval by parsing the maximum latencies reported by cyclictest.
Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/cyclictest.py | 31 +++++++++++++++++++-----
1 file changed, 25 insertions(+), 6 deletions(-)
diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
index b1755d4f4421..f79949faf031 100644
--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
@@ -67,20 +67,25 @@ class RunData:
retval += "mean: %f\n" % self.__mean
return retval
- def sample(self, value):
- self.__samples[value] += self.__samples.setdefault(value, 0) + 1
+ def update_max(self, value):
if value > self.__max:
self.__max = value
+
+ def update_min(self, value):
if value < self.__min:
self.__min = value
+
+ def sample(self, value):
+ self.__samples[value] += self.__samples.setdefault(value, 0) + 1
+ self.update_max(value)
+ self.update_min(value)
self.__numsamples += 1
def bucket(self, index, value):
self.__samples[index] = self.__samples.setdefault(index, 0) + value
- if value and index > self.__max:
- self.__max = index
- if value and index < self.__min:
- self.__min = index
+ if value:
+ self.update_max(index)
+ self.update_min(index)
self.__numsamples += value
def reduce(self):
@@ -325,6 +330,18 @@ class Cyclictest(rtevalModulePrototype):
return False
+ def _parse_max_latencies(self, line):
+ if not line.startswith('# Max Latencies: '):
+ return
+
+ line = line.split(':')[1]
+ vals = [int(x) for x in line.split()]
+
+ for i, core in enumerate(self.__cpus):
+ self.__cyclicdata[core].update_max(vals[i])
+ self.__cyclicdata['system'].update_max(vals[i])
+
+
def _WorkloadCleanup(self):
if not self.__started:
return
@@ -341,6 +358,8 @@ class Cyclictest(rtevalModulePrototype):
# Catch if cyclictest stopped due to a breaktrace
if line.startswith('# Break value: '):
self.__breaktraceval = int(line.split(':')[1])
+ elif line.startswith('# Max Latencies: '):
+ self._parse_max_latencies(line)
continue
# Skipping blank lines
--
2.31.1

View File

@ -0,0 +1,55 @@
From 0292c8963611f3376b88335b372cfc32b96db8cc Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Date: Mon, 18 Oct 2021 14:57:36 +0900
Subject: [PATCH 3/6] rteval: cyclictest.py: Skip statistics reporting in case
of an overflow
The cyclictest.py module recently gained the capability to parse max
latency values as reported by cyclictest.
When the max latency exceeds the range of the latency histogram (or in
other words, the number of configured buckets), statistics such as
mean and standard deviation can not be calculated correctly due to
lost samples during measurement.
In the case of lost samples, skip statistics generation, report the max
latency, and warn the user to rerun the measurement with more buckets.
Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
- Small edit to the explanation
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/cyclictest.py | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
index e459c1839865..c094df499403 100644
--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
@@ -67,6 +67,9 @@ class RunData:
retval += "mean: %f\n" % self.__mean
return retval
+ def get_max(self):
+ return self.__max
+
def update_max(self, value):
if value > self.__max:
self.__max = value
@@ -416,6 +419,13 @@ class Cyclictest(rtevalModulePrototype):
if abrt:
rep_n.addChild(abrt_n)
+ # Let the user know if max latency overshot the number of buckets
+ if self.__cyclicdata["system"].get_max() > self.__buckets:
+ self._log(Log.ERR, "Max latency(%dus) exceeded histogram range(%dus). Skipping statistics" %
+ (self.__cyclicdata["system"].get_max(), self.__buckets))
+ self._log(Log.ERR, "Increase number of buckets to avoid lost samples")
+ return rep_n
+
rep_n.addChild(self.__cyclicdata["system"].MakeReport())
for thr in self.__cpus:
if str(thr) not in self.__cyclicdata:
--
2.31.1

View File

@ -0,0 +1,46 @@
From 8240a34f22c09151501ec1fa2ae76cdad057f9e5 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Date: Mon, 18 Oct 2021 14:57:35 +0900
Subject: [PATCH 2/6] rteval: cyclictest.py: Sort the list of cpus
online_cpus() returns a list of online cpus in arbitrary order, e.g.,
on a hexacore system it may return:
['5', '3', '1', '4', '2', '0']
Generally this wouldn't be a problem but the cyclictest.py module
matches the unsorted list with the latencies output by "cyclictest"
which are ordered by core number. This leads to incorrect reporting of
per-core latencies in the final report generated by rteval. The issue
was noticed when comparing the rteval report with cyclictest logs
(enabled by a recent change).
Fix the inconsistency in core numbering by sorting the list of cpus
used by cyclictest.py module. As the cpus are represented as a string,
sort with the integer key to avoid issues on systems with large number
of cores.
Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/cyclictest.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
index f79949faf031..e459c1839865 100644
--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
@@ -214,6 +214,10 @@ class Cyclictest(rtevalModulePrototype):
else:
self.__cpus = online_cpus()
+ # Sort the list of cpus to align with the order reported by
+ # cyclictest
+ self.__cpus.sort(key=int)
+
# Get the cpuset from the environment
cpuset = os.sched_getaffinity(0)
--
2.31.1

View File

@ -1,6 +1,6 @@
Name: rteval
Version: 3.2
Release: 8%{?dist}
Release: 9%{?dist}
Summary: Utility to evaluate system suitability for RT Linux
Group: Development/Tools
@ -25,6 +25,7 @@ Requires: elfutils elfutils-libelf-devel
Requires: openssl openssl-devel
Requires: stress-ng
Requires: perl-interpreter, perl-devel, perl-generators
Requires: libmpc, libmpc-devel
BuildArch: noarch
#Patches
@ -35,6 +36,11 @@ Patch4: rteval-Remove-mult-from-hackbench.py.patch
Patch5: rteval-Remove-self.__err_sleep.patch
Patch6: rteval-Make-donotrun-work-correctly-in-load-modules.patch
Patch7: rteval-Add-idea-of-exclusive-load-module-and-make-st.patch
Patch8: rteval-cyclictest.py-Do-not-pass-obsolete-notrace-op.patch
Patch9: rteval-cyclictest.py-Parse-max-latencies-from-cyclic.patch
Patch10: rteval-cyclictest.py-Sort-the-list-of-cpus.patch
Patch11: rteval-cyclictest.py-Skip-statistics-reporting-in-ca.patch
Patch12: rteval-Add-cyclictest-threshold-USEC.patch
%description
The rteval script is a utility for measuring various aspects of
@ -54,6 +60,11 @@ to the screen.
%patch5 -p1
%patch6 -p1
%patch7 -p1
%patch8 -p1
%patch9 -p1
%patch10 -p1
%patch11 -p1
%patch12 -p1
%build
%{__python3} setup.py build
@ -80,6 +91,15 @@ rm -rf $RPM_BUILD_ROOT
%{python3_sitelib}/rteval/__pycache__/*
%changelog
* Wed Jan 12 2022 John Kacur <jkacur@redhat.com> - 3.2-9
- Do not pass obsolete notrace option to cyclictest
- Parse maximum latency even if outside configured buckets
- Sort the list of cpus
- Skip statistics generation if max latency outside of configured buckets
- Add --cyclictest-threshold=USEC feature
- Add libmpc and libmpc-devel to the Requires
Resolves: rhbz#1995195
* Thu Nov 04 2021 John Kacur <jkacur@redhat.com> - 3.2-8
- allow hackbench to run with warning on low mem
- clean-ups to hackbench.py