rteval: Add --cyclictest-threshold feature

Add --cyclictest-threshold feature Resolves: rhbz#1995195 Signed-off-by: John Kacur <jkacur@redhat.com>
2022-01-12 15:44:40 -05:00 · 2022-01-12 15:44:40 -05:00 · bbb443541c
commit bbb443541c
parent ea90bc0f71
6 changed files with 300 additions and 1 deletions
--- a/rteval-Add-cyclictest-threshold-USEC.patch
+++ b/rteval-Add-cyclictest-threshold-USEC.patch
@ -0,0 +1,60 @@
+From b902c41fe1688cb767974a5cc6ca337e5ec420e0 Mon Sep 17 00:00:00 2001
+From: John Kacur <jkacur@redhat.com>
+Date: Wed, 12 Jan 2022 11:01:59 -0500
+Subject: [PATCH] rteval: Add --cyclictest-threshold=USEC
+
+Add --cyclictest-threshold=USEC
+This option causes rteval to exit if latency is greater than USEC
+
+This is similar to --cyclictest-breaktrace=USEC
+and uses the --breaktrace option to cyclictest
+
+The difference is that --cyclictest-threshold does NOT write a tracemark
+when the latency threshold is exceeded
+
+Signed-off-by: John Kacur <jkacur@redhat.com>
+---
+ rteval/modules/measurement/cyclictest.py | 10 ++++++++--
+ 1 file changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
+index c094df499403..cc74b467913d 100644
+--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
+@@ -295,9 +295,12 @@ class Cyclictest(rtevalModulePrototype):
+         if 'threads' in self.__cfg and self.__cfg.threads:
+             self.__cmd.append("-t%d" % int(self.__cfg.threads))
+ 
+        # Should have either breaktrace or threshold, not both
+         if 'breaktrace' in self.__cfg and self.__cfg.breaktrace:
+             self.__cmd.append("-b%d" % int(self.__cfg.breaktrace))
+             self.__cmd.append("--tracemark")
+        elif 'threshold' in self.__cfg and self.__cfg.threshold:
+            self.__cmd.append("-b%d" % int(self.__cfg.threshold))
+ 
+         # Buffer for cyclictest data written to stdout
+         self.__cyclicoutput = tempfile.SpooledTemporaryFile(mode='w+b')
+@@ -411,7 +414,7 @@ class Cyclictest(rtevalModulePrototype):
+         if self.__breaktraceval:
+             abrt_n.newProp('reason', 'breaktrace')
+             btv_n = abrt_n.newChild(None, 'breaktrace', None)
+-            btv_n.newProp('latency_threshold', str(self.__cfg.breaktrace))
+            btv_n.newProp('latency_threshold', str(self.__cfg.breaktrace) if self.__cfg.breaktrace else str(self.__cfg.threshold))
+             btv_n.newProp('measured_latency', str(self.__breaktraceval))
+             abrt = True
+ 
+@@ -454,7 +457,10 @@ def ModuleParameters():
+                          "metavar": "PRIO"},
+             "breaktrace": {"descr": "Send a break trace command when latency > USEC",
+                            "default": None,
+-                           "metavar": "USEC"}
+                           "metavar": "USEC"},
+            "threshold": {"descr": "Exit rteval if latency > USEC",
+                          "default": None,
+                          "metavar": "USEC"}
+             }
+ 
+ 
+-- 
+2.31.1
+
--- a/rteval-cyclictest.py-Do-not-pass-obsolete-notrace-op.patch
+++ b/rteval-cyclictest.py-Do-not-pass-obsolete-notrace-op.patch
@ -0,0 +1,28 @@
+From b22d7905d1588a0e96c70e000837ecee4872415f Mon Sep 17 00:00:00 2001
+From: Atsushi Nemoto <atsushi.nemoto@sord.co.jp>
+Date: Wed, 28 Jul 2021 20:20:15 +0900
+Subject: [PATCH] rteval: cyclictest.py: Do not pass obsolete --notrace option
+
+The notrace option was removed from cyclictest on rt-tests v1.4 in 2019.
+
+Signed-off-by: Atsushi Nemoto <atsushi.nemoto@sord.co.jp>
+Signed-off-by: John Kacur <jkacur@redhat.com>
+---
+ rteval/modules/measurement/cyclictest.py | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
+index ae91dbb7c043..b1755d4f4421 100644
+--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
+@@ -286,7 +286,6 @@ class Cyclictest(rtevalModulePrototype):
+         if 'breaktrace' in self.__cfg and self.__cfg.breaktrace:
+             self.__cmd.append("-b%d" % int(self.__cfg.breaktrace))
+             self.__cmd.append("--tracemark")
+-            self.__cmd.append("--notrace")
+ 
+         # Buffer for cyclictest data written to stdout
+         self.__cyclicoutput = tempfile.SpooledTemporaryFile(mode='w+b')
+-- 
+2.31.1
+
--- a/rteval-cyclictest.py-Parse-max-latencies-from-cyclic.patch
+++ b/rteval-cyclictest.py-Parse-max-latencies-from-cyclic.patch
@ -0,0 +1,90 @@
+From e528354ac4b9a82b12ee283808d3254944cfbf9e Mon Sep 17 00:00:00 2001
+From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+Date: Mon, 18 Oct 2021 14:57:34 +0900
+Subject: [PATCH 1/6] rteval: cyclictest.py Parse max latencies from cyclictest
+ output
+
+When collecting a histogram of latencies, "cyclictest" reports the
+maximum latency encountered on each core even if it falls outside the
+configured no. of buckets. This can be useful to understand the worst
+case latencies for the run as well as right-sizing the number of
+buckets for the histogram.
+
+While processing the output of cyclictest, rteval skips the reported
+max latencies and calculates them by capping to the no. of buckets.
+
+Fix rteval by parsing the maximum latencies reported by cyclictest.
+
+Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+Signed-off-by: John Kacur <jkacur@redhat.com>
+---
+ rteval/modules/measurement/cyclictest.py | 31 +++++++++++++++++++-----
+ 1 file changed, 25 insertions(+), 6 deletions(-)
+
+diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
+index b1755d4f4421..f79949faf031 100644
+--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
+@@ -67,20 +67,25 @@ class RunData:
+         retval += "mean:       %f\n" % self.__mean
+         return retval
+ 
+-    def sample(self, value):
+-        self.__samples[value] += self.__samples.setdefault(value, 0) + 1
+    def update_max(self, value):
+         if value > self.__max:
+             self.__max = value
+
+    def update_min(self, value):
+         if value < self.__min:
+             self.__min = value
+
+    def sample(self, value):
+        self.__samples[value] += self.__samples.setdefault(value, 0) + 1
+        self.update_max(value)
+        self.update_min(value)
+         self.__numsamples += 1
+ 
+     def bucket(self, index, value):
+         self.__samples[index] = self.__samples.setdefault(index, 0) + value
+-        if value and index > self.__max:
+-            self.__max = index
+-        if value and index < self.__min:
+-            self.__min = index
+        if value:
+            self.update_max(index)
+            self.update_min(index)
+         self.__numsamples += value
+ 
+     def reduce(self):
+@@ -325,6 +330,18 @@ class Cyclictest(rtevalModulePrototype):
+         return False
+ 
+ 
+    def _parse_max_latencies(self, line):
+        if not line.startswith('# Max Latencies: '):
+            return
+
+        line = line.split(':')[1]
+        vals = [int(x) for x in line.split()]
+
+        for i, core in enumerate(self.__cpus):
+            self.__cyclicdata[core].update_max(vals[i])
+            self.__cyclicdata['system'].update_max(vals[i])
+
+
+     def _WorkloadCleanup(self):
+         if not self.__started:
+             return
+@@ -341,6 +358,8 @@ class Cyclictest(rtevalModulePrototype):
+                 # Catch if cyclictest stopped due to a breaktrace
+                 if line.startswith('# Break value: '):
+                     self.__breaktraceval = int(line.split(':')[1])
+                elif line.startswith('# Max Latencies: '):
+                    self._parse_max_latencies(line)
+                 continue
+ 
+             # Skipping blank lines
+-- 
+2.31.1
+
--- a/rteval-cyclictest.py-Skip-statistics-reporting-in-ca.patch
+++ b/rteval-cyclictest.py-Skip-statistics-reporting-in-ca.patch
@ -0,0 +1,55 @@
+From 0292c8963611f3376b88335b372cfc32b96db8cc Mon Sep 17 00:00:00 2001
+From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+Date: Mon, 18 Oct 2021 14:57:36 +0900
+Subject: [PATCH 3/6] rteval: cyclictest.py: Skip statistics reporting in case
+ of an overflow
+
+The cyclictest.py module recently gained the capability to parse max
+latency values as reported by cyclictest.
+
+When the max latency exceeds the range of the latency histogram (or in
+other words, the number of configured buckets), statistics such as
+mean and standard deviation can not be calculated correctly due to
+lost samples during measurement.
+
+In the case of lost samples, skip statistics generation and warn the
+user about the max latency so they can rerun the measurement.
+
+Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+- Small edit to the explanation
+Signed-off-by: John Kacur <jkacur@redhat.com>
+---
+ rteval/modules/measurement/cyclictest.py | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
+index e459c1839865..c094df499403 100644
+--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
+@@ -67,6 +67,9 @@ class RunData:
+         retval += "mean:       %f\n" % self.__mean
+         return retval
+ 
+    def get_max(self):
+        return self.__max
+
+     def update_max(self, value):
+         if value > self.__max:
+             self.__max = value
+@@ -416,6 +419,13 @@ class Cyclictest(rtevalModulePrototype):
+         if abrt:
+             rep_n.addChild(abrt_n)
+ 
+        # Let the user know if max latency overshot the number of buckets
+        if self.__cyclicdata["system"].get_max() > self.__buckets:
+            self._log(Log.ERR, "Max latency(%dus) exceeded histogram range(%dus). Skipping statistics" %
+                      (self.__cyclicdata["system"].get_max(), self.__buckets))
+            self._log(Log.ERR, "Increase number of buckets to avoid lost samples")
+            return rep_n
+
+         rep_n.addChild(self.__cyclicdata["system"].MakeReport())
+         for thr in self.__cpus:
+             if str(thr) not in self.__cyclicdata:
+-- 
+2.31.1
+
--- a/rteval-cyclictest.py-Sort-the-list-of-cpus.patch
+++ b/rteval-cyclictest.py-Sort-the-list-of-cpus.patch
@ -0,0 +1,46 @@
+From 8240a34f22c09151501ec1fa2ae76cdad057f9e5 Mon Sep 17 00:00:00 2001
+From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+Date: Mon, 18 Oct 2021 14:57:35 +0900
+Subject: [PATCH 2/6] rteval: cyclictest.py: Sort the list of cpus
+
+online_cpus() returns a list of online cpus in arbitrary order. e.g.,
+on a hexacore system it returns -
+
+    ['5', '3', '1', '4', '2', '0']
+
+Generally this wouldn't be a problem but the cyclictest.py module
+matches the unsorted list with the latencies output by "cyclictest"
+which are ordered by core number. This leads to incorrect reporting of
+per-core latencies in the final report generated by rteval. The issue
+was noticed when comparing the rteval report with cyclictest logs
+(enabled by a recent change).
+
+Fix the inconsistency in core numbering by sorting the list of cpus
+used by the cyclictest.py module. As the cpus are represented as strings,
+sort with an integer key to avoid issues on systems with a large number
+of cores.
+
+Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
+Signed-off-by: John Kacur <jkacur@redhat.com>
+---
+ rteval/modules/measurement/cyclictest.py | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/rteval/modules/measurement/cyclictest.py b/rteval/modules/measurement/cyclictest.py
+index f79949faf031..e459c1839865 100644
+--- a/rteval/modules/measurement/cyclictest.py
+++ b/rteval/modules/measurement/cyclictest.py
+@@ -214,6 +214,10 @@ class Cyclictest(rtevalModulePrototype):
+         else:
+             self.__cpus = online_cpus()
+ 
+        # Sort the list of cpus to align with the order reported by
+        # cyclictest
+        self.__cpus.sort(key=int)
+
+         # Get the cpuset from the environment
+         cpuset = os.sched_getaffinity(0)
+ 
+-- 
+2.31.1
+
--- a/rteval.spec
+++ b/rteval.spec
@ -1,6 +1,6 @@
 Name:		rteval
 Version:	3.2
-Release:	8%{?dist}
+Release:	9%{?dist}
 Summary:	Utility to evaluate system suitability for RT Linux

 Group:		Development/Tools
@ -25,6 +25,7 @@ Requires:	elfutils elfutils-libelf-devel
 Requires:	openssl openssl-devel
 Requires:	stress-ng
 Requires:	perl-interpreter, perl-devel, perl-generators
+Requires:	libmpc, libmpc-devel
 BuildArch:	noarch

 #Patches
@ -35,6 +36,11 @@ Patch4: rteval-Remove-mult-from-hackbench.py.patch
 Patch5: rteval-Remove-self.__err_sleep.patch
 Patch6: rteval-Make-donotrun-work-correctly-in-load-modules.patch
 Patch7: rteval-Add-idea-of-exclusive-load-module-and-make-st.patch
+Patch8: rteval-cyclictest.py-Do-not-pass-obsolete-notrace-op.patch
+Patch9: rteval-cyclictest.py-Parse-max-latencies-from-cyclic.patch
+Patch10: rteval-cyclictest.py-Sort-the-list-of-cpus.patch
+Patch11: rteval-cyclictest.py-Skip-statistics-reporting-in-ca.patch
+Patch12: rteval-Add-cyclictest-threshold-USEC.patch

 %description
 The rteval script is a utility for measuring various aspects of
@ -54,6 +60,11 @@ to the screen.
 %patch5 -p1
 %patch6 -p1
 %patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1

 %build
 %{__python3} setup.py build
@ -80,6 +91,15 @@ rm -rf $RPM_BUILD_ROOT
 %{python3_sitelib}/rteval/__pycache__/*

 %changelog
+* Wed Jan 12 2022 John Kacur <jkacur@redhat.com> - 3.2-9
+- Do not pass obsolete notrace option to cyclictest
+- Parse maximum latency even if outside configured buckets
+- Sort the list of cpus
+- Skip statistics generation if max latency is outside of configured buckets
+- Add --cyclictest-threshold=USEC feature
+- Add libmpc and libmpc-devel to the Requires
+Resolves: rhbz#1995195
+
 * Thu Nov 04 2021 John Kacur <jkacur@redhat.com> - 3.2-8
 - allow hackbench to run with warning on low mem
 - clean-ups to hackbench.py