Add timerlat tracing

Add timerlat tracing
Add Fix -aNone being passed to cyclictest
Add rtla as a requires for timerlat
Resolves: RHEL-35461

Signed-off-by: John Kacur <jkacur@redhat.com>
This commit is contained in:
John Kacur 2024-06-24 15:44:27 -04:00
parent c261d895e5
commit f0f764c0ae
5 changed files with 802 additions and 4 deletions

View File

@ -0,0 +1,45 @@
From 17a39863e5f89fbaf51dc158bc7e27f46676d46d Mon Sep 17 00:00:00 2001
From: Tomas Glozar <tglozar@redhat.com>
Date: Tue, 11 Jun 2024 16:45:53 +0200
Subject: [PATCH] rteval: Fix -aNone being passed to cyclictest
When rteval is called via the command line, cpulists for both
measurements and loads default to an empty string and are further
processed by parse_cpulist_from_config. However, this is not true when
rteval is used as a module: in that case, neither the default
command-line value is used nor parse_cpulist_from_config is run, leading
to None being set to the config property of measurements which is
explicitely passed down to the corresponding cyclictest config property.
After 64ce7848dfab ("rteval: Add relative cpulists for measurements"),
where the check for None was removed from the cyclictest module, rteval
passes "-aNone" to cyclictest when being used as a module.
Call parse_cpulist_from_config with an empty string to get the default
cpulist to pass to cyclictest module if cpulist is empty in
the measurement config.
Fixes: 64ce7848dfab ("rteval: Add relative cpulists for measurements")
Signed-off-by: Tomas Glozar <tglozar@redhat.com>
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/__init__.py | 3 +++
1 file changed, 3 insertions(+)
diff --git a/rteval/modules/measurement/__init__.py b/rteval/modules/measurement/__init__.py
index 11bd7b0fce69..43c0fda30ce1 100644
--- a/rteval/modules/measurement/__init__.py
+++ b/rteval/modules/measurement/__init__.py
@@ -148,6 +148,9 @@ measurement profiles, based on their characteristics"""
modcfg = self.__cfg.GetSection("measurement")
cpulist = modcfg.cpulist
run_on_isolcpus = modcfg.run_on_isolcpus
+ if cpulist is None:
+ # Get default cpulist value
+ cpulist = cpulist_utils.collapse_cpulist(parse_cpulist_from_config("", run_on_isolcpus))
for (modname, modtype) in modcfg:
if isinstance(modtype, str) and modtype.lower() == 'module': # Only 'module' will be supported (ds)
--
2.45.2

View File

@ -0,0 +1,526 @@
From 5909521e06ed92ea60ffb247b25ca86c232f71bf Mon Sep 17 00:00:00 2001
From: John Kacur <jkacur@redhat.com>
Date: Thu, 20 Jun 2024 21:34:24 -0400
Subject: [PATCH 1/4] rteval: timerlat: Add timerlat tracing to rteval
This patch adds tracing from timerlat to rteval.
These two options are added
--timerlat-stoptrace USEC
Stop trace if thread latency higher than USEC
--timerlat-trace FILE
File to save trace to
This also stores the summary from rtla timerlat in the rteval xml file.
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/timerlat.py | 213 ++++++++++++++++++++++++-
rteval/rteval_text.xsl | 181 ++++++++++++++++++++-
2 files changed, 389 insertions(+), 5 deletions(-)
diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py
index e8345fab1ad7..45adec1b33e1 100644
--- a/rteval/modules/measurement/timerlat.py
+++ b/rteval/modules/measurement/timerlat.py
@@ -16,6 +16,7 @@ from rteval.modules import rtevalModulePrototype
from rteval.systopology import cpuinfo, SysTopology
from rteval.cpulist_utils import expand_cpulist, collapse_cpulist
+
class TLRunData:
''' class to store instance data from a timerlat run '''
def __init__(self, coreid, datatype, priority, logfnc):
@@ -188,6 +189,15 @@ class Timerlat(rtevalModulePrototype):
self.__cpus = [str(c) for c in expand_cpulist(self.__cpulist)]
self.__numcores = len(self.__cpus)
+ # Has tracing been triggered
+ self.__stoptrace = False
+ # This stores the output from rtla
+ self.__posttrace = ""
+ # Stop Trace Data
+ self.__stdata = {}
+ # Stop Trace Cpu
+ self.stcpu = -1
+
self.__timerlat_out = None
self.__timerlat_err = None
self.__started = False
@@ -218,10 +228,21 @@ class Timerlat(rtevalModulePrototype):
self.__cmd = ['rtla', 'timerlat', 'hist', '-P', f'f:{int(self.__priority)}', '-u']
self.__cmd.append(f'-c{self.__cpulist}')
self.__cmd.append(f'-E{self.__buckets}')
+
+ if self.__cfg.stoptrace:
+ self.__cmd.append(f"-T{int(self.__cfg.stoptrace)}")
+
+ if self.__cfg.trace:
+ if not self.__cfg.stoptrace:
+ self._log(Log.WARN, f'Ignoring trace={self.__cfg.trace}, because stoptrace not invoked')
+ else:
+ self.__cmd.append(f'-t={self.__cfg.trace}')
+
self._log(Log.DEBUG, f'self.__cmd = {self.__cmd}')
self.__timerlat_out = tempfile.SpooledTemporaryFile(mode='w+b')
self.__timerlat_err = tempfile.SpooledTemporaryFile(mode='w+b')
+
def _WorkloadTask(self):
if self.__started:
return
@@ -252,6 +273,10 @@ class Timerlat(rtevalModulePrototype):
os.kill(self.__timerlat_process.pid, signal.SIGINT)
time.sleep(2)
+ blocking_thread_detected = False
+ softirq_interference_detected = False
+ irq_interference_detected = False
+
# Parse histogram output
self.__timerlat_out.seek(0)
for line in self.__timerlat_out:
@@ -261,6 +286,113 @@ class Timerlat(rtevalModulePrototype):
if not line:
continue
+ # Parsing if stoptrace has been invoked
+ if self.__stoptrace:
+ self.__posttrace += line
+ line = line.strip()
+ fields = line.split()
+ if line.startswith("##") and fields[1] == "CPU":
+ self.stcpu = int(fields[2])
+ self._log(Log.DEBUG, f"self.stcpu = {self.stcpu}")
+ self.__stdata[self.stcpu] = {}
+ continue
+ if self.stcpu == -1:
+ self._log(Log.WARN, "Stop trace has been invoked, but a stop cpu has not been identified.")
+ continue
+ if line.startswith('------------------'):
+ blocking_thread_detected = False
+ softirq_interference_detected = False
+ irq_interference_detected = False
+ continue
+ if line.startswith("Thread latency:"):
+ thread_latency_percent = fields[-1].strip('()%')
+ self._log(Log.DEBUG, f"thread_latency_percent = {thread_latency_percent}")
+ thread_latency = fields[-3]
+ self._log(Log.DEBUG, f"thread_latency = {thread_latency}")
+ self.__stdata[self.stcpu]["Thread_latency"] = (thread_latency, thread_latency_percent)
+ elif line.startswith("Previous IRQ interference"):
+ self._log(Log.DEBUG, f'Previous_IRQ_interference = {fields[-2]}')
+ self.__stdata[self.stcpu]["Previous_IRQ_interference"] = fields[-2]
+ elif line.startswith("IRQ handler delay:"):
+ irq_handler_delay_percent = fields[-2].strip('(')
+ irq_handler_delay = fields[-4]
+ # Do we have (exit from idle)?
+ if fields[3] == '(exit':
+ field_name = "IRQ_handler_delay_exit_from_idle"
+ else:
+ field_name = "IRQ_handler_delay"
+ self._log(Log.DEBUG, f"{field_name} = {irq_handler_delay}")
+ self._log(Log.DEBUG, f"{field_name}_percent = {irq_handler_delay_percent}")
+ self.__stdata[self.stcpu][field_name] = (irq_handler_delay, irq_handler_delay_percent)
+ elif line.startswith("IRQ latency:"):
+ self._log(Log.DEBUG, f"irq_latency = {fields[-2]}")
+ self.__stdata[self.stcpu]["IRQ_latency"] = fields[-2]
+ elif line.startswith("Timerlat IRQ duration"):
+ timerlat_irq_duration_percent = fields[-2].strip('(')
+ self._log(Log.DEBUG, f"timerlat_irq_duration_percent = {timerlat_irq_duration_percent}")
+ timerlat_irq_duration = fields[-4]
+ self._log(Log.DEBUG, f"timerlat_irq_duration = {timerlat_irq_duration}")
+ self.__stdata[self.stcpu]["Timerlat_IRQ_duration"] = (timerlat_irq_duration, timerlat_irq_duration_percent)
+ elif line.startswith("Blocking thread:"):
+ blocking_thread_percent = fields[-2].strip('(')
+ self._log(Log.DEBUG, f"blocking_thread_percent = {blocking_thread_percent}")
+ blocking_thread = fields[-4]
+ self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}")
+ self.__stdata[self.stcpu]["Blocking_Thread"] = (blocking_thread, blocking_thread_percent)
+ blocking_thread_detected = True
+ irq_interference_detected = False
+ softirq_interference_detected = False
+ elif line.startswith("IRQ interference"):
+ irq_interference_percent = fields[-2].strip('(')
+ self._log(Log.DEBUG, f"irq_interference_percent = {irq_interference_percent}")
+ irq_interference = fields[-4]
+ self._log(Log.DEBUG, f"irq_interference = {irq_interference}")
+ self.__stdata[self.stcpu]["IRQ_interference"] = (irq_interference, irq_interference_percent)
+ blocking_thread_detected = False
+ irq_interference_detected = True
+ softirq_interference_detected = False
+ elif line.startswith("Softirq interference"):
+ softirq_interference_percent = fields[-2].strip('(')
+ self._log(Log.DEBUG, f"softirq_interference_percent = {softirq_interference_percent}")
+ softirq_interference = fields[-4]
+ self._log(Log.DEBUG, f"softirq_interference = {softirq_interference}")
+ self.__stdata[self.stcpu]["Softirq_interference"] = (softirq_interference, softirq_interference_percent)
+ blocking_thread_detected = False
+ irq_interference_detected = False
+ softirq_interference_detected = True
+ elif blocking_thread_detected:
+ self._log(Log.DEBUG, f'line={line}')
+ blocking_thread = " ".join(fields[0:-2])
+ self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}")
+ blocking_threadus = fields[-2]
+ self._log(Log.DEBUG, f"blocking_threadus = {blocking_threadus}")
+ self.__stdata[self.stcpu].setdefault("blocking_thread", [])
+ self.__stdata[self.stcpu]["blocking_thread"] += [(blocking_thread, blocking_threadus)]
+ elif softirq_interference_detected:
+ softirq = " ".join(fields[0:-2])
+ softirq_latency = fields[-2]
+ self._log(Log.DEBUG, f'softirq = {softirq}')
+ self._log(Log.DEBUG, f'softirq_latency = {softirq_latency}')
+ self.__stdata[self.stcpu].setdefault("softirq_interference", [])
+ self.__stdata[self.stcpu]["softirq_interference"] += [(softirq, softirq_latency)]
+ elif irq_interference_detected:
+ irq_interference_name = " ".join(fields[0:-2])
+ irq_interference_latency = fields[-2]
+ self._log(Log.DEBUG, f'irq_interference = {irq_interference_name}, latency = {irq_interference_latency}')
+ self.__stdata[self.stcpu].setdefault("irq_interference", [])
+ self.__stdata[self.stcpu]["irq_interference"] += [(irq_interference_name, irq_interference_latency)]
+ elif line.startswith("Max timerlat IRQ latency"):
+ self._log(Log.DEBUG, f"line={line}")
+ max_timerlat_irq_latency = fields[-5]
+ self._log(Log.DEBUG, f"max_timerlat_irq_latency = {max_timerlat_irq_latency}")
+ max_timerlat_cpu = int(fields[-1])
+ self._log(Log.DEBUG, f"max_timerlat_cpu = {max_timerlat_cpu}")
+ self.__stdata.setdefault(max_timerlat_cpu, {})
+ self.__stdata[max_timerlat_cpu]["Max_timerlat_IRQ_latency_from_idle"] = max_timerlat_irq_latency
+ else:
+ self._log(Log.DEBUG, f'line = {line}')
+ continue
+
if line.startswith('#'):
if line.startswith('# Duration:'):
duration = line.split()[2]
@@ -285,9 +417,13 @@ class Timerlat(rtevalModulePrototype):
elif line.startswith('max:'):
#print(line)
continue
+ elif line.startswith('rtla timerlat hit stop tracing'):
+ self.__stoptrace = True
+ self.__posttrace += line
+ continue
else:
- pass
#print(line)
+ pass
vals = line.split()
if not vals:
@@ -301,12 +437,18 @@ class Timerlat(rtevalModulePrototype):
continue
for i, core in enumerate(self.__cpus):
- self.__timerlatdata[core].bucket(index, int(vals[i*3+1]),
+ # There might not be a count on every cpu if tracing invoked
+ if i*3 + 1 >= len(vals):
+ self.__timerlatdata[core].bucket(index, 0, 0, 0)
+ self.__timerlatdata['system'].bucket(index, 0, 0, 0)
+ else:
+ self.__timerlatdata[core].bucket(index, int(vals[i*3+1]),
int(vals[i*3+2]),
int(vals[i*3+3]))
- self.__timerlatdata['system'].bucket(index, int(vals[i*3+1]),
+ self.__timerlatdata['system'].bucket(index, int(vals[i*3+1]),
int(vals[i*3+2]),
int(vals[i*3+3]))
+
# Generate statistics for each RunData object
for n in list(self.__timerlatdata.keys()):
self.__timerlatdata[n].reduce()
@@ -320,6 +462,62 @@ class Timerlat(rtevalModulePrototype):
rep_n = libxml2.newNode('timerlat')
rep_n.newProp('command_line', ' '.join(self.__cmd))
+ stoptrace_invoked_n = libxml2.newNode('stoptrace_invoked')
+ if self.stcpu != -1:
+ stoptrace_invoked_n.newProp("invoked", "true")
+ else:
+ stoptrace_invoked_n.newProp("invoked", "")
+ rep_n.addChild(stoptrace_invoked_n)
+
+ if self.stcpu != -1:
+ self._log(Log.DEBUG, f'self.__stdata = {self.__stdata}')
+ for cpu in self.__stdata:
+ # This is Max timerlat IRQ latency from idle
+ # With no other data from that cpu, so don't create a
+ # stoptrace_report for this
+ if len(self.__stdata[cpu]) == 1:
+ continue
+ stoptrace_n = libxml2.newNode('stoptrace_report')
+ stoptrace_n.newProp("CPU", str(cpu))
+ for k, v in self.__stdata[cpu].items():
+ self._log(Log.DEBUG, f"cpu={cpu}, k={k}, v={v}")
+ if isinstance(v, tuple):
+ latency = str(v[0])
+ percent = str(v[1])
+ cpu_n = stoptrace_n.newTextChild(None, str(k), None)
+ n = cpu_n.newTextChild(None, "latency", latency)
+ n.newProp('unit', 'us')
+
+ n = cpu_n.newTextChild(None, "latency_percent", percent)
+ n.newProp('unit', '%')
+ elif isinstance(v, list):
+ if k in ("blocking_thread", "softirq_interference", "irq_interference"):
+ for name, latency in v:
+ cpu_n = stoptrace_n.newTextChild(None, k, None)
+ n = cpu_n.newTextChild(None, "name", name)
+ n = cpu_n.newTextChild(None, "latency", latency)
+ n.newProp('unit', 'us')
+ else:
+ if k == "Max_timerlat_IRQ_latency_from_idle":
+ continue
+ cpu_n = stoptrace_n.newTextChild(None, str(k), str(v))
+ cpu_n.newProp('unit', 'us')
+ rep_n.addChild(stoptrace_n)
+
+ self._log(Log.DEBUG, f'timerlat: posttrace = \n{self.__posttrace}')
+ self._log(Log.DEBUG, 'timerlat: posttrace END')
+ for cpu in self.__stdata:
+ for k, v in self.__stdata[cpu].items():
+ if isinstance(v, tuple):
+ continue
+ if k == "Max_timerlat_IRQ_latency_from_idle":
+ max_timerlat_n = libxml2.newNode('max_timerlat_report')
+ max_timerlat_n.newProp("CPU", str(cpu))
+ cpu_n = max_timerlat_n.newTextChild(None, k, str(v))
+ cpu_n.newProp('unit', 'us')
+ rep_n.addChild(max_timerlat_n)
+ return rep_n
+
rep_n.addChild(self.__timerlatdata['system'].MakeReport())
for thr in self.__cpus:
if str(thr) not in self.__timerlatdata:
@@ -341,7 +539,13 @@ def ModuleParameters():
"metavar": "PRIO" },
"buckets": {"descr": "Number of buckets",
"default": 3500,
- "metavar": "NUM"}
+ "metavar": "NUM" },
+ "stoptrace": {"descr": "Stop trace if thread latency higher than USEC",
+ "default": None,
+ "metavar": "USEC" },
+ "trace": {"descr": "File to save trace to",
+ "default": None,
+ "metavar": "FILE" },
}
def create(params, logger):
@@ -367,6 +571,7 @@ if __name__ == '__main__':
cfg_tl = cfg.GetSection('timerlat')
cfg_tl.cpulist = collapse_cpulist(SysTopology().online_cpus())
+ cfg_tl.stoptrace=50
RUNTIME = 10
diff --git a/rteval/rteval_text.xsl b/rteval/rteval_text.xsl
index 2f03bda0bb55..70777354efa5 100644
--- a/rteval/rteval_text.xsl
+++ b/rteval/rteval_text.xsl
@@ -316,14 +316,26 @@
<xsl:value-of select="@command_line"/>
<xsl:text>&#10;&#10;</xsl:text>
- <xsl:apply-templates select="abort_report"/>
+ <xsl:if test="stoptrace_invoked">
+ <xsl:text>rtla timerlat hit stop tracing</xsl:text>
+ <xsl:text>
+</xsl:text>
+ <xsl:apply-templates select="stoptrace_report"/>
+ <xsl:apply-templates select="max_timerlat_report"/>
+ </xsl:if>
+ <!-- Make sure the description is available before printing System: -->
+ <xsl:if test="system/@description">
<xsl:text> System: </xsl:text>
<xsl:value-of select="system/@description"/>
<xsl:text>&#10;</xsl:text>
+ </xsl:if>
+ <!-- If stoptrace_invoked is true, no Statistics are available -->
+ <xsl:if test="stoptrace_invoked != true">
<xsl:text> Statistics: &#10;</xsl:text>
<xsl:apply-templates select="system/statistics"/>
+ </xsl:if>
<!-- Add CPU core info and stats-->
<xsl:apply-templates select="core">
@@ -470,4 +482,171 @@
</xsl:if>
</xsl:template>
+ <!-- Format posttrace information if present -->
+ <xsl:template match="stoptrace_report">
+ <xsl:text>## CPU </xsl:text>
+ <xsl:value-of select="@CPU"/>
+ <xsl:text> hit stop tracing, analyzing it ##</xsl:text>
+ <xsl:text>
+</xsl:text>
+
+
+ <xsl:if test="Previous_IRQ_interference">
+ <xsl:text>Previous IRQ interference: up to </xsl:text>
+ <xsl:value-of select="Previous_IRQ_interference"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Previous_IRQ_interference/@unit"/>
+ <xsl:text>
+</xsl:text>
+ </xsl:if>
+
+ <xsl:if test="IRQ_handler_delay">
+ <xsl:text>IRQ handler delay: </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="IRQ_handler_delay/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+ </xsl:if>
+
+ <xsl:if test="IRQ_handler_delay_exit_from_idle">
+ <xsl:text>IRQ handler delay: (exit from idle) </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay_exit_from_idle/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay_exit_from_idle/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="IRQ_handler_delay_exit_from_idle/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_handler_delay_exit_from_idle/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+ </xsl:if>
+
+ <xsl:text>IRQ latency:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_latency/@unit"/>
+ <xsl:text>
+</xsl:text>
+
+ <xsl:text>Timerlat IRQ duration:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Timerlat_IRQ_duration/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Timerlat_IRQ_duration/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="Timerlat_IRQ_duration/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Timerlat_IRQ_duration/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+
+ <xsl:if test="Blocking_Thread">
+ <xsl:text>Blocking thread:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Blocking_Thread/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Blocking_Thread/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="Blocking_Thread/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Blocking_Thread/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+ </xsl:if>
+
+ <xsl:for-each select="blocking_thread">
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency/@unit"/>
+ <xsl:text>
+</xsl:text>
+ </xsl:for-each>
+
+ <xsl:if test="Softirq_interference">
+ <xsl:text>Softirq interference:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Softirq_interference/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Softirq_interference/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="Softirq_interference/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Softirq_interference/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+ </xsl:if>
+
+ <xsl:for-each select="softirq_interference">
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency/@unit"/>
+ <xsl:text>
+</xsl:text>
+ </xsl:for-each>
+
+ <xsl:if test="IRQ_interference">
+ <xsl:text>IRQ interference:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_interference/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_interference/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="IRQ_interference/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="IRQ_interference/latency_percent/@unit"/>
+ <xsl:text>)
+</xsl:text>
+ </xsl:if>
+
+ <xsl:for-each select="irq_interference">
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="latency/@unit"/>
+ <xsl:text>
+</xsl:text>
+ </xsl:for-each>
+
+ <xsl:text>--------------------------------------------------------------------------------</xsl:text>
+ <xsl:text>Thread latency:</xsl:text>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Thread_latency/latency"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Thread_latency/latency/@unit"/>
+ <xsl:text> (</xsl:text>
+ <xsl:value-of select="Thread_latency/latency_percent"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Thread_latency/latency_percent/@unit"/>
+ <xsl:text>)
+
+</xsl:text>
+ </xsl:template>
+
+ <xsl:template match="max_timerlat_report">
+ <xsl:text>
+</xsl:text>
+ <xsl:text>Max timerlat IRQ latency from idle:</xsl:text>
+ <xsl:value-of select="Max_timerlat_IRQ_latency_from_idle"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="Max_timerlat_IRQ_latency_from_idle/@unit"/>
+ <xsl:text> in cpu </xsl:text>
+ <xsl:value-of select="@CPU"/>
+ <xsl:text>
+</xsl:text>
+ </xsl:template>
+
</xsl:stylesheet>
--
2.45.2

View File

@ -0,0 +1,217 @@
From 4fd5b77dc47f039551050925e442db9a17be5e75 Mon Sep 17 00:00:00 2001
From: John Kacur <jkacur@redhat.com>
Date: Mon, 24 Jun 2024 08:45:54 -0400
Subject: [PATCH 4/4] rteval: timerlat tracing clean-up
The patch cleans-up the code that stores the timerlat summary into xml,
after a trace is stopped. It does so by grouping similar kinds of output
together for parsing. It also reduces the amount of debug output.
Signed-off-by: John Kacur <jkacur@redhat.com>
---
rteval/modules/measurement/timerlat.py | 142 ++++++++++---------------
rteval/rteval_text.xsl | 10 +-
2 files changed, 62 insertions(+), 90 deletions(-)
diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py
index 45adec1b33e1..dc6226ccc991 100644
--- a/rteval/modules/measurement/timerlat.py
+++ b/rteval/modules/measurement/timerlat.py
@@ -273,12 +273,14 @@ class Timerlat(rtevalModulePrototype):
os.kill(self.__timerlat_process.pid, signal.SIGINT)
time.sleep(2)
+
+ # Parse histogram output
+ self.__timerlat_out.seek(0)
+
blocking_thread_detected = False
softirq_interference_detected = False
irq_interference_detected = False
- # Parse histogram output
- self.__timerlat_out.seek(0)
for line in self.__timerlat_out:
line = bytes.decode(line)
@@ -291,6 +293,8 @@ class Timerlat(rtevalModulePrototype):
self.__posttrace += line
line = line.strip()
fields = line.split()
+ if not line:
+ continue
if line.startswith("##") and fields[1] == "CPU":
self.stcpu = int(fields[2])
self._log(Log.DEBUG, f"self.stcpu = {self.stcpu}")
@@ -304,91 +308,59 @@ class Timerlat(rtevalModulePrototype):
softirq_interference_detected = False
irq_interference_detected = False
continue
- if line.startswith("Thread latency:"):
- thread_latency_percent = fields[-1].strip('()%')
- self._log(Log.DEBUG, f"thread_latency_percent = {thread_latency_percent}")
- thread_latency = fields[-3]
- self._log(Log.DEBUG, f"thread_latency = {thread_latency}")
- self.__stdata[self.stcpu]["Thread_latency"] = (thread_latency, thread_latency_percent)
- elif line.startswith("Previous IRQ interference"):
- self._log(Log.DEBUG, f'Previous_IRQ_interference = {fields[-2]}')
- self.__stdata[self.stcpu]["Previous_IRQ_interference"] = fields[-2]
- elif line.startswith("IRQ handler delay:"):
- irq_handler_delay_percent = fields[-2].strip('(')
- irq_handler_delay = fields[-4]
- # Do we have (exit from idle)?
- if fields[3] == '(exit':
- field_name = "IRQ_handler_delay_exit_from_idle"
- else:
- field_name = "IRQ_handler_delay"
- self._log(Log.DEBUG, f"{field_name} = {irq_handler_delay}")
- self._log(Log.DEBUG, f"{field_name}_percent = {irq_handler_delay_percent}")
- self.__stdata[self.stcpu][field_name] = (irq_handler_delay, irq_handler_delay_percent)
- elif line.startswith("IRQ latency:"):
- self._log(Log.DEBUG, f"irq_latency = {fields[-2]}")
- self.__stdata[self.stcpu]["IRQ_latency"] = fields[-2]
- elif line.startswith("Timerlat IRQ duration"):
- timerlat_irq_duration_percent = fields[-2].strip('(')
- self._log(Log.DEBUG, f"timerlat_irq_duration_percent = {timerlat_irq_duration_percent}")
- timerlat_irq_duration = fields[-4]
- self._log(Log.DEBUG, f"timerlat_irq_duration = {timerlat_irq_duration}")
- self.__stdata[self.stcpu]["Timerlat_IRQ_duration"] = (timerlat_irq_duration, timerlat_irq_duration_percent)
- elif line.startswith("Blocking thread:"):
- blocking_thread_percent = fields[-2].strip('(')
- self._log(Log.DEBUG, f"blocking_thread_percent = {blocking_thread_percent}")
- blocking_thread = fields[-4]
- self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}")
- self.__stdata[self.stcpu]["Blocking_Thread"] = (blocking_thread, blocking_thread_percent)
- blocking_thread_detected = True
- irq_interference_detected = False
- softirq_interference_detected = False
- elif line.startswith("IRQ interference"):
- irq_interference_percent = fields[-2].strip('(')
- self._log(Log.DEBUG, f"irq_interference_percent = {irq_interference_percent}")
- irq_interference = fields[-4]
- self._log(Log.DEBUG, f"irq_interference = {irq_interference}")
- self.__stdata[self.stcpu]["IRQ_interference"] = (irq_interference, irq_interference_percent)
- blocking_thread_detected = False
- irq_interference_detected = True
- softirq_interference_detected = False
- elif line.startswith("Softirq interference"):
- softirq_interference_percent = fields[-2].strip('(')
- self._log(Log.DEBUG, f"softirq_interference_percent = {softirq_interference_percent}")
- softirq_interference = fields[-4]
- self._log(Log.DEBUG, f"softirq_interference = {softirq_interference}")
- self.__stdata[self.stcpu]["Softirq_interference"] = (softirq_interference, softirq_interference_percent)
- blocking_thread_detected = False
- irq_interference_detected = False
- softirq_interference_detected = True
- elif blocking_thread_detected:
- self._log(Log.DEBUG, f'line={line}')
- blocking_thread = " ".join(fields[0:-2])
- self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}")
- blocking_threadus = fields[-2]
- self._log(Log.DEBUG, f"blocking_threadus = {blocking_threadus}")
- self.__stdata[self.stcpu].setdefault("blocking_thread", [])
- self.__stdata[self.stcpu]["blocking_thread"] += [(blocking_thread, blocking_threadus)]
- elif softirq_interference_detected:
- softirq = " ".join(fields[0:-2])
- softirq_latency = fields[-2]
- self._log(Log.DEBUG, f'softirq = {softirq}')
- self._log(Log.DEBUG, f'softirq_latency = {softirq_latency}')
- self.__stdata[self.stcpu].setdefault("softirq_interference", [])
- self.__stdata[self.stcpu]["softirq_interference"] += [(softirq, softirq_latency)]
- elif irq_interference_detected:
- irq_interference_name = " ".join(fields[0:-2])
- irq_interference_latency = fields[-2]
- self._log(Log.DEBUG, f'irq_interference = {irq_interference_name}, latency = {irq_interference_latency}')
- self.__stdata[self.stcpu].setdefault("irq_interference", [])
- self.__stdata[self.stcpu]["irq_interference"] += [(irq_interference_name, irq_interference_latency)]
- elif line.startswith("Max timerlat IRQ latency"):
- self._log(Log.DEBUG, f"line={line}")
- max_timerlat_irq_latency = fields[-5]
- self._log(Log.DEBUG, f"max_timerlat_irq_latency = {max_timerlat_irq_latency}")
+
+ # work around rtla not printing ':' after all names
+ if line.startswith('Softirq interference'):
+ name = 'Softirq_interference'
+ elif line.startswith('IRQ interference'):
+ name = 'IRQ_interference'
+ else:
+ name = ''.join(line.split(':')[0]).replace(' ', '_')
+ self._log(Log.DEBUG, f"name={name}")
+
+ if name in ['Thread_latency']:
+ latency = fields[-3]
+ percent = fields[-1].strip('()%')
+ self._log(Log.DEBUG, f'{name} = ({latency}, {percent})')
+ self.__stdata[self.stcpu][name] = (latency, percent)
+ continue
+ if name in ['Timerlat_IRQ_duration', 'IRQ_handler_delay', 'Blocking_thread', 'IRQ_interference', 'Softirq_interference']:
+ latency = fields[-4]
+ percent = fields[-2].strip('(')
+ if name == 'IRQ_handler_delay' and fields[3] == '(exit':
+ name = 'IRQ_handler_delay_exit_from_idle'
+ self._log(Log.DEBUG, f'{name} = ({latency}, {percent})')
+ self.__stdata[self.stcpu][name] = (latency, percent)
+ detected = {'Blocking_thread' : (True, False, False),
+ 'IRQ_interference' : (False, True, False),
+ 'Softirq_interference' : (False, False, True) }
+ if name in ('Blocking_thread', 'IRQ_interference', 'Softirq_interference'):
+ blocking_thread_detected, irq_interference_detected, softirq_interference_detected = detected.get(name)
+ continue
+ if name in ["IRQ_latency", "Previous_IRQ_interference"]:
+ latency = fields[-2]
+ self._log(Log.DEBUG, f'{name} = {fields[-2]}')
+ self.__stdata[self.stcpu][name] = fields[-2]
+ continue
+ if blocking_thread_detected or softirq_interference_detected or irq_interference_detected:
+ if blocking_thread_detected:
+ field_name = "blocking_thread"
+ elif softirq_interference_detected:
+ field_name = "softirq_interference"
+ elif irq_interference_detected:
+ field_name = "irq_interference"
+ thread = " ".join(fields[0:-2])
+ latency = fields[-2]
+ self._log(Log.DEBUG, f"{field_name} += [({thread}, {latency})]")
+ self.__stdata[self.stcpu].setdefault(field_name, [])
+ self.__stdata[self.stcpu][field_name] += [(thread, latency)]
+ continue
+ if name == "Max_timerlat_IRQ_latency_from_idle":
+ latency = fields[-5]
max_timerlat_cpu = int(fields[-1])
- self._log(Log.DEBUG, f"max_timerlat_cpu = {max_timerlat_cpu}")
+ self._log(Log.DEBUG, f'self.__stdata[{max_timerlat_cpu}][{name}] = {latency}')
self.__stdata.setdefault(max_timerlat_cpu, {})
- self.__stdata[max_timerlat_cpu]["Max_timerlat_IRQ_latency_from_idle"] = max_timerlat_irq_latency
+ self.__stdata[max_timerlat_cpu][name] = latency
else:
self._log(Log.DEBUG, f'line = {line}')
continue
diff --git a/rteval/rteval_text.xsl b/rteval/rteval_text.xsl
index 70777354efa5..7ca0ae3a4c66 100644
--- a/rteval/rteval_text.xsl
+++ b/rteval/rteval_text.xsl
@@ -546,16 +546,16 @@
<xsl:text>)
</xsl:text>
- <xsl:if test="Blocking_Thread">
+ <xsl:if test="Blocking_thread">
<xsl:text>Blocking thread:</xsl:text>
<xsl:text> </xsl:text>
- <xsl:value-of select="Blocking_Thread/latency"/>
+ <xsl:value-of select="Blocking_thread/latency"/>
<xsl:text> </xsl:text>
- <xsl:value-of select="Blocking_Thread/latency/@unit"/>
+ <xsl:value-of select="Blocking_thread/latency/@unit"/>
<xsl:text> (</xsl:text>
- <xsl:value-of select="Blocking_Thread/latency_percent"/>
+ <xsl:value-of select="Blocking_thread/latency_percent"/>
<xsl:text> </xsl:text>
- <xsl:value-of select="Blocking_Thread/latency_percent/@unit"/>
+ <xsl:value-of select="Blocking_thread/latency_percent/@unit"/>
<xsl:text>)
</xsl:text>
</xsl:if>
--
2.45.2

View File

@ -1,6 +1,6 @@
Name: rteval
Version: 3.8
Release: 3%{?dist}
Release: 4%{?dist}
Summary: Utility to evaluate system suitability for RT Linux
Group: Development/Tools
@ -29,12 +29,16 @@ Requires: libmpc, libmpc-devel
Requires: dwarves
Requires: dmidecode
Requires: procps-ng
Requires: rtla
BuildArch: noarch
#Patches
Patch1: 0001-Updated-rteval-man-page.patch
Patch2: rteval-Fix-sysreport-traceback-when-utility-sos-not-.patch
Patch3: rteval-sysstat-Convert-base64-data-to-text-before-wr.patch
Patch1: Updated-rteval-man-page.patch
Patch2: rteval-Fix-aNone-being-passed-to-cyclictest.patch
Patch3: rteval-Fix-sysreport-traceback-when-utility-sos-not-.patch
Patch4: rteval-sysstat-Convert-base64-data-to-text-before-wr.patch
Patch5: rteval-timerlat-Add-timerlat-tracing-to-rteval.patch
Patch6: rteval-timerlat-tracing-clean-up.patch
%description
The rteval script is a utility for measuring various aspects of
@ -71,6 +75,12 @@ to the screen.
%{_bindir}/rteval
%changelog
* Mon Jun 24 2024 John Kacur <jkacur@redhat.com> - 3.8-4
- Add timerlat tracing
- Add Fix -aNone being passed to cyclictest
- Add rtla as a requires for timerlat
Resolves: RHEL-35461
* Mon Jun 24 2024 Anubhav Shelat <ashelat@redhat.com> - 3.8-3
- Allow rteval to search /usr/bin/ for sysreport
- Convert base64 data to text before wrapping.