From f0f764c0aecf1770679636294a3232159c5f825e Mon Sep 17 00:00:00 2001 From: John Kacur Date: Mon, 24 Jun 2024 15:44:27 -0400 Subject: [PATCH] Add timerlat tracing Add timerlat tracing Add Fix -aNone being passed to cyclictest Add rtla as a requires for timerlat Resolves: RHEL-35461 Signed-off-by: John Kacur --- ...age.patch => Updated-rteval-man-page.patch | 0 ...Fix-aNone-being-passed-to-cyclictest.patch | 45 ++ ...erlat-Add-timerlat-tracing-to-rteval.patch | 526 ++++++++++++++++++ rteval-timerlat-tracing-clean-up.patch | 217 ++++++++ rteval.spec | 18 +- 5 files changed, 802 insertions(+), 4 deletions(-) rename 0001-Updated-rteval-man-page.patch => Updated-rteval-man-page.patch (100%) create mode 100644 rteval-Fix-aNone-being-passed-to-cyclictest.patch create mode 100644 rteval-timerlat-Add-timerlat-tracing-to-rteval.patch create mode 100644 rteval-timerlat-tracing-clean-up.patch diff --git a/0001-Updated-rteval-man-page.patch b/Updated-rteval-man-page.patch similarity index 100% rename from 0001-Updated-rteval-man-page.patch rename to Updated-rteval-man-page.patch diff --git a/rteval-Fix-aNone-being-passed-to-cyclictest.patch b/rteval-Fix-aNone-being-passed-to-cyclictest.patch new file mode 100644 index 0000000..263145f --- /dev/null +++ b/rteval-Fix-aNone-being-passed-to-cyclictest.patch @@ -0,0 +1,45 @@ +From 17a39863e5f89fbaf51dc158bc7e27f46676d46d Mon Sep 17 00:00:00 2001 +From: Tomas Glozar +Date: Tue, 11 Jun 2024 16:45:53 +0200 +Subject: [PATCH] rteval: Fix -aNone being passed to cyclictest + +When rteval is called via the command line, cpulists for both +measurements and loads default to an empty string and are further +processed by parse_cpulist_from_config. However, this is not true when +rteval is used as a module: in that case, neither the default +command-line value is used nor parse_cpulist_from_config is run, leading +to None being set to the config property of measurements which is +explicitely passed down to the corresponding cyclictest config property. + +After 64ce7848dfab ("rteval: Add relative cpulists for measurements"), +where the check for None was removed from the cyclictest module, rteval +passes "-aNone" to cyclictest when being used as a module. + +Call parse_cpulist_from_config with an empty string to get the default +cpulist to pass to cyclictest module if cpulist is empty in +the measurement config. + +Fixes: 64ce7848dfab ("rteval: Add relative cpulists for measurements") +Signed-off-by: Tomas Glozar +Signed-off-by: John Kacur +--- + rteval/modules/measurement/__init__.py | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/rteval/modules/measurement/__init__.py b/rteval/modules/measurement/__init__.py +index 11bd7b0fce69..43c0fda30ce1 100644 +--- a/rteval/modules/measurement/__init__.py ++++ b/rteval/modules/measurement/__init__.py +@@ -148,6 +148,9 @@ measurement profiles, based on their characteristics""" + modcfg = self.__cfg.GetSection("measurement") + cpulist = modcfg.cpulist + run_on_isolcpus = modcfg.run_on_isolcpus ++ if cpulist is None: ++ # Get default cpulist value ++ cpulist = cpulist_utils.collapse_cpulist(parse_cpulist_from_config("", run_on_isolcpus)) + + for (modname, modtype) in modcfg: + if isinstance(modtype, str) and modtype.lower() == 'module': # Only 'module' will be supported (ds) +-- +2.45.2 + diff --git a/rteval-timerlat-Add-timerlat-tracing-to-rteval.patch b/rteval-timerlat-Add-timerlat-tracing-to-rteval.patch new file mode 100644 index 0000000..831a850 --- /dev/null +++ b/rteval-timerlat-Add-timerlat-tracing-to-rteval.patch @@ -0,0 +1,526 @@ +From 5909521e06ed92ea60ffb247b25ca86c232f71bf Mon Sep 17 00:00:00 2001 +From: John Kacur +Date: Thu, 20 Jun 2024 21:34:24 -0400 +Subject: [PATCH 1/4] rteval: timerlat: Add timerlat tracing to rteval + +This patch adds tracing from timerlat to rteval. + +These two options are added + + --timerlat-stoptrace USEC + Stop trace if thread latency higher than USEC + --timerlat-trace FILE + File to save trace to + +This also stores the summary from rtla timerlat in the rteval xml file. + +Signed-off-by: John Kacur +--- + rteval/modules/measurement/timerlat.py | 213 ++++++++++++++++++++++++- + rteval/rteval_text.xsl | 181 ++++++++++++++++++++- + 2 files changed, 389 insertions(+), 5 deletions(-) + +diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py +index e8345fab1ad7..45adec1b33e1 100644 +--- a/rteval/modules/measurement/timerlat.py ++++ b/rteval/modules/measurement/timerlat.py +@@ -16,6 +16,7 @@ from rteval.modules import rtevalModulePrototype + from rteval.systopology import cpuinfo, SysTopology + from rteval.cpulist_utils import expand_cpulist, collapse_cpulist + ++ + class TLRunData: + ''' class to store instance data from a timerlat run ''' + def __init__(self, coreid, datatype, priority, logfnc): +@@ -188,6 +189,15 @@ class Timerlat(rtevalModulePrototype): + self.__cpus = [str(c) for c in expand_cpulist(self.__cpulist)] + self.__numcores = len(self.__cpus) + ++ # Has tracing been triggered ++ self.__stoptrace = False ++ # This stores the output from rtla ++ self.__posttrace = "" ++ # Stop Trace Data ++ self.__stdata = {} ++ # Stop Trace Cpu ++ self.stcpu = -1 ++ + self.__timerlat_out = None + self.__timerlat_err = None + self.__started = False +@@ -218,10 +228,21 @@ class Timerlat(rtevalModulePrototype): + self.__cmd = ['rtla', 'timerlat', 'hist', '-P', f'f:{int(self.__priority)}', '-u'] + self.__cmd.append(f'-c{self.__cpulist}') + self.__cmd.append(f'-E{self.__buckets}') ++ ++ if self.__cfg.stoptrace: ++ self.__cmd.append(f"-T{int(self.__cfg.stoptrace)}") ++ ++ if self.__cfg.trace: ++ if not self.__cfg.stoptrace: ++ self._log(Log.WARN, f'Ignoring trace={self.__cfg.trace}, because stoptrace not invoked') ++ else: ++ self.__cmd.append(f'-t={self.__cfg.trace}') ++ + self._log(Log.DEBUG, f'self.__cmd = {self.__cmd}') + self.__timerlat_out = tempfile.SpooledTemporaryFile(mode='w+b') + self.__timerlat_err = tempfile.SpooledTemporaryFile(mode='w+b') + ++ + def _WorkloadTask(self): + if self.__started: + return +@@ -252,6 +273,10 @@ class Timerlat(rtevalModulePrototype): + os.kill(self.__timerlat_process.pid, signal.SIGINT) + time.sleep(2) + ++ blocking_thread_detected = False ++ softirq_interference_detected = False ++ irq_interference_detected = False ++ + # Parse histogram output + self.__timerlat_out.seek(0) + for line in self.__timerlat_out: +@@ -261,6 +286,113 @@ class Timerlat(rtevalModulePrototype): + if not line: + continue + ++ # Parsing if stoptrace has been invoked ++ if self.__stoptrace: ++ self.__posttrace += line ++ line = line.strip() ++ fields = line.split() ++ if line.startswith("##") and fields[1] == "CPU": ++ self.stcpu = int(fields[2]) ++ self._log(Log.DEBUG, f"self.stcpu = {self.stcpu}") ++ self.__stdata[self.stcpu] = {} ++ continue ++ if self.stcpu == -1: ++ self._log(Log.WARN, "Stop trace has been invoked, but a stop cpu has not been identified.") ++ continue ++ if line.startswith('------------------'): ++ blocking_thread_detected = False ++ softirq_interference_detected = False ++ irq_interference_detected = False ++ continue ++ if line.startswith("Thread latency:"): ++ thread_latency_percent = fields[-1].strip('()%') ++ self._log(Log.DEBUG, f"thread_latency_percent = {thread_latency_percent}") ++ thread_latency = fields[-3] ++ self._log(Log.DEBUG, f"thread_latency = {thread_latency}") ++ self.__stdata[self.stcpu]["Thread_latency"] = (thread_latency, thread_latency_percent) ++ elif line.startswith("Previous IRQ interference"): ++ self._log(Log.DEBUG, f'Previous_IRQ_interference = {fields[-2]}') ++ self.__stdata[self.stcpu]["Previous_IRQ_interference"] = fields[-2] ++ elif line.startswith("IRQ handler delay:"): ++ irq_handler_delay_percent = fields[-2].strip('(') ++ irq_handler_delay = fields[-4] ++ # Do we have (exit from idle)? ++ if fields[3] == '(exit': ++ field_name = "IRQ_handler_delay_exit_from_idle" ++ else: ++ field_name = "IRQ_handler_delay" ++ self._log(Log.DEBUG, f"{field_name} = {irq_handler_delay}") ++ self._log(Log.DEBUG, f"{field_name}_percent = {irq_handler_delay_percent}") ++ self.__stdata[self.stcpu][field_name] = (irq_handler_delay, irq_handler_delay_percent) ++ elif line.startswith("IRQ latency:"): ++ self._log(Log.DEBUG, f"irq_latency = {fields[-2]}") ++ self.__stdata[self.stcpu]["IRQ_latency"] = fields[-2] ++ elif line.startswith("Timerlat IRQ duration"): ++ timerlat_irq_duration_percent = fields[-2].strip('(') ++ self._log(Log.DEBUG, f"timerlat_irq_duration_percent = {timerlat_irq_duration_percent}") ++ timerlat_irq_duration = fields[-4] ++ self._log(Log.DEBUG, f"timerlat_irq_duration = {timerlat_irq_duration}") ++ self.__stdata[self.stcpu]["Timerlat_IRQ_duration"] = (timerlat_irq_duration, timerlat_irq_duration_percent) ++ elif line.startswith("Blocking thread:"): ++ blocking_thread_percent = fields[-2].strip('(') ++ self._log(Log.DEBUG, f"blocking_thread_percent = {blocking_thread_percent}") ++ blocking_thread = fields[-4] ++ self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}") ++ self.__stdata[self.stcpu]["Blocking_Thread"] = (blocking_thread, blocking_thread_percent) ++ blocking_thread_detected = True ++ irq_interference_detected = False ++ softirq_interference_detected = False ++ elif line.startswith("IRQ interference"): ++ irq_interference_percent = fields[-2].strip('(') ++ self._log(Log.DEBUG, f"irq_interference_percent = {irq_interference_percent}") ++ irq_interference = fields[-4] ++ self._log(Log.DEBUG, f"irq_interference = {irq_interference}") ++ self.__stdata[self.stcpu]["IRQ_interference"] = (irq_interference, irq_interference_percent) ++ blocking_thread_detected = False ++ irq_interference_detected = True ++ softirq_interference_detected = False ++ elif line.startswith("Softirq interference"): ++ softirq_interference_percent = fields[-2].strip('(') ++ self._log(Log.DEBUG, f"softirq_interference_percent = {softirq_interference_percent}") ++ softirq_interference = fields[-4] ++ self._log(Log.DEBUG, f"softirq_interference = {softirq_interference}") ++ self.__stdata[self.stcpu]["Softirq_interference"] = (softirq_interference, softirq_interference_percent) ++ blocking_thread_detected = False ++ irq_interference_detected = False ++ softirq_interference_detected = True ++ elif blocking_thread_detected: ++ self._log(Log.DEBUG, f'line={line}') ++ blocking_thread = " ".join(fields[0:-2]) ++ self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}") ++ blocking_threadus = fields[-2] ++ self._log(Log.DEBUG, f"blocking_threadus = {blocking_threadus}") ++ self.__stdata[self.stcpu].setdefault("blocking_thread", []) ++ self.__stdata[self.stcpu]["blocking_thread"] += [(blocking_thread, blocking_threadus)] ++ elif softirq_interference_detected: ++ softirq = " ".join(fields[0:-2]) ++ softirq_latency = fields[-2] ++ self._log(Log.DEBUG, f'softirq = {softirq}') ++ self._log(Log.DEBUG, f'softirq_latency = {softirq_latency}') ++ self.__stdata[self.stcpu].setdefault("softirq_interference", []) ++ self.__stdata[self.stcpu]["softirq_interference"] += [(softirq, softirq_latency)] ++ elif irq_interference_detected: ++ irq_interference_name = " ".join(fields[0:-2]) ++ irq_interference_latency = fields[-2] ++ self._log(Log.DEBUG, f'irq_interference = {irq_interference_name}, latency = {irq_interference_latency}') ++ self.__stdata[self.stcpu].setdefault("irq_interference", []) ++ self.__stdata[self.stcpu]["irq_interference"] += [(irq_interference_name, irq_interference_latency)] ++ elif line.startswith("Max timerlat IRQ latency"): ++ self._log(Log.DEBUG, f"line={line}") ++ max_timerlat_irq_latency = fields[-5] ++ self._log(Log.DEBUG, f"max_timerlat_irq_latency = {max_timerlat_irq_latency}") ++ max_timerlat_cpu = int(fields[-1]) ++ self._log(Log.DEBUG, f"max_timerlat_cpu = {max_timerlat_cpu}") ++ self.__stdata.setdefault(max_timerlat_cpu, {}) ++ self.__stdata[max_timerlat_cpu]["Max_timerlat_IRQ_latency_from_idle"] = max_timerlat_irq_latency ++ else: ++ self._log(Log.DEBUG, f'line = {line}') ++ continue ++ + if line.startswith('#'): + if line.startswith('# Duration:'): + duration = line.split()[2] +@@ -285,9 +417,13 @@ class Timerlat(rtevalModulePrototype): + elif line.startswith('max:'): + #print(line) + continue ++ elif line.startswith('rtla timerlat hit stop tracing'): ++ self.__stoptrace = True ++ self.__posttrace += line ++ continue + else: +- pass + #print(line) ++ pass + + vals = line.split() + if not vals: +@@ -301,12 +437,18 @@ class Timerlat(rtevalModulePrototype): + continue + + for i, core in enumerate(self.__cpus): +- self.__timerlatdata[core].bucket(index, int(vals[i*3+1]), ++ # There might not be a count on every cpu if tracing invoked ++ if i*3 + 1 >= len(vals): ++ self.__timerlatdata[core].bucket(index, 0, 0, 0) ++ self.__timerlatdata['system'].bucket(index, 0, 0, 0) ++ else: ++ self.__timerlatdata[core].bucket(index, int(vals[i*3+1]), + int(vals[i*3+2]), + int(vals[i*3+3])) +- self.__timerlatdata['system'].bucket(index, int(vals[i*3+1]), ++ self.__timerlatdata['system'].bucket(index, int(vals[i*3+1]), + int(vals[i*3+2]), + int(vals[i*3+3])) ++ + # Generate statistics for each RunData object + for n in list(self.__timerlatdata.keys()): + self.__timerlatdata[n].reduce() +@@ -320,6 +462,62 @@ class Timerlat(rtevalModulePrototype): + rep_n = libxml2.newNode('timerlat') + rep_n.newProp('command_line', ' '.join(self.__cmd)) + ++ stoptrace_invoked_n = libxml2.newNode('stoptrace_invoked') ++ if self.stcpu != -1: ++ stoptrace_invoked_n.newProp("invoked", "true") ++ else: ++ stoptrace_invoked_n.newProp("invoked", "") ++ rep_n.addChild(stoptrace_invoked_n) ++ ++ if self.stcpu != -1: ++ self._log(Log.DEBUG, f'self.__stdata = {self.__stdata}') ++ for cpu in self.__stdata: ++ # This is Max timerlat IRQ latency from idle ++ # With no other data from that cpu, so don't create a ++ # stoptrace_report for this ++ if len(self.__stdata[cpu]) == 1: ++ continue ++ stoptrace_n = libxml2.newNode('stoptrace_report') ++ stoptrace_n.newProp("CPU", str(cpu)) ++ for k, v in self.__stdata[cpu].items(): ++ self._log(Log.DEBUG, f"cpu={cpu}, k={k}, v={v}") ++ if isinstance(v, tuple): ++ latency = str(v[0]) ++ percent = str(v[1]) ++ cpu_n = stoptrace_n.newTextChild(None, str(k), None) ++ n = cpu_n.newTextChild(None, "latency", latency) ++ n.newProp('unit', 'us') ++ ++ n = cpu_n.newTextChild(None, "latency_percent", percent) ++ n.newProp('unit', '%') ++ elif isinstance(v, list): ++ if k in ("blocking_thread", "softirq_interference", "irq_interference"): ++ for name, latency in v: ++ cpu_n = stoptrace_n.newTextChild(None, k, None) ++ n = cpu_n.newTextChild(None, "name", name) ++ n = cpu_n.newTextChild(None, "latency", latency) ++ n.newProp('unit', 'us') ++ else: ++ if k == "Max_timerlat_IRQ_latency_from_idle": ++ continue ++ cpu_n = stoptrace_n.newTextChild(None, str(k), str(v)) ++ cpu_n.newProp('unit', 'us') ++ rep_n.addChild(stoptrace_n) ++ ++ self._log(Log.DEBUG, f'timerlat: posttrace = \n{self.__posttrace}') ++ self._log(Log.DEBUG, 'timerlat: posttrace END') ++ for cpu in self.__stdata: ++ for k, v in self.__stdata[cpu].items(): ++ if isinstance(v, tuple): ++ continue ++ if k == "Max_timerlat_IRQ_latency_from_idle": ++ max_timerlat_n = libxml2.newNode('max_timerlat_report') ++ max_timerlat_n.newProp("CPU", str(cpu)) ++ cpu_n = max_timerlat_n.newTextChild(None, k, str(v)) ++ cpu_n.newProp('unit', 'us') ++ rep_n.addChild(max_timerlat_n) ++ return rep_n ++ + rep_n.addChild(self.__timerlatdata['system'].MakeReport()) + for thr in self.__cpus: + if str(thr) not in self.__timerlatdata: +@@ -341,7 +539,13 @@ def ModuleParameters(): + "metavar": "PRIO" }, + "buckets": {"descr": "Number of buckets", + "default": 3500, +- "metavar": "NUM"} ++ "metavar": "NUM" }, ++ "stoptrace": {"descr": "Stop trace if thread latency higher than USEC", ++ "default": None, ++ "metavar": "USEC" }, ++ "trace": {"descr": "File to save trace to", ++ "default": None, ++ "metavar": "FILE" }, + } + + def create(params, logger): +@@ -367,6 +571,7 @@ if __name__ == '__main__': + + cfg_tl = cfg.GetSection('timerlat') + cfg_tl.cpulist = collapse_cpulist(SysTopology().online_cpus()) ++ cfg_tl.stoptrace=50 + + RUNTIME = 10 + +diff --git a/rteval/rteval_text.xsl b/rteval/rteval_text.xsl +index 2f03bda0bb55..70777354efa5 100644 +--- a/rteval/rteval_text.xsl ++++ b/rteval/rteval_text.xsl +@@ -316,14 +316,26 @@ + + + +- ++ ++ rtla timerlat hit stop tracing ++ ++ ++ ++ ++ + ++ ++ + System: + + ++ + ++ ++ + Statistics: + ++ + + + +@@ -470,4 +482,171 @@ + + + ++ ++ ++ ## CPU ++ ++ hit stop tracing, analyzing it ## ++ ++ ++ ++ ++ ++ Previous IRQ interference: up to ++ ++ ++ ++ ++ ++ ++ ++ ++ IRQ handler delay: ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ ++ IRQ handler delay: (exit from idle) ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ IRQ latency: ++ ++ ++ ++ ++ ++ ++ ++ Timerlat IRQ duration: ++ ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ Blocking thread: ++ ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ Softirq interference: ++ ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ IRQ interference: ++ ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ -------------------------------------------------------------------------------- ++ Thread latency: ++ ++ ++ ++ ++ ( ++ ++ ++ ++ ) ++ ++ ++ ++ ++ ++ ++ ++ Max timerlat IRQ latency from idle: ++ ++ ++ ++ in cpu ++ ++ ++ ++ ++ + +-- +2.45.2 + diff --git a/rteval-timerlat-tracing-clean-up.patch b/rteval-timerlat-tracing-clean-up.patch new file mode 100644 index 0000000..ede50e6 --- /dev/null +++ b/rteval-timerlat-tracing-clean-up.patch @@ -0,0 +1,217 @@ +From 4fd5b77dc47f039551050925e442db9a17be5e75 Mon Sep 17 00:00:00 2001 +From: John Kacur +Date: Mon, 24 Jun 2024 08:45:54 -0400 +Subject: [PATCH 4/4] rteval: timerlat tracing clean-up + +The patch cleans-up the code that stores the timerlat summary into xml, +after a trace is stopped. It does so by grouping similar kinds of output +together for parsing. It also reduces the amount of debug output. + +Signed-off-by: John Kacur +--- + rteval/modules/measurement/timerlat.py | 142 ++++++++++--------------- + rteval/rteval_text.xsl | 10 +- + 2 files changed, 62 insertions(+), 90 deletions(-) + +diff --git a/rteval/modules/measurement/timerlat.py b/rteval/modules/measurement/timerlat.py +index 45adec1b33e1..dc6226ccc991 100644 +--- a/rteval/modules/measurement/timerlat.py ++++ b/rteval/modules/measurement/timerlat.py +@@ -273,12 +273,14 @@ class Timerlat(rtevalModulePrototype): + os.kill(self.__timerlat_process.pid, signal.SIGINT) + time.sleep(2) + ++ ++ # Parse histogram output ++ self.__timerlat_out.seek(0) ++ + blocking_thread_detected = False + softirq_interference_detected = False + irq_interference_detected = False + +- # Parse histogram output +- self.__timerlat_out.seek(0) + for line in self.__timerlat_out: + line = bytes.decode(line) + +@@ -291,6 +293,8 @@ class Timerlat(rtevalModulePrototype): + self.__posttrace += line + line = line.strip() + fields = line.split() ++ if not line: ++ continue + if line.startswith("##") and fields[1] == "CPU": + self.stcpu = int(fields[2]) + self._log(Log.DEBUG, f"self.stcpu = {self.stcpu}") +@@ -304,91 +308,59 @@ class Timerlat(rtevalModulePrototype): + softirq_interference_detected = False + irq_interference_detected = False + continue +- if line.startswith("Thread latency:"): +- thread_latency_percent = fields[-1].strip('()%') +- self._log(Log.DEBUG, f"thread_latency_percent = {thread_latency_percent}") +- thread_latency = fields[-3] +- self._log(Log.DEBUG, f"thread_latency = {thread_latency}") +- self.__stdata[self.stcpu]["Thread_latency"] = (thread_latency, thread_latency_percent) +- elif line.startswith("Previous IRQ interference"): +- self._log(Log.DEBUG, f'Previous_IRQ_interference = {fields[-2]}') +- self.__stdata[self.stcpu]["Previous_IRQ_interference"] = fields[-2] +- elif line.startswith("IRQ handler delay:"): +- irq_handler_delay_percent = fields[-2].strip('(') +- irq_handler_delay = fields[-4] +- # Do we have (exit from idle)? +- if fields[3] == '(exit': +- field_name = "IRQ_handler_delay_exit_from_idle" +- else: +- field_name = "IRQ_handler_delay" +- self._log(Log.DEBUG, f"{field_name} = {irq_handler_delay}") +- self._log(Log.DEBUG, f"{field_name}_percent = {irq_handler_delay_percent}") +- self.__stdata[self.stcpu][field_name] = (irq_handler_delay, irq_handler_delay_percent) +- elif line.startswith("IRQ latency:"): +- self._log(Log.DEBUG, f"irq_latency = {fields[-2]}") +- self.__stdata[self.stcpu]["IRQ_latency"] = fields[-2] +- elif line.startswith("Timerlat IRQ duration"): +- timerlat_irq_duration_percent = fields[-2].strip('(') +- self._log(Log.DEBUG, f"timerlat_irq_duration_percent = {timerlat_irq_duration_percent}") +- timerlat_irq_duration = fields[-4] +- self._log(Log.DEBUG, f"timerlat_irq_duration = {timerlat_irq_duration}") +- self.__stdata[self.stcpu]["Timerlat_IRQ_duration"] = (timerlat_irq_duration, timerlat_irq_duration_percent) +- elif line.startswith("Blocking thread:"): +- blocking_thread_percent = fields[-2].strip('(') +- self._log(Log.DEBUG, f"blocking_thread_percent = {blocking_thread_percent}") +- blocking_thread = fields[-4] +- self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}") +- self.__stdata[self.stcpu]["Blocking_Thread"] = (blocking_thread, blocking_thread_percent) +- blocking_thread_detected = True +- irq_interference_detected = False +- softirq_interference_detected = False +- elif line.startswith("IRQ interference"): +- irq_interference_percent = fields[-2].strip('(') +- self._log(Log.DEBUG, f"irq_interference_percent = {irq_interference_percent}") +- irq_interference = fields[-4] +- self._log(Log.DEBUG, f"irq_interference = {irq_interference}") +- self.__stdata[self.stcpu]["IRQ_interference"] = (irq_interference, irq_interference_percent) +- blocking_thread_detected = False +- irq_interference_detected = True +- softirq_interference_detected = False +- elif line.startswith("Softirq interference"): +- softirq_interference_percent = fields[-2].strip('(') +- self._log(Log.DEBUG, f"softirq_interference_percent = {softirq_interference_percent}") +- softirq_interference = fields[-4] +- self._log(Log.DEBUG, f"softirq_interference = {softirq_interference}") +- self.__stdata[self.stcpu]["Softirq_interference"] = (softirq_interference, softirq_interference_percent) +- blocking_thread_detected = False +- irq_interference_detected = False +- softirq_interference_detected = True +- elif blocking_thread_detected: +- self._log(Log.DEBUG, f'line={line}') +- blocking_thread = " ".join(fields[0:-2]) +- self._log(Log.DEBUG, f"blocking_thread = {blocking_thread}") +- blocking_threadus = fields[-2] +- self._log(Log.DEBUG, f"blocking_threadus = {blocking_threadus}") +- self.__stdata[self.stcpu].setdefault("blocking_thread", []) +- self.__stdata[self.stcpu]["blocking_thread"] += [(blocking_thread, blocking_threadus)] +- elif softirq_interference_detected: +- softirq = " ".join(fields[0:-2]) +- softirq_latency = fields[-2] +- self._log(Log.DEBUG, f'softirq = {softirq}') +- self._log(Log.DEBUG, f'softirq_latency = {softirq_latency}') +- self.__stdata[self.stcpu].setdefault("softirq_interference", []) +- self.__stdata[self.stcpu]["softirq_interference"] += [(softirq, softirq_latency)] +- elif irq_interference_detected: +- irq_interference_name = " ".join(fields[0:-2]) +- irq_interference_latency = fields[-2] +- self._log(Log.DEBUG, f'irq_interference = {irq_interference_name}, latency = {irq_interference_latency}') +- self.__stdata[self.stcpu].setdefault("irq_interference", []) +- self.__stdata[self.stcpu]["irq_interference"] += [(irq_interference_name, irq_interference_latency)] +- elif line.startswith("Max timerlat IRQ latency"): +- self._log(Log.DEBUG, f"line={line}") +- max_timerlat_irq_latency = fields[-5] +- self._log(Log.DEBUG, f"max_timerlat_irq_latency = {max_timerlat_irq_latency}") ++ ++ # work around rtla not printing ':' after all names ++ if line.startswith('Softirq interference'): ++ name = 'Softirq_interference' ++ elif line.startswith('IRQ interference'): ++ name = 'IRQ_interference' ++ else: ++ name = ''.join(line.split(':')[0]).replace(' ', '_') ++ self._log(Log.DEBUG, f"name={name}") ++ ++ if name in ['Thread_latency']: ++ latency = fields[-3] ++ percent = fields[-1].strip('()%') ++ self._log(Log.DEBUG, f'{name} = ({latency}, {percent})') ++ self.__stdata[self.stcpu][name] = (latency, percent) ++ continue ++ if name in ['Timerlat_IRQ_duration', 'IRQ_handler_delay', 'Blocking_thread', 'IRQ_interference', 'Softirq_interference']: ++ latency = fields[-4] ++ percent = fields[-2].strip('(') ++ if name == 'IRQ_handler_delay' and fields[3] == '(exit': ++ name = 'IRQ_handler_delay_exit_from_idle' ++ self._log(Log.DEBUG, f'{name} = ({latency}, {percent})') ++ self.__stdata[self.stcpu][name] = (latency, percent) ++ detected = {'Blocking_thread' : (True, False, False), ++ 'IRQ_interference' : (False, True, False), ++ 'Softirq_interference' : (False, False, True) } ++ if name in ('Blocking_thread', 'IRQ_interference', 'Softirq_interference'): ++ blocking_thread_detected, irq_interference_detected, softirq_interference_detected = detected.get(name) ++ continue ++ if name in ["IRQ_latency", "Previous_IRQ_interference"]: ++ latency = fields[-2] ++ self._log(Log.DEBUG, f'{name} = {fields[-2]}') ++ self.__stdata[self.stcpu][name] = fields[-2] ++ continue ++ if blocking_thread_detected or softirq_interference_detected or irq_interference_detected: ++ if blocking_thread_detected: ++ field_name = "blocking_thread" ++ elif softirq_interference_detected: ++ field_name = "softirq_interference" ++ elif irq_interference_detected: ++ field_name = "irq_interference" ++ thread = " ".join(fields[0:-2]) ++ latency = fields[-2] ++ self._log(Log.DEBUG, f"{field_name} += [({thread}, {latency})]") ++ self.__stdata[self.stcpu].setdefault(field_name, []) ++ self.__stdata[self.stcpu][field_name] += [(thread, latency)] ++ continue ++ if name == "Max_timerlat_IRQ_latency_from_idle": ++ latency = fields[-5] + max_timerlat_cpu = int(fields[-1]) +- self._log(Log.DEBUG, f"max_timerlat_cpu = {max_timerlat_cpu}") ++ self._log(Log.DEBUG, f'self.__stdata[{max_timerlat_cpu}][{name}] = {latency}') + self.__stdata.setdefault(max_timerlat_cpu, {}) +- self.__stdata[max_timerlat_cpu]["Max_timerlat_IRQ_latency_from_idle"] = max_timerlat_irq_latency ++ self.__stdata[max_timerlat_cpu][name] = latency + else: + self._log(Log.DEBUG, f'line = {line}') + continue +diff --git a/rteval/rteval_text.xsl b/rteval/rteval_text.xsl +index 70777354efa5..7ca0ae3a4c66 100644 +--- a/rteval/rteval_text.xsl ++++ b/rteval/rteval_text.xsl +@@ -546,16 +546,16 @@ + ) + + +- ++ + Blocking thread: + +- ++ + +- ++ + ( +- ++ + +- ++ + ) + + +-- +2.45.2 + diff --git a/rteval.spec b/rteval.spec index 04540b1..cc4fa36 100644 --- a/rteval.spec +++ b/rteval.spec @@ -1,6 +1,6 @@ Name: rteval Version: 3.8 -Release: 3%{?dist} +Release: 4%{?dist} Summary: Utility to evaluate system suitability for RT Linux Group: Development/Tools @@ -29,12 +29,16 @@ Requires: libmpc, libmpc-devel Requires: dwarves Requires: dmidecode Requires: procps-ng +Requires: rtla BuildArch: noarch #Patches -Patch1: 0001-Updated-rteval-man-page.patch -Patch2: rteval-Fix-sysreport-traceback-when-utility-sos-not-.patch -Patch3: rteval-sysstat-Convert-base64-data-to-text-before-wr.patch +Patch1: Updated-rteval-man-page.patch +Patch2: rteval-Fix-aNone-being-passed-to-cyclictest.patch +Patch3: rteval-Fix-sysreport-traceback-when-utility-sos-not-.patch +Patch4: rteval-sysstat-Convert-base64-data-to-text-before-wr.patch +Patch5: rteval-timerlat-Add-timerlat-tracing-to-rteval.patch +Patch6: rteval-timerlat-tracing-clean-up.patch %description The rteval script is a utility for measuring various aspects of @@ -71,6 +75,12 @@ to the screen. %{_bindir}/rteval %changelog +* Mon Jun 24 2024 John Kacur - 3.8-4 +- Add timerlat tracing +- Add Fix -aNone being passed to cyclictest +- Add rtla as a requires for timerlat +Resolves: RHEL-35461 + * Mon Jun 24 2024 Anubhav Shelat - 3.8-3 - Allow rteval to search /usr/bin/ for sysreport - Convert base64 data to text before wrapping.