From 41300a3c6a1700e6f220ac89f16d4c5a6d041e68 Mon Sep 17 00:00:00 2001
From: eabdullin <eabdullin@almalinux.org>
Date: Wed, 27 Sep 2023 14:09:45 +0000
Subject: [PATCH] import CS rtla-5.14.0-8.el8

---
 ...umentation-rtla-Add-hwnoise-man-page.patch |  151 +++
 ...la-Add-timerlat-top-auto-analysis-op.patch |  216 ++++
 ...on-rtla-Correct-command-line-example.patch |   37 +
 SOURCES/rtla-Add-hwnoise-tool.patch           |  477 ++++++++
 ...rtla-timerlat-Add-auto-analysis-core.patch | 1056 +++++++++++++++++
 ...merlat-Add-auto-analysis-only-option.patch |  164 +++
 ...dd-auto-analysis-support-to-timerlat.patch |  293 +++++
 ...-Fix-Previous-IRQ-auto-analysis-line.patch |   48 +
 ...la-osnoise_hist-display-average-with.patch |   36 +
 ...la-osnoise_hist-use-total-duration-f.patch |   51 +
 SPECS/rtla.spec                               |   43 +-
 11 files changed, 2571 insertions(+), 1 deletion(-)
 create mode 100644 SOURCES/Documentation-rtla-Add-hwnoise-man-page.patch
 create mode 100644 SOURCES/Documentation-rtla-Add-timerlat-top-auto-analysis-op.patch
 create mode 100644 SOURCES/Documentation-rtla-Correct-command-line-example.patch
 create mode 100644 SOURCES/rtla-Add-hwnoise-tool.patch
 create mode 100644 SOURCES/rtla-timerlat-Add-auto-analysis-core.patch
 create mode 100644 SOURCES/rtla-timerlat-Add-auto-analysis-only-option.patch
 create mode 100644 SOURCES/rtla-timerlat-Add-auto-analysis-support-to-timerlat.patch
 create mode 100644 SOURCES/rtla-timerlat-Fix-Previous-IRQ-auto-analysis-line.patch
 create mode 100644 SOURCES/tools-tracing-rtla-osnoise_hist-display-average-with.patch
 create mode 100644 SOURCES/tools-tracing-rtla-osnoise_hist-use-total-duration-f.patch

diff --git a/SOURCES/Documentation-rtla-Add-hwnoise-man-page.patch b/SOURCES/Documentation-rtla-Add-hwnoise-man-page.patch
new file mode 100644
index 0000000..91d75e0
--- /dev/null
+++ b/SOURCES/Documentation-rtla-Add-hwnoise-man-page.patch
@@ -0,0 +1,151 @@
+From 5dc3750e747f93f9bb7987da3d47a8ab4a5a181e Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Tue, 7 Feb 2023 22:48:51 +0100
+Subject: [PATCH] Documentation/rtla: Add hwnoise man page
+
+Add a man page for the new rtla hwnoise tool, mostly based in the
+rtla osnoise top.
+
+Link: https://lkml.kernel.org/r/27088e0f42a6129e9b75f789d78adff4171f2e79.1675805361.git.bristot@kernel.org
+
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
+Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ index.rst        |   1 +
+ rtla-hwnoise.rst | 107 +++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 108 insertions(+)
+ create mode 100644 Documentation/tools/rtla/rtla-hwnoise.rst
+
+diff --git a/Documentation/index.rst b/Documentation/index.rst
+index 840f0bf3e803..05d2652e4072 100644
+--- a/Documentation/index.rst
++++ b/Documentation/index.rst
+@@ -17,6 +17,7 @@ behavior on specific hardware.
+    rtla-timerlat
+    rtla-timerlat-hist
+    rtla-timerlat-top
++   rtla-hwnoise
+ 
+ .. only::  subproject and html
+ 
+diff --git a/Documentation/rtla-hwnoise.rst b/Documentation/rtla-hwnoise.rst
+new file mode 100644
+index 000000000000..fb1c52bbc00b
+--- /dev/null
++++ b/Documentation/rtla-hwnoise.rst
+@@ -0,0 +1,107 @@
++.. SPDX-License-Identifier: GPL-2.0
++
++============
++rtla-hwnoise
++============
++------------------------------------------
++Detect and quantify hardware-related noise
++------------------------------------------
++
++:Manual section: 1
++
++SYNOPSIS
++========
++
++**rtla hwnoise** [*OPTIONS*]
++
++DESCRIPTION
++===========
++
++**rtla hwnoise** collects the periodic summary from the *osnoise* tracer
++running with *interrupts disabled*. By disabling interrupts, and the scheduling
++of threads as a consequence, only non-maskable interrupts and hardware-related
++noise is allowed.
++
++The tool also allows the configurations of the *osnoise* tracer and the
++collection of the tracer output.
++
++OPTIONS
++=======
++.. include:: common_osnoise_options.rst
++
++.. include:: common_top_options.rst
++
++.. include:: common_options.rst
++
++EXAMPLE
++=======
++In the example below, the **rtla hwnoise** tool is set to run on CPUs *1-7*
++on a system with 8 cores/16 threads with hyper-threading enabled.
++
++The tool is set to detect any noise higher than *one microsecond*,
++to run for *ten minutes*, displaying a summary of the report at the
++end of the session::
++
++  # rtla hwnoise -c 1-7 -T 1 -d 10m -q
++                                          Hardware-related Noise
++  duration:   0 00:10:00 | time is in us
++  CPU Period       Runtime        Noise  % CPU Aval   Max Noise   Max Single          HW          NMI
++    1 #599       599000000          138    99.99997           3            3           4           74
++    2 #599       599000000           85    99.99998           3            3           4           75
++    3 #599       599000000           86    99.99998           4            3           6           75
++    4 #599       599000000           81    99.99998           4            4           2           75
++    5 #599       599000000           85    99.99998           2            2           2           75
++    6 #599       599000000           76    99.99998           2            2           0           75
++    7 #599       599000000           77    99.99998           3            3           0           75
++
++
++The first column shows the *CPU*, and the second column shows how many
++*Periods* the tool ran during the session. The *Runtime* is the time
++the tool effectively runs on the CPU. The *Noise* column is the sum of
++all noise that the tool observed, and the *% CPU Aval* is the relation
++between the *Runtime* and *Noise*.
++
++The *Max Noise* column is the maximum hardware noise the tool detected in a
++single period, and the *Max Single* is the maximum single noise seen.
++
++The *HW* and *NMI* columns show the total number of *hardware* and *NMI* noise
++occurrence observed by the tool.
++
++For example, *CPU 3* ran *599* periods of *1 second Runtime*. The CPU received
++*86 us* of noise during the entire execution, leaving *99.99997 %* of CPU time
++for the application. In the worst single period, the CPU caused *4 us* of
++noise to the application, but it was certainly caused by more than one single
++noise, as the *Max Single* noise was of *3 us*. The CPU has *HW noise,* at a
++rate of *six occurrences*/*ten minutes*. The CPU also has *NMIs*, at a higher
++frequency: around *seven per second*.
++
++The tool should report *0* hardware-related noise in the ideal situation.
++For example, by disabling hyper-threading to remove the hardware noise,
++and disabling the TSC watchdog to remove the NMI (it is possible to identify
++this using tracing options of **rtla hwnoise**), it was possible to reach
++the ideal situation in the same hardware::
++
++  # rtla hwnoise -c 1-7 -T 1 -d 10m -q
++                                          Hardware-related Noise
++  duration:   0 00:10:00 | time is in us
++  CPU Period       Runtime        Noise  % CPU Aval   Max Noise   Max Single          HW          NMI
++    1 #599       599000000            0   100.00000           0            0           0            0
++    2 #599       599000000            0   100.00000           0            0           0            0
++    3 #599       599000000            0   100.00000           0            0           0            0
++    4 #599       599000000            0   100.00000           0            0           0            0
++    5 #599       599000000            0   100.00000           0            0           0            0
++    6 #599       599000000            0   100.00000           0            0           0            0
++    7 #599       599000000            0   100.00000           0            0           0            0
++
++SEE ALSO
++========
++
++**rtla-osnoise**\(1)
++
++Osnoise tracer documentation: <https://www.kernel.org/doc/html/latest/trace/osnoise-tracer.html>
++
++AUTHOR
++======
++Written by Daniel Bristot de Oliveira <bristot@kernel.org>
++
++.. include:: common_appendix.rst
+-- 
+2.40.1
+
diff --git a/SOURCES/Documentation-rtla-Add-timerlat-top-auto-analysis-op.patch b/SOURCES/Documentation-rtla-Add-timerlat-top-auto-analysis-op.patch
new file mode 100644
index 0000000..772818c
--- /dev/null
+++ b/SOURCES/Documentation-rtla-Add-timerlat-top-auto-analysis-op.patch
@@ -0,0 +1,216 @@
+From ce6cc6f70cad4d703ddfe99d74f33990617c0a3c Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Tue, 31 Jan 2023 16:38:55 +0100
+Subject: [PATCH] Documentation/rtla: Add timerlat-top auto-analysis options
+
+Add the new options to the man page, as well as updating the
+example to include the new output.
+
+Link: https://lkml.kernel.org/r/a3f5fb49432a55c3323b18725fc6e702f16ccc79.1675179318.git.bristot@kernel.org
+
+Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ common_timerlat_aa.rst |   7 ++
+ rtla-timerlat-top.rst  | 164 ++++++++++++++++++-----------------------
+ 2 files changed, 80 insertions(+), 91 deletions(-)
+ create mode 100644 Documentation/tools/rtla/common_timerlat_aa.rst
+
+diff --git a/Documentation/common_timerlat_aa.rst b/Documentation/common_timerlat_aa.rst
+new file mode 100644
+index 000000000000..077029e6b289
+--- /dev/null
++++ b/Documentation/common_timerlat_aa.rst
+@@ -0,0 +1,7 @@
++**--dump-tasks**
++
++        prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)
++
++**--no-aa**
++
++        disable auto-analysis, reducing rtla timerlat cpu usage
+diff --git a/Documentation/rtla-timerlat-top.rst b/Documentation/rtla-timerlat-top.rst
+index 7c4e4b109493..73799c1150ad 100644
+--- a/Documentation/rtla-timerlat-top.rst
++++ b/Documentation/rtla-timerlat-top.rst
+@@ -30,102 +30,84 @@ OPTIONS
+ 
+ .. include:: common_options.rst
+ 
++.. include:: common_timerlat_aa.rst
++
+ EXAMPLE
+ =======
+ 
+-In the example below, the *timerlat* tracer is set to capture the stack trace at
+-the IRQ handler, printing it to the buffer if the *Thread* timer latency is
+-higher than *30 us*. It is also set to stop the session if a *Thread* timer
+-latency higher than *30 us* is hit. Finally, it is set to save the trace
+-buffer if the stop condition is hit::
++In the example below, the timerlat tracer is dispatched in cpus *1-23* in the
++automatic trace mode, instructing the tracer to stop if a *40 us* latency or
++higher is found::
+ 
+-  [root@alien ~]# rtla timerlat top -s 30 -T 30 -t
+-                   Timer Latency
+-    0 00:00:59   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
++  # timerlat -a 40 -c 1-23 -q
++                                     Timer Latency
++    0 00:00:12   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
+   CPU COUNT      |      cur       min       avg       max |      cur       min       avg       max
+-    0 #58634     |        1         0         1        10 |       11         2        10        23
+-    1 #58634     |        1         0         1         9 |       12         2         9        23
+-    2 #58634     |        0         0         1        11 |       10         2         9        23
+-    3 #58634     |        1         0         1        11 |       11         2         9        24
+-    4 #58634     |        1         0         1        10 |       11         2         9        26
+-    5 #58634     |        1         0         1         8 |       10         2         9        25
+-    6 #58634     |       12         0         1        12 |       30         2        10        30 <--- CPU with spike
+-    7 #58634     |        1         0         1         9 |       11         2         9        23
+-    8 #58633     |        1         0         1         9 |       11         2         9        26
+-    9 #58633     |        1         0         1         9 |       10         2         9        26
+-   10 #58633     |        1         0         1        13 |       11         2         9        28
+-   11 #58633     |        1         0         1        13 |       12         2         9        24
+-   12 #58633     |        1         0         1         8 |       10         2         9        23
+-   13 #58633     |        1         0         1        10 |       10         2         9        22
+-   14 #58633     |        1         0         1        18 |       12         2         9        27
+-   15 #58633     |        1         0         1        10 |       11         2         9        28
+-   16 #58633     |        0         0         1        11 |        7         2         9        26
+-   17 #58633     |        1         0         1        13 |       10         2         9        24
+-   18 #58633     |        1         0         1         9 |       13         2         9        22
+-   19 #58633     |        1         0         1        10 |       11         2         9        23
+-   20 #58633     |        1         0         1        12 |       11         2         9        28
+-   21 #58633     |        1         0         1        14 |       11         2         9        24
+-   22 #58633     |        1         0         1         8 |       11         2         9        22
+-   23 #58633     |        1         0         1        10 |       11         2         9        27
+-  timerlat hit stop tracing
+-  saving trace to timerlat_trace.txt
+-  [root@alien bristot]# tail -60 timerlat_trace.txt
+-  [...]
+-      timerlat/5-79755   [005] .......   426.271226: #58634 context thread timer_latency     10823 ns
+-              sh-109404  [006] dnLh213   426.271247: #58634 context    irq timer_latency     12505 ns
+-              sh-109404  [006] dNLh313   426.271258: irq_noise: local_timer:236 start 426.271245463 duration 12553 ns
+-              sh-109404  [006] d...313   426.271263: thread_noise:       sh:109404 start 426.271245853 duration 4769 ns
+-      timerlat/6-79756   [006] .......   426.271264: #58634 context thread timer_latency     30328 ns
+-      timerlat/6-79756   [006] ....1..   426.271265: <stack trace>
+-  => timerlat_irq
+-  => __hrtimer_run_queues
+-  => hrtimer_interrupt
+-  => __sysvec_apic_timer_interrupt
+-  => sysvec_apic_timer_interrupt
+-  => asm_sysvec_apic_timer_interrupt
+-  => _raw_spin_unlock_irqrestore			<---- spinlock that disabled interrupt.
+-  => try_to_wake_up
+-  => autoremove_wake_function
+-  => __wake_up_common
+-  => __wake_up_common_lock
+-  => ep_poll_callback
+-  => __wake_up_common
+-  => __wake_up_common_lock
+-  => fsnotify_add_event
+-  => inotify_handle_inode_event
+-  => fsnotify
+-  => __fsnotify_parent
+-  => __fput
+-  => task_work_run
+-  => exit_to_user_mode_prepare
+-  => syscall_exit_to_user_mode
+-  => do_syscall_64
+-  => entry_SYSCALL_64_after_hwframe
+-  => 0x7265000001378c
+-  => 0x10000cea7
+-  => 0x25a00000204a
+-  => 0x12e302d00000000
+-  => 0x19b51010901b6
+-  => 0x283ce00726500
+-  => 0x61ea308872
+-  => 0x00000fe3
+-            bash-109109  [007] d..h...   426.271265: #58634 context    irq timer_latency      1211 ns
+-      timerlat/6-79756   [006] .......   426.271267: timerlat_main: stop tracing hit on cpu 6
+-
+-In the trace, it is possible the notice that the *IRQ* timer latency was
+-already high, accounting *12505 ns*. The IRQ delay was caused by the
+-*bash-109109* process that disabled IRQs in the wake-up path
+-(*_try_to_wake_up()* function). The duration of the IRQ handler that woke
+-up the timerlat thread, informed with the **osnoise:irq_noise** event, was
+-also high and added more *12553 ns* to the Thread latency. Finally, the
+-**osnoise:thread_noise** added by the currently running thread (including
+-the scheduling overhead) added more *4769 ns*. Summing up these values,
+-the *Thread* timer latency accounted for *30328 ns*.
+-
+-The primary reason for this high value is the wake-up path that was hit
+-twice during this case: when the *bash-109109* was waking up a thread
+-and then when the *timerlat* thread was awakened. This information can
+-then be used as the starting point of a more fine-grained analysis.
++    1 #12322     |        0         0         1        15 |       10         3         9        31
++    2 #12322     |        3         0         1        12 |       10         3         9        23
++    3 #12322     |        1         0         1        21 |        8         2         8        34
++    4 #12322     |        1         0         1        17 |       10         2        11        33
++    5 #12322     |        0         0         1        12 |        8         3         8        25
++    6 #12322     |        1         0         1        14 |       16         3        11        35
++    7 #12322     |        0         0         1        14 |        9         2         8        29
++    8 #12322     |        1         0         1        22 |        9         3         9        34
++    9 #12322     |        0         0         1        14 |        8         2         8        24
++   10 #12322     |        1         0         0        12 |        9         3         8        24
++   11 #12322     |        0         0         0        15 |        6         2         7        29
++   12 #12321     |        1         0         0        13 |        5         3         8        23
++   13 #12319     |        0         0         1        14 |        9         3         9        26
++   14 #12321     |        1         0         0        13 |        6         2         8        24
++   15 #12321     |        1         0         1        15 |       12         3        11        27
++   16 #12318     |        0         0         1        13 |        7         3        10        24
++   17 #12319     |        0         0         1        13 |       11         3         9        25
++   18 #12318     |        0         0         0        12 |        8         2         8        20
++   19 #12319     |        0         0         1        18 |       10         2         9        28
++   20 #12317     |        0         0         0        20 |        9         3         8        34
++   21 #12318     |        0         0         0        13 |        8         3         8        28
++   22 #12319     |        0         0         1        11 |        8         3        10        22
++   23 #12320     |       28         0         1        28 |       41         3        11        41
++  rtla timerlat hit stop tracing
++  ## CPU 23 hit stop tracing, analyzing it ##
++  IRQ handler delay:                                        27.49 us (65.52 %)
++  IRQ latency:                                              28.13 us
++  Timerlat IRQ duration:                                     9.59 us (22.85 %)
++  Blocking thread:                                           3.79 us (9.03 %)
++                         objtool:49256                       3.79 us
++    Blocking thread stacktrace
++                -> timerlat_irq
++                -> __hrtimer_run_queues
++                -> hrtimer_interrupt
++                -> __sysvec_apic_timer_interrupt
++                -> sysvec_apic_timer_interrupt
++                -> asm_sysvec_apic_timer_interrupt
++                -> _raw_spin_unlock_irqrestore
++                -> cgroup_rstat_flush_locked
++                -> cgroup_rstat_flush_irqsafe
++                -> mem_cgroup_flush_stats
++                -> mem_cgroup_wb_stats
++                -> balance_dirty_pages
++                -> balance_dirty_pages_ratelimited_flags
++                -> btrfs_buffered_write
++                -> btrfs_do_write_iter
++                -> vfs_write
++                -> __x64_sys_pwrite64
++                -> do_syscall_64
++                -> entry_SYSCALL_64_after_hwframe
++  ------------------------------------------------------------------------
++    Thread latency:                                          41.96 us (100%)
++
++  The system has exit from idle latency!
++    Max timerlat IRQ latency from idle: 17.48 us in cpu 4
++  Saving trace to timerlat_trace.txt
++
++In this case, the major factor was the delay suffered by the *IRQ handler*
++that handles **timerlat** wakeup: *65.52%*. This can be caused by the
++current thread masking interrupts, which can be seen in the blocking
++thread stacktrace: the current thread (*objtool:49256*) disabled interrupts
++via *raw spin lock* operations inside mem cgroup, while doing write
++syscall in a btrfs file system.
++
++The raw trace is saved in the **timerlat_trace.txt** file for further analysis.
+ 
+ Note that **rtla timerlat** was dispatched without changing *timerlat* tracer
+ threads' priority. That is generally not needed because these threads hava
+-- 
+2.40.1
+
diff --git a/SOURCES/Documentation-rtla-Correct-command-line-example.patch b/SOURCES/Documentation-rtla-Correct-command-line-example.patch
new file mode 100644
index 0000000..467c3f3
--- /dev/null
+++ b/SOURCES/Documentation-rtla-Correct-command-line-example.patch
@@ -0,0 +1,37 @@
+From 877d95dcfd0a56102d4b97a9691115f5fb5e9ea3 Mon Sep 17 00:00:00 2001
+From: Pierre Gondois <pierre.gondois@arm.com>
+Date: Thu, 6 Oct 2022 10:44:09 +0200
+Subject: [PATCH] Documentation: rtla: Correct command line example
+
+The '-t/-T' parameters seem to have been swapped:
+-t/--trace[=file]: save the stopped trace
+to [file|timerlat_trace.txt]
+-T/--thread us: stop trace if the thread latency
+is higher than the argument in us
+
+Swap them back.
+
+Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
+Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Link: https://lore.kernel.org/r/20221006084409.3882542-1-pierre.gondois@arm.com
+Signed-off-by: Jonathan Corbet <corbet@lwn.net>
+---
+ Documentation/tools/rtla/rtla-timerlat-top.rst | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Documentation/tools/rtla/rtla-timerlat-top.rst b/Documentation/tools/rtla/rtla-timerlat-top.rst
+index 1c321de1c171..7c4e4b109493 100644
+--- a/Documentation/rtla-timerlat-top.rst
++++ b/Documentation/rtla-timerlat-top.rst
+@@ -39,7 +39,7 @@ higher than *30 us*. It is also set to stop the session if a *Thread* timer
+ latency higher than *30 us* is hit. Finally, it is set to save the trace
+ buffer if the stop condition is hit::
+ 
+-  [root@alien ~]# rtla timerlat top -s 30 -t 30 -T
++  [root@alien ~]# rtla timerlat top -s 30 -T 30 -t
+                    Timer Latency
+     0 00:00:59   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
+   CPU COUNT      |      cur       min       avg       max |      cur       min       avg       max
+-- 
+2.40.0
+
diff --git a/SOURCES/rtla-Add-hwnoise-tool.patch b/SOURCES/rtla-Add-hwnoise-tool.patch
new file mode 100644
index 0000000..dbf0822
--- /dev/null
+++ b/SOURCES/rtla-Add-hwnoise-tool.patch
@@ -0,0 +1,477 @@
+From 1f428356c38dcbe49fd2f1c488b41e88720ead92 Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Tue, 7 Feb 2023 22:48:50 +0100
+Subject: [PATCH 5/8] rtla: Add hwnoise tool
+
+The hwnoise tool is a special mode for the osnoise top tool.
+
+hwnoise dispatches the osnoise tracer and displays a summary of the noise.
+The difference is that it runs the tracer with the OSNOISE_IRQ_DISABLE
+option set, thus only allowing only hardware-related noise, resulting in
+a simplified output. hwnoise has the same features of osnoise.
+
+An example of the tool's output:
+
+ # rtla hwnoise -c 1-11 -T 1 -d 10m -q
+                                           Hardware-related Noise
+ duration:   0 00:10:00 | time is in us
+ CPU Period       Runtime        Noise  % CPU Aval   Max Noise   Max Single          HW          NMI
+   1 #599       599000000          138    99.99997           3            3           4           74
+   2 #599       599000000           85    99.99998           3            3           4           75
+   3 #599       599000000           86    99.99998           4            3           6           75
+   4 #599       599000000           81    99.99998           4            4           2           75
+   5 #599       599000000           85    99.99998           2            2           2           75
+
+Link: https://lkml.kernel.org/r/2d6f49a6f3a4f8b51b2c806458b1cff71ad4d014.1675805361.git.bristot@kernel.org
+
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Juri Lelli <juri.lelli@redhat.com>
+Cc: Clark Williams <williams@redhat.com>
+Cc: Bagas Sanjaya <bagasdotme@gmail.com>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ Makefile          |   2 +
+ src/osnoise.c     | 117 ++++++++++++++++++++++++++++++++++++++++++++++
+ src/osnoise.h     |   7 +++
+ src/osnoise_top.c |  84 ++++++++++++++++++++++++++-------
+ src/rtla.c        |   4 ++
+ 5 files changed, 198 insertions(+), 16 deletions(-)
+
+diff --git a/Makefile b/Makefile
+index 22e28b76f800..2456a399eb9a 100644
+--- a/Makefile
++++ b/Makefile
+@@ -119,6 +119,8 @@ install: doc_install
+ 	$(STRIP) $(DESTDIR)$(BINDIR)/rtla
+ 	@test ! -f $(DESTDIR)$(BINDIR)/osnoise || rm $(DESTDIR)$(BINDIR)/osnoise
+ 	ln -s rtla $(DESTDIR)$(BINDIR)/osnoise
++	@test ! -f $(DESTDIR)$(BINDIR)/hwnoise || rm $(DESTDIR)$(BINDIR)/hwnoise
++	ln -s rtla $(DESTDIR)$(BINDIR)/hwnoise
+ 	@test ! -f $(DESTDIR)$(BINDIR)/timerlat || rm $(DESTDIR)$(BINDIR)/timerlat
+ 	ln -s rtla $(DESTDIR)$(BINDIR)/timerlat
+ 
+diff --git a/src/osnoise.c b/src/osnoise.c
+index 4dee343909b1..3ca7a3853943 100644
+--- a/src/osnoise.c
++++ b/src/osnoise.c
+@@ -734,6 +734,113 @@ void osnoise_put_tracing_thresh(struct osnoise_context *context)
+ 	context->orig_tracing_thresh = OSNOISE_OPTION_INIT_VAL;
+ }
+ 
++static int osnoise_options_get_option(char *option)
++{
++	char *options = tracefs_instance_file_read(NULL, "osnoise/options", NULL);
++	char no_option[128];
++	int retval = 0;
++	char *opt;
++
++	if (!options)
++		return OSNOISE_OPTION_INIT_VAL;
++
++	/*
++	 * Check first if the option is disabled.
++	 */
++	snprintf(no_option, sizeof(no_option), "NO_%s", option);
++
++	opt = strstr(options, no_option);
++	if (opt)
++		goto out_free;
++
++	/*
++	 * Now that it is not disabled, if the string is there, it is
++	 * enabled. If the string is not there, the option does not exist.
++	 */
++	opt = strstr(options, option);
++	if (opt)
++		retval = 1;
++	else
++		retval = OSNOISE_OPTION_INIT_VAL;
++
++out_free:
++	free(options);
++	return retval;
++}
++
++static int osnoise_options_set_option(char *option, bool onoff)
++{
++	char no_option[128];
++
++	if (onoff)
++		return tracefs_instance_file_write(NULL, "osnoise/options", option);
++
++	snprintf(no_option, sizeof(no_option), "NO_%s", option);
++
++	return tracefs_instance_file_write(NULL, "osnoise/options", no_option);
++}
++
++static int osnoise_get_irq_disable(struct osnoise_context *context)
++{
++	if (context->opt_irq_disable != OSNOISE_OPTION_INIT_VAL)
++		return context->opt_irq_disable;
++
++	if (context->orig_opt_irq_disable != OSNOISE_OPTION_INIT_VAL)
++		return context->orig_opt_irq_disable;
++
++	context->orig_opt_irq_disable = osnoise_options_get_option("OSNOISE_IRQ_DISABLE");
++
++	return context->orig_opt_irq_disable;
++}
++
++int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff)
++{
++	int opt_irq_disable = osnoise_get_irq_disable(context);
++	int retval;
++
++	if (opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
++		return -1;
++
++	if (opt_irq_disable == onoff)
++		return 0;
++
++	retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", onoff);
++	if (retval < 0)
++		return -1;
++
++	context->opt_irq_disable = onoff;
++
++	return 0;
++}
++
++static void osnoise_restore_irq_disable(struct osnoise_context *context)
++{
++	int retval;
++
++	if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
++		return;
++
++	if (context->orig_opt_irq_disable == context->opt_irq_disable)
++		goto out_done;
++
++	retval = osnoise_options_set_option("OSNOISE_IRQ_DISABLE", context->orig_opt_irq_disable);
++	if (retval < 0)
++		err_msg("Could not restore original OSNOISE_IRQ_DISABLE option\n");
++
++out_done:
++	context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
++}
++
++static void osnoise_put_irq_disable(struct osnoise_context *context)
++{
++	osnoise_restore_irq_disable(context);
++
++	if (context->orig_opt_irq_disable == OSNOISE_OPTION_INIT_VAL)
++		return;
++
++	context->orig_opt_irq_disable = OSNOISE_OPTION_INIT_VAL;
++}
++
+ /*
+  * enable_osnoise - enable osnoise tracer in the trace_instance
+  */
+@@ -798,6 +905,9 @@ struct osnoise_context *osnoise_context_alloc(void)
+ 	context->orig_tracing_thresh	= OSNOISE_OPTION_INIT_VAL;
+ 	context->tracing_thresh		= OSNOISE_OPTION_INIT_VAL;
+ 
++	context->orig_opt_irq_disable	= OSNOISE_OPTION_INIT_VAL;
++	context->opt_irq_disable	= OSNOISE_OPTION_INIT_VAL;
++
+ 	osnoise_get_context(context);
+ 
+ 	return context;
+@@ -824,6 +934,7 @@ void osnoise_put_context(struct osnoise_context *context)
+ 	osnoise_put_timerlat_period_us(context);
+ 	osnoise_put_print_stack(context);
+ 	osnoise_put_tracing_thresh(context);
++	osnoise_put_irq_disable(context);
+ 
+ 	free(context);
+ }
+@@ -958,3 +1069,9 @@ int osnoise_main(int argc, char *argv[])
+ 	osnoise_usage(1);
+ 	exit(1);
+ }
++
++int hwnoise_main(int argc, char *argv[])
++{
++	osnoise_top_main(argc, argv);
++	exit(0);
++}
+diff --git a/src/osnoise.h b/src/osnoise.h
+index 04a4384cc544..4dcf22ccd704 100644
+--- a/src/osnoise.h
++++ b/src/osnoise.h
+@@ -38,6 +38,10 @@ struct osnoise_context {
+ 	/* -1 as init value because 0 is disabled */
+ 	long long		orig_print_stack;
+ 	long long		print_stack;
++
++	/* -1 as init value because 0 is off */
++	int			orig_opt_irq_disable;
++	int			opt_irq_disable;
+ };
+ 
+ /*
+@@ -79,6 +83,8 @@ void osnoise_restore_print_stack(struct osnoise_context *context);
+ int osnoise_set_print_stack(struct osnoise_context *context,
+ 			    long long print_stack);
+ 
++int osnoise_set_irq_disable(struct osnoise_context *context, bool onoff);
++
+ /*
+  * osnoise_tool -  osnoise based tool definition.
+  */
+@@ -97,3 +103,4 @@ struct osnoise_tool *osnoise_init_trace_tool(char *tracer);
+ int osnoise_hist_main(int argc, char *argv[]);
+ int osnoise_top_main(int argc, char **argv);
+ int osnoise_main(int argc, char **argv);
++int hwnoise_main(int argc, char **argv);
+diff --git a/src/osnoise_top.c b/src/osnoise_top.c
+index 76479bfb2922..562f2e4b18c5 100644
+--- a/src/osnoise_top.c
++++ b/src/osnoise_top.c
+@@ -14,6 +14,11 @@
+ #include "osnoise.h"
+ #include "utils.h"
+ 
++enum osnoise_mode {
++	MODE_OSNOISE = 0,
++	MODE_HWNOISE
++};
++
+ /*
+  * osnoise top parameters
+  */
+@@ -32,6 +37,7 @@ struct osnoise_top_params {
+ 	int			set_sched;
+ 	struct sched_attr	sched_param;
+ 	struct trace_events	*events;
++	enum osnoise_mode	mode;
+ };
+ 
+ struct osnoise_top_cpu {
+@@ -143,15 +149,23 @@ osnoise_top_handler(struct trace_seq *s, struct tep_record *record,
+  */
+ static void osnoise_top_header(struct osnoise_tool *top)
+ {
++	struct osnoise_top_params *params = top->params;
+ 	struct trace_seq *s = top->trace.seq;
+ 	char duration[26];
+ 
+ 	get_duration(top->start_time, duration, sizeof(duration));
+ 
+ 	trace_seq_printf(s, "\033[2;37;40m");
+-	trace_seq_printf(s, "                                          Operating System Noise");
+-	trace_seq_printf(s, "                                     ");
+-	trace_seq_printf(s, "                                     ");
++	trace_seq_printf(s, "                                          ");
++
++	if (params->mode == MODE_OSNOISE) {
++		trace_seq_printf(s, "Operating System Noise");
++		trace_seq_printf(s, "                                       ");
++	} else if (params->mode == MODE_HWNOISE) {
++		trace_seq_printf(s, "Hardware-related Noise");
++	}
++
++	trace_seq_printf(s, "                                   ");
+ 	trace_seq_printf(s, "\033[0;0;0m");
+ 	trace_seq_printf(s, "\n");
+ 
+@@ -162,7 +176,14 @@ static void osnoise_top_header(struct osnoise_tool *top)
+ 	trace_seq_printf(s, "       Noise ");
+ 	trace_seq_printf(s, " %% CPU Aval ");
+ 	trace_seq_printf(s, "  Max Noise   Max Single ");
+-	trace_seq_printf(s, "         HW          NMI          IRQ      Softirq       Thread");
++	trace_seq_printf(s, "         HW          NMI");
++
++	if (params->mode == MODE_HWNOISE)
++		goto eol;
++
++	trace_seq_printf(s, "          IRQ      Softirq       Thread");
++
++eol:
+ 	trace_seq_printf(s, "\033[0;0;0m");
+ 	trace_seq_printf(s, "\n");
+ }
+@@ -181,6 +202,7 @@ static void clear_terminal(struct trace_seq *seq)
+  */
+ static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
+ {
++	struct osnoise_top_params *params = tool->params;
+ 	struct trace_seq *s = tool->trace.seq;
+ 	struct osnoise_top_cpu *cpu_data;
+ 	struct osnoise_top_data *data;
+@@ -205,6 +227,12 @@ static void osnoise_top_print(struct osnoise_tool *tool, int cpu)
+ 
+ 	trace_seq_printf(s, "%12llu ", cpu_data->hw_count);
+ 	trace_seq_printf(s, "%12llu ", cpu_data->nmi_count);
++
++	if (params->mode == MODE_HWNOISE) {
++		trace_seq_printf(s, "\n");
++		return;
++	}
++
+ 	trace_seq_printf(s, "%12llu ", cpu_data->irq_count);
+ 	trace_seq_printf(s, "%12llu ", cpu_data->softirq_count);
+ 	trace_seq_printf(s, "%12llu\n", cpu_data->thread_count);
+@@ -241,12 +269,12 @@ osnoise_print_stats(struct osnoise_top_params *params, struct osnoise_tool *top)
+ /*
+  * osnoise_top_usage - prints osnoise top usage message
+  */
+-void osnoise_top_usage(char *usage)
++static void osnoise_top_usage(struct osnoise_top_params *params, char *usage)
+ {
+ 	int i;
+ 
+ 	static const char * const msg[] = {
+-		"  usage: rtla osnoise [top] [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
++		" [-h] [-q] [-D] [-d s] [-a us] [-p us] [-r us] [-s us] [-S us] \\",
+ 		"	  [-T us] [-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] \\",
+ 		"	  [-c cpu-list] [-P priority]",
+ 		"",
+@@ -277,9 +305,22 @@ void osnoise_top_usage(char *usage)
+ 	if (usage)
+ 		fprintf(stderr, "%s\n", usage);
+ 
+-	fprintf(stderr, "rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
++	if (params->mode == MODE_OSNOISE) {
++		fprintf(stderr,
++			"rtla osnoise top: a per-cpu summary of the OS noise (version %s)\n",
+ 			VERSION);
+ 
++		fprintf(stderr, "  usage: rtla osnoise [top]");
++	}
++
++	if (params->mode == MODE_HWNOISE) {
++		fprintf(stderr,
++			"rtla hwnoise: a summary of hardware-related noise (version %s)\n",
++			VERSION);
++
++		fprintf(stderr, "  usage: rtla hwnoise");
++	}
++
+ 	for (i = 0; msg[i]; i++)
+ 		fprintf(stderr, "%s\n", msg[i]);
+ 	exit(1);
+@@ -299,6 +340,9 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 	if (!params)
+ 		exit(1);
+ 
++	if (strcmp(argv[0], "hwnoise") == 0)
++		params->mode = MODE_HWNOISE;
++
+ 	while (1) {
+ 		static struct option long_options[] = {
+ 			{"auto",		required_argument,	0, 'a'},
+@@ -345,7 +389,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 		case 'c':
+ 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ 			if (retval)
+-				osnoise_top_usage("\nInvalid -c cpu list\n");
++				osnoise_top_usage(params, "\nInvalid -c cpu list\n");
+ 			params->cpus = optarg;
+ 			break;
+ 		case 'D':
+@@ -354,7 +398,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 		case 'd':
+ 			params->duration = parse_seconds_duration(optarg);
+ 			if (!params->duration)
+-				osnoise_top_usage("Invalid -D duration\n");
++				osnoise_top_usage(params, "Invalid -D duration\n");
+ 			break;
+ 		case 'e':
+ 			tevent = trace_event_alloc(optarg);
+@@ -370,17 +414,17 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 			break;
+ 		case 'h':
+ 		case '?':
+-			osnoise_top_usage(NULL);
++			osnoise_top_usage(params, NULL);
+ 			break;
+ 		case 'p':
+ 			params->period = get_llong_from_str(optarg);
+ 			if (params->period > 10000000)
+-				osnoise_top_usage("Period longer than 10 s\n");
++				osnoise_top_usage(params, "Period longer than 10 s\n");
+ 			break;
+ 		case 'P':
+ 			retval = parse_prio(optarg, &params->sched_param);
+ 			if (retval == -1)
+-				osnoise_top_usage("Invalid -P priority");
++				osnoise_top_usage(params, "Invalid -P priority");
+ 			params->set_sched = 1;
+ 			break;
+ 		case 'q':
+@@ -389,7 +433,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 		case 'r':
+ 			params->runtime = get_llong_from_str(optarg);
+ 			if (params->runtime < 100)
+-				osnoise_top_usage("Runtime shorter than 100 us\n");
++				osnoise_top_usage(params, "Runtime shorter than 100 us\n");
+ 			break;
+ 		case 's':
+ 			params->stop_us = get_llong_from_str(optarg);
+@@ -415,7 +459,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 					exit(EXIT_FAILURE);
+ 				}
+ 			} else {
+-				osnoise_top_usage("--trigger requires a previous -e\n");
++				osnoise_top_usage(params, "--trigger requires a previous -e\n");
+ 			}
+ 			break;
+ 		case '1': /* filter */
+@@ -426,11 +470,11 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv)
+ 					exit(EXIT_FAILURE);
+ 				}
+ 			} else {
+-				osnoise_top_usage("--filter requires a previous -e\n");
++				osnoise_top_usage(params, "--filter requires a previous -e\n");
+ 			}
+ 			break;
+ 		default:
+-			osnoise_top_usage("Invalid option");
++			osnoise_top_usage(params, "Invalid option");
+ 		}
+ 	}
+ 
+@@ -495,6 +539,14 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p
+ 		}
+ 	}
+ 
++	if (params->mode == MODE_HWNOISE) {
++		retval = osnoise_set_irq_disable(tool->context, 1);
++		if (retval) {
++			err_msg("Failed to set OSNOISE_IRQ_DISABLE option\n");
++			goto out_err;
++		}
++	}
++
+ 	return 0;
+ 
+ out_err:
+diff --git a/src/rtla.c b/src/rtla.c
+index 52e8f1825281..7635c70123ab 100644
+--- a/src/rtla.c
++++ b/src/rtla.c
+@@ -26,6 +26,7 @@ static void rtla_usage(int err)
+ 		"",
+ 		"  commands:",
+ 		"     osnoise  - gives information about the operating system noise (osnoise)",
++		"     hwnoise  - gives information about hardware-related noise",
+ 		"     timerlat - measures the timer irq and thread latency",
+ 		"",
+ 		NULL,
+@@ -47,6 +48,9 @@ int run_command(int argc, char **argv, int start_position)
+ 	if (strcmp(argv[start_position], "osnoise") == 0) {
+ 		osnoise_main(argc-start_position, &argv[start_position]);
+ 		goto ran;
++	} else if (strcmp(argv[start_position], "hwnoise") == 0) {
++		hwnoise_main(argc-start_position, &argv[start_position]);
++		goto ran;
+ 	} else if (strcmp(argv[start_position], "timerlat") == 0) {
+ 		timerlat_main(argc-start_position, &argv[start_position]);
+ 		goto ran;
+-- 
+2.40.1
+
diff --git a/SOURCES/rtla-timerlat-Add-auto-analysis-core.patch b/SOURCES/rtla-timerlat-Add-auto-analysis-core.patch
new file mode 100644
index 0000000..ea59904
--- /dev/null
+++ b/SOURCES/rtla-timerlat-Add-auto-analysis-core.patch
@@ -0,0 +1,1056 @@
+From 27e348b221f6a78cbe86e7def8e2611f84509211 Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Tue, 31 Jan 2023 16:38:53 +0100
+Subject: [PATCH 3/8] rtla/timerlat: Add auto-analysis core
+
+Currently, timerlat displays a summary of the timerlat tracer results
+saving the trace if the system hits a stop condition.
+
+While this represented a huge step forward, the root cause was not
+that is accessible to non-expert users.
+
+The auto-analysis fulfill this gap by parsing the trace timerlat runs,
+printing an intuitive auto-analysis.
+
+Link: https://lkml.kernel.org/r/1ee073822f6a2cbb33da0c817331d0d4045e837f.1675179318.git.bristot@kernel.org
+
+Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/timerlat_aa.c | 990 ++++++++++++++++++++++++++++++++++++++++++++++
+ src/timerlat_aa.h |  12 +
+ src/utils.h       |   3 +
+ 3 files changed, 1005 insertions(+)
+ create mode 100644 tools/tracing/rtla/src/timerlat_aa.c
+ create mode 100644 tools/tracing/rtla/src/timerlat_aa.h
+
+diff --git a/src/timerlat_aa.c b/src/timerlat_aa.c
+new file mode 100644
+index 000000000000..ec4e0f4b0e6c
+--- /dev/null
++++ b/src/timerlat_aa.c
+@@ -0,0 +1,990 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
++ */
++
++#include <stdlib.h>
++#include <errno.h>
++#include "utils.h"
++#include "osnoise.h"
++#include "timerlat.h"
++
++enum timelat_state {
++	TIMERLAT_INIT = 0,
++	TIMERLAT_WAITING_IRQ,
++	TIMERLAT_WAITING_THREAD,
++};
++
++#define MAX_COMM		24
++
++/*
++ * Per-cpu data statistics and data.
++ */
++struct timerlat_aa_data {
++	/* Current CPU state */
++	int			curr_state;
++
++	/* timerlat IRQ latency */
++	unsigned long long	tlat_irq_seqnum;
++	unsigned long long	tlat_irq_latency;
++	unsigned long long	tlat_irq_timstamp;
++
++	/* timerlat Thread latency */
++	unsigned long long	tlat_thread_seqnum;
++	unsigned long long	tlat_thread_latency;
++	unsigned long long	tlat_thread_timstamp;
++
++	/*
++	 * Information about the thread running when the IRQ
++	 * arrived.
++	 *
++	 * This can be blocking or interference, depending on the
++	 * priority of the thread. Assuming timerlat is the highest
++	 * prio, it is blocking. If timerlat has a lower prio, it is
++	 * interference.
++	 * note: "unsigned long long" because they are fetch using tep_get_field_val();
++	 */
++	unsigned long long	run_thread_pid;
++	char			run_thread_comm[MAX_COMM];
++	unsigned long long	thread_blocking_duration;
++	unsigned long long	max_exit_idle_latency;
++
++	/* Information about the timerlat timer irq */
++	unsigned long long	timer_irq_start_time;
++	unsigned long long	timer_irq_start_delay;
++	unsigned long long	timer_irq_duration;
++	unsigned long long	timer_exit_from_idle;
++
++	/*
++	 * Information about the last IRQ before the timerlat irq
++	 * arrived.
++	 *
++	 * If now - timestamp is <= latency, it might have influenced
++	 * in the timerlat irq latency. Otherwise, ignore it.
++	 */
++	unsigned long long	prev_irq_duration;
++	unsigned long long	prev_irq_timstamp;
++
++	/*
++	 * Interference sum.
++	 */
++	unsigned long long	thread_nmi_sum;
++	unsigned long long	thread_irq_sum;
++	unsigned long long	thread_softirq_sum;
++	unsigned long long	thread_thread_sum;
++
++	/*
++	 * Interference task information.
++	 */
++	struct trace_seq	*prev_irqs_seq;
++	struct trace_seq	*nmi_seq;
++	struct trace_seq	*irqs_seq;
++	struct trace_seq	*softirqs_seq;
++	struct trace_seq	*threads_seq;
++	struct trace_seq	*stack_seq;
++
++	/*
++	 * Current thread.
++	 */
++	char			current_comm[MAX_COMM];
++	unsigned long long	current_pid;
++
++	/*
++	 * Is the system running a kworker?
++	 */
++	unsigned long long	kworker;
++	unsigned long long	kworker_func;
++};
++
++/*
++ * The analysis context and system wide view
++ */
++struct timerlat_aa_context {
++	int nr_cpus;
++	int dump_tasks;
++
++	/* per CPU data */
++	struct timerlat_aa_data *taa_data;
++
++	/*
++	 * required to translate function names and register
++	 * events.
++	 */
++	struct osnoise_tool *tool;
++};
++
++/*
++ * The data is stored as a local variable, but accessed via a helper function.
++ *
++ * It could be stored inside the trace context. But every access would
++ * require container_of() + a series of pointers. Do we need it? Not sure.
++ *
++ * For now keep it simple. If needed, store it in the tool, add the *context
++ * as a parameter in timerlat_aa_get_ctx() and do the magic there.
++ */
++static struct timerlat_aa_context *__timerlat_aa_ctx;
++
++static struct timerlat_aa_context *timerlat_aa_get_ctx(void)
++{
++	return __timerlat_aa_ctx;
++}
++
++/*
++ * timerlat_aa_get_data - Get the per-cpu data from the timerlat context
++ */
++static struct timerlat_aa_data
++*timerlat_aa_get_data(struct timerlat_aa_context *taa_ctx, int cpu)
++{
++	return &taa_ctx->taa_data[cpu];
++}
++
++/*
++ * timerlat_aa_irq_latency - Handles timerlat IRQ event
++ */
++static int timerlat_aa_irq_latency(struct timerlat_aa_data *taa_data,
++				   struct trace_seq *s, struct tep_record *record,
++				   struct tep_event *event)
++{
++	/*
++	 * For interference, we start now looking for things that can delay
++	 * the thread.
++	 */
++	taa_data->curr_state = TIMERLAT_WAITING_THREAD;
++	taa_data->tlat_irq_timstamp = record->ts;
++
++	/*
++	 * Zero values.
++	 */
++	taa_data->thread_nmi_sum = 0;
++	taa_data->thread_irq_sum = 0;
++	taa_data->thread_softirq_sum = 0;
++	taa_data->thread_blocking_duration = 0;
++	taa_data->timer_irq_start_time = 0;
++	taa_data->timer_irq_duration = 0;
++	taa_data->timer_exit_from_idle = 0;
++
++	/*
++	 * Zero interference tasks.
++	 */
++	trace_seq_reset(taa_data->nmi_seq);
++	trace_seq_reset(taa_data->irqs_seq);
++	trace_seq_reset(taa_data->softirqs_seq);
++	trace_seq_reset(taa_data->threads_seq);
++
++	/* IRQ latency values */
++	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_irq_latency, 1);
++	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_irq_seqnum, 1);
++
++	/* The thread that can cause blocking */
++	tep_get_common_field_val(s, event, "common_pid", record, &taa_data->run_thread_pid, 1);
++
++	/*
++	 * Get exit from idle case.
++	 *
++	 * If it is not idle thread:
++	 */
++	if (taa_data->run_thread_pid)
++		return 0;
++
++	/*
++	 * if the latency is shorter than the known exit from idle:
++	 */
++	if (taa_data->tlat_irq_latency < taa_data->max_exit_idle_latency)
++		return 0;
++
++	/*
++	 * To be safe, ignore the cases in which an IRQ/NMI could have
++	 * interfered with the timerlat IRQ.
++	 */
++	if (taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency
++	    < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
++		return 0;
++
++	taa_data->max_exit_idle_latency = taa_data->tlat_irq_latency;
++
++	return 0;
++}
++
++/*
++ * timerlat_aa_thread_latency - Handles timerlat thread event
++ */
++static int timerlat_aa_thread_latency(struct timerlat_aa_data *taa_data,
++				      struct trace_seq *s, struct tep_record *record,
++				      struct tep_event *event)
++{
++	/*
++	 * For interference, we start now looking for things that can delay
++	 * the IRQ of the next cycle.
++	 */
++	taa_data->curr_state = TIMERLAT_WAITING_IRQ;
++	taa_data->tlat_thread_timstamp = record->ts;
++
++	/* Thread latency values */
++	tep_get_field_val(s, event, "timer_latency", record, &taa_data->tlat_thread_latency, 1);
++	tep_get_field_val(s, event, "seqnum", record, &taa_data->tlat_thread_seqnum, 1);
++
++	return 0;
++}
++
++/*
++ * timerlat_aa_handler - Handle timerlat events
++ *
++ * This function is called to handle timerlat events recording statistics.
++ *
++ * Returns 0 on success, -1 otherwise.
++ */
++int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
++			struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long long thread;
++
++	if (!taa_data)
++		return -1;
++
++	tep_get_field_val(s, event, "context", record, &thread, 1);
++	if (!thread)
++		return timerlat_aa_irq_latency(taa_data, s, record, event);
++	else
++		return timerlat_aa_thread_latency(taa_data, s, record, event);
++}
++
++/*
++ * timerlat_aa_nmi_handler - Handles NMI noise
++ *
++ * It is used to collect information about interferences from NMI. It is
++ * hooked to the osnoise:nmi_noise event.
++ */
++static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *record,
++				   struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long long duration;
++	unsigned long long start;
++
++	tep_get_field_val(s, event, "duration", record, &duration, 1);
++	tep_get_field_val(s, event, "start", record, &start, 1);
++
++	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
++		taa_data->prev_irq_duration = duration;
++		taa_data->prev_irq_timstamp = start;
++
++		trace_seq_reset(taa_data->prev_irqs_seq);
++		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s	\t\t\t%9.2f us\n",
++			 "nmi", ns_to_usf(duration));
++		return 0;
++	}
++
++	taa_data->thread_nmi_sum += duration;
++	trace_seq_printf(taa_data->nmi_seq, "	%24s	\t\t\t%9.2f us\n",
++		 "nmi", ns_to_usf(duration));
++
++	return 0;
++}
++
++/*
++ * timerlat_aa_irq_handler - Handles IRQ noise
++ *
++ * It is used to collect information about interferences from IRQ. It is
++ * hooked to the osnoise:irq_noise event.
++ *
++ * It is a little bit more complex than the other because it measures:
++ *	- The IRQs that can delay the timer IRQ before it happened.
++ *	- The Timerlat IRQ handler
++ *	- The IRQs that happened between the timerlat IRQ and the timerlat thread
++ *	  (IRQ interference).
++ */
++static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *record,
++				   struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long long expected_start;
++	unsigned long long duration;
++	unsigned long long vector;
++	unsigned long long start;
++	char *desc;
++	int val;
++
++	tep_get_field_val(s, event, "duration", record, &duration, 1);
++	tep_get_field_val(s, event, "start", record, &start, 1);
++	tep_get_field_val(s, event, "vector", record, &vector, 1);
++	desc = tep_get_field_raw(s, event, "desc", record, &val, 1);
++
++	/*
++	 * Before the timerlat IRQ.
++	 */
++	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ) {
++		taa_data->prev_irq_duration = duration;
++		taa_data->prev_irq_timstamp = start;
++
++		trace_seq_reset(taa_data->prev_irqs_seq);
++		trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
++				 desc, vector, ns_to_usf(duration));
++		return 0;
++	}
++
++	/*
++	 * The timerlat IRQ: taa_data->timer_irq_start_time is zeroed at
++	 * the timerlat irq handler.
++	 */
++	if (!taa_data->timer_irq_start_time) {
++		expected_start = taa_data->tlat_irq_timstamp - taa_data->tlat_irq_latency;
++
++		taa_data->timer_irq_start_time = start;
++		taa_data->timer_irq_duration = duration;
++
++		taa_data->timer_irq_start_delay = taa_data->timer_irq_start_time - expected_start;
++
++		/*
++		 * not exit from idle.
++		 */
++		if (taa_data->run_thread_pid)
++			return 0;
++
++		if (expected_start > taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
++			taa_data->timer_exit_from_idle = taa_data->timer_irq_start_delay;
++
++		return 0;
++	}
++
++	/*
++	 * IRQ interference.
++	 */
++	taa_data->thread_irq_sum += duration;
++	trace_seq_printf(taa_data->irqs_seq, "	%24s:%-3llu	\t	%9.2f us\n",
++			 desc, vector, ns_to_usf(duration));
++
++	return 0;
++}
++
++static char *softirq_name[] = { "HI", "TIMER",	"NET_TX", "NET_RX", "BLOCK",
++				"IRQ_POLL", "TASKLET", "SCHED", "HRTIMER", "RCU" };
++
++
++/*
++ * timerlat_aa_softirq_handler - Handles Softirq noise
++ *
++ * It is used to collect information about interferences from Softirq. It is
++ * hooked to the osnoise:softirq_noise event.
++ *
++ * It is only printed in the non-rt kernel, as softirqs become thread on RT.
++ */
++static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *record,
++				       struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long long duration;
++	unsigned long long vector;
++	unsigned long long start;
++
++	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
++		return 0;
++
++	tep_get_field_val(s, event, "duration", record, &duration, 1);
++	tep_get_field_val(s, event, "start", record, &start, 1);
++	tep_get_field_val(s, event, "vector", record, &vector, 1);
++
++	taa_data->thread_softirq_sum += duration;
++
++	trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu	\t	%9.2f us\n",
++			 softirq_name[vector], vector, ns_to_usf(duration));
++	return 0;
++}
++
++/*
++ * timerlat_aa_softirq_handler - Handles thread noise
++ *
++ * It is used to collect information about interferences from threads. It is
++ * hooked to the osnoise:thread_noise event.
++ *
++ * Note: if you see thread noise, your timerlat thread was not the highest prio one.
++ */
++static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *record,
++				      struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long long duration;
++	unsigned long long start;
++	unsigned long long pid;
++	const char *comm;
++	int val;
++
++	if (taa_data->curr_state == TIMERLAT_WAITING_IRQ)
++		return 0;
++
++	tep_get_field_val(s, event, "duration", record, &duration, 1);
++	tep_get_field_val(s, event, "start", record, &start, 1);
++
++	tep_get_common_field_val(s, event, "common_pid", record, &pid, 1);
++	comm = tep_get_field_raw(s, event, "comm", record, &val, 1);
++
++	if (pid == taa_data->run_thread_pid && !taa_data->thread_blocking_duration) {
++		taa_data->thread_blocking_duration = duration;
++
++		if (comm)
++			strncpy(taa_data->run_thread_comm, comm, MAX_COMM);
++		else
++			sprintf(taa_data->run_thread_comm, "<...>");
++
++	} else {
++		taa_data->thread_thread_sum += duration;
++
++		trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu	\t\t%9.2f us\n",
++			 comm, pid, ns_to_usf(duration));
++	}
++
++	return 0;
++}
++
++/*
++ * timerlat_aa_stack_handler - Handles timerlat IRQ stack trace
++ *
++ * Saves and parse the stack trace generated by the timerlat IRQ.
++ */
++static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *record,
++			      struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	unsigned long *caller;
++	const char *function;
++	int val, i;
++
++	trace_seq_reset(taa_data->stack_seq);
++
++	trace_seq_printf(taa_data->stack_seq, "    Blocking thread stack trace\n");
++	caller = tep_get_field_raw(s, event, "caller", record, &val, 1);
++	if (caller) {
++		for (i = 0; ; i++) {
++			function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]);
++			if (!function)
++				break;
++			trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function);
++		}
++	}
++	return 0;
++}
++
++/*
++ * timerlat_aa_sched_switch_handler - Tracks the current thread running on the CPU
++ *
++ * Handles the sched:sched_switch event to trace the current thread running on the
++ * CPU. It is used to display the threads running on the other CPUs when the trace
++ * stops.
++ */
++static int timerlat_aa_sched_switch_handler(struct trace_seq *s, struct tep_record *record,
++					    struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++	const char *comm;
++	int val;
++
++	tep_get_field_val(s, event, "next_pid", record, &taa_data->current_pid, 1);
++	comm = tep_get_field_raw(s, event, "next_comm", record, &val, 1);
++
++	strncpy(taa_data->current_comm, comm, MAX_COMM);
++
++	/*
++	 * If this was a kworker, clean the last kworkers that ran.
++	 */
++	taa_data->kworker = 0;
++	taa_data->kworker_func = 0;
++
++	return 0;
++}
++
++/*
++ * timerlat_aa_kworker_start_handler - Tracks a kworker running on the CPU
++ *
++ * Handles workqueue:workqueue_execute_start event, keeping track of
++ * the job that a kworker could be doing in the CPU.
++ *
++ * We already catch problems of hardware related latencies caused by work queues
++ * running driver code that causes hardware stall. For example, with DRM drivers.
++ */
++static int timerlat_aa_kworker_start_handler(struct trace_seq *s, struct tep_record *record,
++					     struct tep_event *event, void *context)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	struct timerlat_aa_data *taa_data = timerlat_aa_get_data(taa_ctx, record->cpu);
++
++	tep_get_field_val(s, event, "work", record, &taa_data->kworker, 1);
++	tep_get_field_val(s, event, "function", record, &taa_data->kworker_func, 1);
++	return 0;
++}
++
++/*
++ * timerlat_thread_analysis - Prints the analysis of a CPU that hit a stop tracing
++ *
++ * This is the core of the analysis.
++ */
++static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
++				     int irq_thresh, int thread_thresh)
++{
++	unsigned long long exp_irq_ts;
++	int total;
++	int irq;
++
++	/*
++	 * IRQ latency or Thread latency?
++	 */
++	if (taa_data->tlat_irq_seqnum > taa_data->tlat_thread_seqnum) {
++		irq = 1;
++		total = taa_data->tlat_irq_latency;
++	} else {
++		irq = 0;
++		total = taa_data->tlat_thread_latency;
++	}
++
++	/*
++	 * Expected IRQ arrival time using the trace clock as the base.
++	 */
++	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
++
++	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
++		printf("  Previous IRQ interference:	\t	up to %9.2f us",
++			ns_to_usf(taa_data->prev_irq_duration));
++
++	/*
++	 * The delay that the IRQ suffered before starting.
++	 */
++	printf("  IRQ handler delay:		%16s	%9.2f us (%.2f %%)\n",
++		(ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "",
++		ns_to_usf(taa_data->timer_irq_start_delay),
++		ns_to_per(total, taa_data->timer_irq_start_delay));
++
++	/*
++	 * Timerlat IRQ.
++	 */
++	printf("  IRQ latency:	\t\t\t\t	%9.2f us\n",
++		ns_to_usf(taa_data->tlat_irq_latency));
++
++	if (irq) {
++		/*
++		 * If the trace stopped due to IRQ, the other events will not happen
++		 * because... the trace stopped :-).
++		 *
++		 * That is all folks, the stack trace was printed before the stop,
++		 * so it will be displayed, it is the key.
++		 */
++		printf("  Blocking thread:\n");
++		printf("	%24s:%-9llu\n",
++			taa_data->run_thread_comm, taa_data->run_thread_pid);
++	} else  {
++		/*
++		 * The duration of the IRQ handler that handled the timerlat IRQ.
++		 */
++		printf("  Timerlat IRQ duration:	\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->timer_irq_duration),
++			ns_to_per(total, taa_data->timer_irq_duration));
++
++		/*
++		 * The amount of time that the current thread postponed the scheduler.
++		 *
++		 * Recalling that it is net from NMI/IRQ/Softirq interference, so there
++		 * is no need to compute values here.
++		 */
++		printf("  Blocking thread:	\t\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->thread_blocking_duration),
++			ns_to_per(total, taa_data->thread_blocking_duration));
++
++		printf("	%24s:%-9llu		%9.2f us\n",
++			taa_data->run_thread_comm, taa_data->run_thread_pid,
++			ns_to_usf(taa_data->thread_blocking_duration));
++	}
++
++	/*
++	 * Print the stack trace!
++	 */
++	trace_seq_do_printf(taa_data->stack_seq);
++
++	/*
++	 * NMIs can happen during the IRQ, so they are always possible.
++	 */
++	if (taa_data->thread_nmi_sum)
++		printf("  NMI interference	\t\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->thread_nmi_sum),
++			ns_to_per(total, taa_data->thread_nmi_sum));
++
++	/*
++	 * If it is an IRQ latency, the other factors can be skipped.
++	 */
++	if (irq)
++		goto print_total;
++
++	/*
++	 * Prints the interference caused by IRQs to the thread latency.
++	 */
++	if (taa_data->thread_irq_sum) {
++		printf("  IRQ interference	\t\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->thread_irq_sum),
++			ns_to_per(total, taa_data->thread_irq_sum));
++
++		trace_seq_do_printf(taa_data->irqs_seq);
++	}
++
++	/*
++	 * Prints the interference caused by Softirqs to the thread latency.
++	 */
++	if (taa_data->thread_softirq_sum) {
++		printf("  Softirq interference	\t\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->thread_softirq_sum),
++			ns_to_per(total, taa_data->thread_softirq_sum));
++
++		trace_seq_do_printf(taa_data->softirqs_seq);
++	}
++
++	/*
++	 * Prints the interference caused by other threads to the thread latency.
++	 *
++	 * If this happens, your timerlat is not the highest prio. OK, migration
++	 * thread can happen. But otherwise, you are not measuring the "scheduling
++	 * latency" only, and here is the difference from scheduling latency and
++	 * timer handling latency.
++	 */
++	if (taa_data->thread_thread_sum) {
++		printf("  Thread interference	\t\t\t	%9.2f us (%.2f %%)\n",
++			ns_to_usf(taa_data->thread_thread_sum),
++			ns_to_per(total, taa_data->thread_thread_sum));
++
++		trace_seq_do_printf(taa_data->threads_seq);
++	}
++
++	/*
++	 * Done.
++	 */
++print_total:
++	printf("------------------------------------------------------------------------\n");
++	printf("  %s latency:	\t\t\t	%9.2f us (100%%)\n", irq ? "IRQ" : "Thread",
++		ns_to_usf(total));
++}
++
++/**
++ * timerlat_auto_analysis - Analyze the collected data
++ */
++void timerlat_auto_analysis(int irq_thresh, int thread_thresh)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++	unsigned long long max_exit_from_idle = 0;
++	struct timerlat_aa_data *taa_data;
++	int max_exit_from_idle_cpu;
++	struct tep_handle *tep;
++	int cpu;
++
++	/* bring stop tracing to the ns scale */
++	irq_thresh = irq_thresh * 1000;
++	thread_thresh = thread_thresh * 1000;
++
++	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
++		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
++
++		if (irq_thresh && taa_data->tlat_irq_latency >= irq_thresh) {
++			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
++			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
++		} else if (thread_thresh && (taa_data->tlat_thread_latency) >= thread_thresh) {
++			printf("## CPU %d hit stop tracing, analyzing it ##\n", cpu);
++			timerlat_thread_analysis(taa_data, cpu, irq_thresh, thread_thresh);
++		}
++
++		if (taa_data->max_exit_idle_latency > max_exit_from_idle) {
++			max_exit_from_idle = taa_data->max_exit_idle_latency;
++			max_exit_from_idle_cpu = cpu;
++		}
++
++	}
++
++	if (max_exit_from_idle) {
++		printf("\n");
++		printf("Max timerlat IRQ latency from idle: %.2f us in cpu %d\n",
++			ns_to_usf(max_exit_from_idle), max_exit_from_idle_cpu);
++	}
++	if (!taa_ctx->dump_tasks)
++		return;
++
++	printf("\n");
++	printf("Printing CPU tasks:\n");
++	for (cpu = 0; cpu < taa_ctx->nr_cpus; cpu++) {
++		taa_data = timerlat_aa_get_data(taa_ctx, cpu);
++		tep = taa_ctx->tool->trace.tep;
++
++		printf("    [%.3d] %24s:%llu", cpu, taa_data->current_comm, taa_data->current_pid);
++
++		if (taa_data->kworker_func)
++			printf(" kworker:%s:%s",
++				tep_find_function(tep, taa_data->kworker) ? : "<...>",
++				tep_find_function(tep, taa_data->kworker_func));
++		printf("\n");
++	}
++
++}
++
++/*
++ * timerlat_aa_destroy_seqs - Destroy seq files used to store parsed data
++ */
++static void timerlat_aa_destroy_seqs(struct timerlat_aa_context *taa_ctx)
++{
++	struct timerlat_aa_data *taa_data;
++	int i;
++
++	if (!taa_ctx->taa_data)
++		return;
++
++	for (i = 0; i < taa_ctx->nr_cpus; i++) {
++		taa_data = timerlat_aa_get_data(taa_ctx, i);
++
++		if (taa_data->prev_irqs_seq) {
++			trace_seq_destroy(taa_data->prev_irqs_seq);
++			free(taa_data->prev_irqs_seq);
++		}
++
++		if (taa_data->nmi_seq) {
++			trace_seq_destroy(taa_data->nmi_seq);
++			free(taa_data->nmi_seq);
++		}
++
++		if (taa_data->irqs_seq) {
++			trace_seq_destroy(taa_data->irqs_seq);
++			free(taa_data->irqs_seq);
++		}
++
++		if (taa_data->softirqs_seq) {
++			trace_seq_destroy(taa_data->softirqs_seq);
++			free(taa_data->softirqs_seq);
++		}
++
++		if (taa_data->threads_seq) {
++			trace_seq_destroy(taa_data->threads_seq);
++			free(taa_data->threads_seq);
++		}
++
++		if (taa_data->stack_seq) {
++			trace_seq_destroy(taa_data->stack_seq);
++			free(taa_data->stack_seq);
++		}
++	}
++}
++
++/*
++ * timerlat_aa_init_seqs - Init seq files used to store parsed information
++ *
++ * Instead of keeping data structures to store raw data, use seq files to
++ * store parsed data.
++ *
++ * Allocates and initialize seq files.
++ *
++ * Returns 0 on success, -1 otherwise.
++ */
++static int timerlat_aa_init_seqs(struct timerlat_aa_context *taa_ctx)
++{
++	struct timerlat_aa_data *taa_data;
++	int i;
++
++	for (i = 0; i < taa_ctx->nr_cpus; i++) {
++
++		taa_data = timerlat_aa_get_data(taa_ctx, i);
++
++		taa_data->prev_irqs_seq = calloc(1, sizeof(*taa_data->prev_irqs_seq));
++		if (!taa_data->prev_irqs_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->prev_irqs_seq);
++
++		taa_data->nmi_seq = calloc(1, sizeof(*taa_data->nmi_seq));
++		if (!taa_data->nmi_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->nmi_seq);
++
++		taa_data->irqs_seq = calloc(1, sizeof(*taa_data->irqs_seq));
++		if (!taa_data->irqs_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->irqs_seq);
++
++		taa_data->softirqs_seq = calloc(1, sizeof(*taa_data->softirqs_seq));
++		if (!taa_data->softirqs_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->softirqs_seq);
++
++		taa_data->threads_seq = calloc(1, sizeof(*taa_data->threads_seq));
++		if (!taa_data->threads_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->threads_seq);
++
++		taa_data->stack_seq = calloc(1, sizeof(*taa_data->stack_seq));
++		if (!taa_data->stack_seq)
++			goto out_err;
++
++		trace_seq_init(taa_data->stack_seq);
++	}
++
++	return 0;
++
++out_err:
++	timerlat_aa_destroy_seqs(taa_ctx);
++	return -1;
++}
++
++/*
++ * timerlat_aa_unregister_events - Unregister events used in the auto-analysis
++ */
++static void timerlat_aa_unregister_events(struct osnoise_tool *tool, int dump_tasks)
++{
++	tracefs_event_disable(tool->trace.inst, "osnoise", NULL);
++
++	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
++				     timerlat_aa_nmi_handler, tool);
++
++	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
++				     timerlat_aa_irq_handler, tool);
++
++	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
++				     timerlat_aa_softirq_handler, tool);
++
++	tep_unregister_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
++				     timerlat_aa_thread_handler, tool);
++
++	tep_unregister_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
++				     timerlat_aa_stack_handler, tool);
++	if (!dump_tasks)
++		return;
++
++	tracefs_event_disable(tool->trace.inst, "sched", "sched_switch");
++	tep_unregister_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
++				     timerlat_aa_sched_switch_handler, tool);
++
++	tracefs_event_disable(tool->trace.inst, "workqueue", "workqueue_execute_start");
++	tep_unregister_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
++				     timerlat_aa_kworker_start_handler, tool);
++}
++
++/*
++ * timerlat_aa_register_events - Register events used in the auto-analysis
++ *
++ * Returns 0 on success, -1 otherwise.
++ */
++static int timerlat_aa_register_events(struct osnoise_tool *tool, int dump_tasks)
++{
++	int retval;
++
++	/*
++	 * register auto-analysis handlers.
++	 */
++	retval = tracefs_event_enable(tool->trace.inst, "osnoise", NULL);
++	if (retval < 0 && !errno) {
++		err_msg("Could not find osnoise events\n");
++		goto out_err;
++	}
++
++	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "nmi_noise",
++				   timerlat_aa_nmi_handler, tool);
++
++	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "irq_noise",
++				   timerlat_aa_irq_handler, tool);
++
++	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "softirq_noise",
++				   timerlat_aa_softirq_handler, tool);
++
++	tep_register_event_handler(tool->trace.tep, -1, "osnoise", "thread_noise",
++				   timerlat_aa_thread_handler, tool);
++
++	tep_register_event_handler(tool->trace.tep, -1, "ftrace", "kernel_stack",
++				   timerlat_aa_stack_handler, tool);
++
++	if (!dump_tasks)
++		return 0;
++
++	/*
++	 * Dump task events.
++	 */
++	retval = tracefs_event_enable(tool->trace.inst, "sched", "sched_switch");
++	if (retval < 0 && !errno) {
++		err_msg("Could not find sched_switch\n");
++		goto out_err;
++	}
++
++	tep_register_event_handler(tool->trace.tep, -1, "sched", "sched_switch",
++				   timerlat_aa_sched_switch_handler, tool);
++
++	retval = tracefs_event_enable(tool->trace.inst, "workqueue", "workqueue_execute_start");
++	if (retval < 0 && !errno) {
++		err_msg("Could not find workqueue_execute_start\n");
++		goto out_err;
++	}
++
++	tep_register_event_handler(tool->trace.tep, -1, "workqueue", "workqueue_execute_start",
++				   timerlat_aa_kworker_start_handler, tool);
++
++	return 0;
++
++out_err:
++	timerlat_aa_unregister_events(tool, dump_tasks);
++	return -1;
++}
++
++/**
++ * timerlat_aa_destroy - Destroy timerlat auto-analysis
++ */
++void timerlat_aa_destroy(void)
++{
++	struct timerlat_aa_context *taa_ctx = timerlat_aa_get_ctx();
++
++	if (!taa_ctx)
++		return;
++
++	if (!taa_ctx->taa_data)
++		goto out_ctx;
++
++	timerlat_aa_unregister_events(taa_ctx->tool, taa_ctx->dump_tasks);
++	timerlat_aa_destroy_seqs(taa_ctx);
++	free(taa_ctx->taa_data);
++out_ctx:
++	free(taa_ctx);
++}
++
++/**
++ * timerlat_aa_init - Initialize timerlat auto-analysis
++ *
++ * Returns 0 on success, -1 otherwise.
++ */
++int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_tasks)
++{
++	struct timerlat_aa_context *taa_ctx;
++	int retval;
++
++	taa_ctx = calloc(1, sizeof(*taa_ctx));
++	if (!taa_ctx)
++		return -1;
++
++	__timerlat_aa_ctx = taa_ctx;
++
++	taa_ctx->nr_cpus = nr_cpus;
++	taa_ctx->tool = tool;
++	taa_ctx->dump_tasks = dump_tasks;
++
++	taa_ctx->taa_data = calloc(nr_cpus, sizeof(*taa_ctx->taa_data));
++	if (!taa_ctx->taa_data)
++		goto out_err;
++
++	retval = timerlat_aa_init_seqs(taa_ctx);
++	if (retval)
++		goto out_err;
++
++	retval = timerlat_aa_register_events(tool, dump_tasks);
++	if (retval)
++		goto out_err;
++
++	return 0;
++
++out_err:
++	timerlat_aa_destroy();
++	return -1;
++}
+diff --git a/src/timerlat_aa.h b/src/timerlat_aa.h
+new file mode 100644
+index 000000000000..d4f6ca7e342a
+--- /dev/null
++++ b/src/timerlat_aa.h
+@@ -0,0 +1,12 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Copyright (C) 2023 Red Hat Inc, Daniel Bristot de Oliveira <bristot@kernel.org>
++ */
++
++int timerlat_aa_init(struct osnoise_tool *tool, int nr_cpus, int dump_task);
++void timerlat_aa_destroy(void);
++
++int timerlat_aa_handler(struct trace_seq *s, struct tep_record *record,
++			struct tep_event *event, void *context);
++
++void timerlat_auto_analysis(int irq_thresh, int thread_thresh);
+diff --git a/src/utils.h b/src/utils.h
+index 5571afd3b549..90e4f52a030b 100644
+--- a/src/utils.h
++++ b/src/utils.h
+@@ -56,3 +56,6 @@ struct sched_attr {
+ int parse_prio(char *arg, struct sched_attr *sched_param);
+ int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr);
+ int set_cpu_dma_latency(int32_t latency);
++
++#define ns_to_usf(x) (((double)x/1000))
++#define ns_to_per(total, part) ((part * 100) / (double)total)
+-- 
+2.40.1
+
diff --git a/SOURCES/rtla-timerlat-Add-auto-analysis-only-option.patch b/SOURCES/rtla-timerlat-Add-auto-analysis-only-option.patch
new file mode 100644
index 0000000..1d0842f
--- /dev/null
+++ b/SOURCES/rtla-timerlat-Add-auto-analysis-only-option.patch
@@ -0,0 +1,164 @@
+From 9fa48a2477de747b644c778a1e35d679cabcd917 Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Wed, 29 Mar 2023 18:23:32 +0200
+Subject: [PATCH 7/8] rtla/timerlat: Add auto-analysis only option
+
+Parsing and formating timerlat data might consume a reasonable
+amount of CPU time on very large systems, or when timerlat
+has a too short period.
+
+Add an option to run timerlat with auto-analysis enabled while
+skipping the statistics parsing. In this mode, rtla timerlat
+periodically checks if the tracing is on, going to sleep waiting
+for the stop tracing condition to stop tracing, or for the
+tracing session to finish.
+
+If the stop tracing condition is hit, the tool prints the auto
+analysis. Otherwise, the tool prints the max observed latency and
+exit. The max observed latency is captured via tracing_max_latency.
+
+Link: https://lore.kernel.org/linux-trace-devel/4dc514d1d5dc353c537a466a9b5af44c266b6da2.1680106912.git.bristot@kernel.org
+
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/timerlat_top.c | 49 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 44 insertions(+), 5 deletions(-)
+
+diff --git a/src/timerlat_top.c b/src/timerlat_top.c
+index eea5b3357e27..92c658c64f28 100644
+--- a/src/timerlat_top.c
++++ b/src/timerlat_top.c
+@@ -33,6 +33,7 @@ struct timerlat_top_params {
+ 	int			set_sched;
+ 	int			dma_latency;
+ 	int			no_aa;
++	int			aa_only;
+ 	int			dump_tasks;
+ 	struct sched_attr	sched_param;
+ 	struct trace_events	*events;
+@@ -142,10 +143,12 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
+ 	top = container_of(trace, struct osnoise_tool, trace);
+ 	params = top->params;
+ 
+-	tep_get_field_val(s, event, "context", record, &thread, 1);
+-	tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
++	if (!params->aa_only) {
++		tep_get_field_val(s, event, "context", record, &thread, 1);
++		tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+ 
+-	timerlat_top_update(top, cpu, thread, latency);
++		timerlat_top_update(top, cpu, thread, latency);
++	}
+ 
+ 	if (!params->no_aa)
+ 		timerlat_aa_handler(s, record, event, context);
+@@ -250,6 +253,9 @@ timerlat_print_stats(struct timerlat_top_params *params, struct osnoise_tool *to
+ 	static int nr_cpus = -1;
+ 	int i;
+ 
++	if (params->aa_only)
++		return;
++
+ 	if (nr_cpus == -1)
+ 		nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ 
+@@ -279,10 +285,11 @@ static void timerlat_top_usage(char *usage)
+ 		"",
+ 		"  usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\",
+ 		"	  [[-t[=file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] \\",
+-		"	  [-P priority] [--dma-latency us]",
++		"	  [-P priority] [--dma-latency us] [--aa-only us]",
+ 		"",
+ 		"	  -h/--help: print this menu",
+ 		"	  -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit",
++		"	     --aa-only us: stop if <us> latency is hit, only printing the auto analysis (reduces CPU usage)",
+ 		"	  -p/--period us: timerlat period in us",
+ 		"	  -i/--irq us: stop trace if the irq latency is higher than the argument in us",
+ 		"	  -T/--thread us: stop trace if the thread latency is higher than the argument in us",
+@@ -362,13 +369,14 @@ static struct timerlat_top_params
+ 			{"dma-latency",		required_argument,	0, '2'},
+ 			{"no-aa",		no_argument,		0, '3'},
+ 			{"dump-tasks",		no_argument,		0, '4'},
++			{"aa-only",		required_argument,	0, '5'},
+ 			{0, 0, 0, 0}
+ 		};
+ 
+ 		/* getopt_long stores the option index here. */
+ 		int option_index = 0;
+ 
+-		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:34",
++		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:345:",
+ 				 long_options, &option_index);
+ 
+ 		/* detect the end of the options. */
+@@ -389,6 +397,20 @@ static struct timerlat_top_params
+ 			/* set trace */
+ 			params->trace_output = "timerlat_trace.txt";
+ 			break;
++		case '5':
++			/* it is here because it is similar to -a */
++			auto_thresh = get_llong_from_str(optarg);
++
++			/* set thread stop to auto_thresh */
++			params->stop_total_us = auto_thresh;
++			params->stop_us = auto_thresh;
++
++			/* get stack trace */
++			params->print_stack = auto_thresh;
++
++			/* set aa_only to avoid parsing the trace */
++			params->aa_only = 1;
++			break;
+ 		case 'c':
+ 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
+ 			if (retval)
+@@ -503,6 +525,9 @@ static struct timerlat_top_params
+ 	if (!params->stop_us && !params->stop_total_us)
+ 		params->no_aa = 1;
+ 
++	if (params->no_aa && params->aa_only)
++		timerlat_top_usage("--no-aa and --aa-only are mutually exclusive!");
++
+ 	return params;
+ }
+ 
+@@ -634,6 +659,7 @@ int timerlat_top_main(int argc, char *argv[])
+ 	struct trace_instance *trace;
+ 	int dma_latency_fd = -1;
+ 	int return_value = 1;
++	char *max_lat;
+ 	int retval;
+ 
+ 	params = timerlat_top_parse_args(argc, argv);
+@@ -700,6 +726,9 @@ int timerlat_top_main(int argc, char *argv[])
+ 	while (!stop_tracing) {
+ 		sleep(params->sleep_time);
+ 
++		if (params->aa_only && !trace_is_off(&top->trace, &record->trace))
++			continue;
++
+ 		retval = tracefs_iterate_raw_events(trace->tep,
+ 						    trace->inst,
+ 						    NULL,
+@@ -733,6 +762,16 @@ int timerlat_top_main(int argc, char *argv[])
+ 			printf("  Saving trace to %s\n", params->trace_output);
+ 			save_trace_to_file(record->trace.inst, params->trace_output);
+ 		}
++	} else if (params->aa_only) {
++		/*
++		 * If the trace did not stop with --aa-only, at least print the
++		 * max known latency.
++		 */
++		max_lat = tracefs_instance_file_read(trace->inst, "tracing_max_latency", NULL);
++		if (max_lat) {
++			printf("  Max latency was %s\n", max_lat);
++			free(max_lat);
++		}
+ 	}
+ 
+ out_top:
+-- 
+2.40.1
+
diff --git a/SOURCES/rtla-timerlat-Add-auto-analysis-support-to-timerlat.patch b/SOURCES/rtla-timerlat-Add-auto-analysis-support-to-timerlat.patch
new file mode 100644
index 0000000..5f7275a
--- /dev/null
+++ b/SOURCES/rtla-timerlat-Add-auto-analysis-support-to-timerlat.patch
@@ -0,0 +1,293 @@
+From 5def33df84d2326ebf5f29ae9ddc702a4593c337 Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Tue, 31 Jan 2023 16:38:54 +0100
+Subject: [PATCH 4/8] rtla/timerlat: Add auto-analysis support to timerlat top
+
+Currently, timerlat top displays the timerlat tracer latency results, saving
+the intuitive timerlat trace for the developer to analyze.
+
+This patch goes a step forward in the automaton of the scheduling latency
+analysis by providing a summary of the root cause of a latency higher than
+the passed "stop tracing" parameter if the trace stops.
+
+The output is intuitive enough for non-expert users to have a general idea
+of the root cause by looking at each factor's contribution percentage while
+keeping the technical detail in the output for more expert users to start
+an in dept debug or to correlate a root cause with an existing one.
+
+The terminology is in line with recent industry and academic publications
+to facilitate the understanding of both audiences.
+
+Here is one example of tool output:
+ ----------------------------------------- %< -----------------------------------------------------
+  # taskset -c 0 timerlat -a 40 -c 1-23 -q
+                                     Timer Latency
+    0 00:00:12   |          IRQ Timer Latency (us)        |         Thread Timer Latency (us)
+  CPU COUNT      |      cur       min       avg       max |      cur       min       avg       max
+    1 #12322     |        0         0         1        15 |       10         3         9        31
+    2 #12322     |        3         0         1        12 |       10         3         9        23
+    3 #12322     |        1         0         1        21 |        8         2         8        34
+    4 #12322     |        1         0         1        17 |       10         2        11        33
+    5 #12322     |        0         0         1        12 |        8         3         8        25
+    6 #12322     |        1         0         1        14 |       16         3        11        35
+    7 #12322     |        0         0         1        14 |        9         2         8        29
+    8 #12322     |        1         0         1        22 |        9         3         9        34
+    9 #12322     |        0         0         1        14 |        8         2         8        24
+   10 #12322     |        1         0         0        12 |        9         3         8        24
+   11 #12322     |        0         0         0        15 |        6         2         7        29
+   12 #12321     |        1         0         0        13 |        5         3         8        23
+   13 #12319     |        0         0         1        14 |        9         3         9        26
+   14 #12321     |        1         0         0        13 |        6         2         8        24
+   15 #12321     |        1         0         1        15 |       12         3        11        27
+   16 #12318     |        0         0         1        13 |        7         3        10        24
+   17 #12319     |        0         0         1        13 |       11         3         9        25
+   18 #12318     |        0         0         0        12 |        8         2         8        20
+   19 #12319     |        0         0         1        18 |       10         2         9        28
+   20 #12317     |        0         0         0        20 |        9         3         8        34
+   21 #12318     |        0         0         0        13 |        8         3         8        28
+   22 #12319     |        0         0         1        11 |        8         3        10        22
+   23 #12320     |       28         0         1        28 |       41         3        11        41
+  rtla timerlat hit stop tracing
+  ## CPU 23 hit stop tracing, analyzing it ##
+  IRQ handler delay:				      	    27.49 us (65.52 %)
+  IRQ latency:						    28.13 us
+  Timerlat IRQ duration:				     9.59 us (22.85 %)
+  Blocking thread:					     3.79 us (9.03 %)
+			objtool:49256    		     3.79 us
+    Blocking thread stacktrace
+		-> timerlat_irq
+		-> __hrtimer_run_queues
+		-> hrtimer_interrupt
+		-> __sysvec_apic_timer_interrupt
+		-> sysvec_apic_timer_interrupt
+		-> asm_sysvec_apic_timer_interrupt
+		-> _raw_spin_unlock_irqrestore
+		-> cgroup_rstat_flush_locked
+		-> cgroup_rstat_flush_irqsafe
+		-> mem_cgroup_flush_stats
+		-> mem_cgroup_wb_stats
+		-> balance_dirty_pages
+		-> balance_dirty_pages_ratelimited_flags
+		-> btrfs_buffered_write
+		-> btrfs_do_write_iter
+		-> vfs_write
+		-> __x64_sys_pwrite64
+		-> do_syscall_64
+		-> entry_SYSCALL_64_after_hwframe
+  ------------------------------------------------------------------------
+    Thread latency:					    41.96 us (100%)
+
+  The system has exit from idle latency!
+    Max timerlat IRQ latency from idle: 17.48 us in cpu 4
+  Saving trace to timerlat_trace.txt
+ ----------------------------------------- >% -----------------------------------------------------
+
+In this case, the major factor was the delay suffered by the IRQ handler
+that handles timerlat wakeup: 65.52 %. This can be caused by the
+current thread masking interrupts, which can be seen in the blocking
+thread stacktrace: the current thread (objtool:49256) disabled interrupts
+via raw spin lock operations inside mem cgroup, while doing write
+syscall in a btrfs file system.
+
+A simple search for the function name on Google shows that this is
+a legit case for disabling the interrupts:
+
+  cgroup: Use irqsave in cgroup_rstat_flush_locked()
+  lore.kernel.org/linux-mm/20220301122143.1521823-2-bigeasy@linutronix.de/
+
+The output also prints other reasons for the latency root cause, such as:
+
+  - an IRQ that happened before the IRQ handler that caused delays
+  - The interference from NMI, IRQ, Softirq, and Threads
+
+The details about how these factors affect the scheduling latency
+can be found here:
+
+   https://bristot.me/demystifying-the-real-time-linux-latency/
+
+Link: https://lkml.kernel.org/r/3d45f40e630317f51ac6d678e2d96d310e495729.1675179318.git.bristot@kernel.org
+
+Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/timerlat_top.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
+ 1 file changed, 44 insertions(+), 2 deletions(-)
+
+diff --git a/src/timerlat_top.c b/src/timerlat_top.c
+index 334271935222..eea5b3357e27 100644
+--- a/src/timerlat_top.c
++++ b/src/timerlat_top.c
+@@ -10,10 +10,12 @@
+ #include <unistd.h>
+ #include <stdio.h>
+ #include <time.h>
++#include <errno.h>
+ 
+ #include "utils.h"
+ #include "osnoise.h"
+ #include "timerlat.h"
++#include "timerlat_aa.h"
+ 
+ struct timerlat_top_params {
+ 	char			*cpus;
+@@ -30,6 +32,8 @@ struct timerlat_top_params {
+ 	int			quiet;
+ 	int			set_sched;
+ 	int			dma_latency;
++	int			no_aa;
++	int			dump_tasks;
+ 	struct sched_attr	sched_param;
+ 	struct trace_events	*events;
+ };
+@@ -130,17 +134,22 @@ timerlat_top_handler(struct trace_seq *s, struct tep_record *record,
+ 		     struct tep_event *event, void *context)
+ {
+ 	struct trace_instance *trace = context;
++	struct timerlat_top_params *params;
+ 	unsigned long long latency, thread;
+ 	struct osnoise_tool *top;
+ 	int cpu = record->cpu;
+ 
+ 	top = container_of(trace, struct osnoise_tool, trace);
++	params = top->params;
+ 
+ 	tep_get_field_val(s, event, "context", record, &thread, 1);
+ 	tep_get_field_val(s, event, "timer_latency", record, &latency, 1);
+ 
+ 	timerlat_top_update(top, cpu, thread, latency);
+ 
++	if (!params->no_aa)
++		timerlat_aa_handler(s, record, event, context);
++
+ 	return 0;
+ }
+ 
+@@ -281,11 +290,13 @@ static void timerlat_top_usage(char *usage)
+ 		"	  -c/--cpus cpus: run the tracer only on the given cpus",
+ 		"	  -d/--duration time[m|h|d]: duration of the session in seconds",
+ 		"	  -D/--debug: print debug info",
++		"	     --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)",
+ 		"	  -t/--trace[=file]: save the stopped trace to [file|timerlat_trace.txt]",
+ 		"	  -e/--event <sys:event>: enable the <sys:event> in the trace instance, multiple -e are allowed",
+ 		"	     --filter <command>: enable a trace event filter to the previous -e event",
+ 		"	     --trigger <command>: enable a trace event trigger to the previous -e event",
+ 		"	  -n/--nano: display data in nanoseconds",
++		"	     --no-aa: disable auto-analysis, reducing rtla timerlat cpu usage",
+ 		"	  -q/--quiet print only a summary at the end",
+ 		"	     --dma-latency us: set /dev/cpu_dma_latency latency <us> to reduce exit from idle latency",
+ 		"	  -P/--priority o:prio|r:prio|f:prio|d:runtime:period : set scheduling parameters",
+@@ -349,13 +360,15 @@ static struct timerlat_top_params
+ 			{"trigger",		required_argument,	0, '0'},
+ 			{"filter",		required_argument,	0, '1'},
+ 			{"dma-latency",		required_argument,	0, '2'},
++			{"no-aa",		no_argument,		0, '3'},
++			{"dump-tasks",		no_argument,		0, '4'},
+ 			{0, 0, 0, 0}
+ 		};
+ 
+ 		/* getopt_long stores the option index here. */
+ 		int option_index = 0;
+ 
+-		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:",
++		c = getopt_long(argc, argv, "a:c:d:De:hi:np:P:qs:t::T:0:1:2:34",
+ 				 long_options, &option_index);
+ 
+ 		/* detect the end of the options. */
+@@ -368,13 +381,13 @@ static struct timerlat_top_params
+ 
+ 			/* set thread stop to auto_thresh */
+ 			params->stop_total_us = auto_thresh;
++			params->stop_us = auto_thresh;
+ 
+ 			/* get stack trace */
+ 			params->print_stack = auto_thresh;
+ 
+ 			/* set trace */
+ 			params->trace_output = "timerlat_trace.txt";
+-
+ 			break;
+ 		case 'c':
+ 			retval = parse_cpu_list(optarg, &params->monitored_cpus);
+@@ -437,6 +450,7 @@ static struct timerlat_top_params
+ 				params->trace_output = &optarg[1];
+ 			else
+ 				params->trace_output = "timerlat_trace.txt";
++
+ 			break;
+ 		case '0': /* trigger */
+ 			if (params->events) {
+@@ -467,6 +481,12 @@ static struct timerlat_top_params
+ 				exit(EXIT_FAILURE);
+ 			}
+ 			break;
++		case '3': /* no-aa */
++			params->no_aa = 1;
++			break;
++		case '4':
++			params->dump_tasks = 1;
++			break;
+ 		default:
+ 			timerlat_top_usage("Invalid option");
+ 		}
+@@ -477,6 +497,12 @@ static struct timerlat_top_params
+ 		exit(EXIT_FAILURE);
+ 	}
+ 
++	/*
++	 * Auto analysis only happens if stop tracing, thus:
++	 */
++	if (!params->stop_us && !params->stop_total_us)
++		params->no_aa = 1;
++
+ 	return params;
+ }
+ 
+@@ -547,6 +573,7 @@ static struct osnoise_tool
+ {
+ 	struct osnoise_tool *top;
+ 	int nr_cpus;
++	int retval;
+ 
+ 	nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ 
+@@ -563,6 +590,16 @@ static struct osnoise_tool
+ 	tep_register_event_handler(top->trace.tep, -1, "ftrace", "timerlat",
+ 				   timerlat_top_handler, top);
+ 
++	/*
++	 * If no auto analysis, we are ready.
++	 */
++	if (params->no_aa)
++		return top;
++
++	retval = timerlat_aa_init(top, nr_cpus, params->dump_tasks);
++	if (retval)
++		goto out_err;
++
+ 	return top;
+ 
+ out_err:
+@@ -688,6 +725,10 @@ int timerlat_top_main(int argc, char *argv[])
+ 
+ 	if (trace_is_off(&top->trace, &record->trace)) {
+ 		printf("rtla timerlat hit stop tracing\n");
++
++		if (!params->no_aa)
++			timerlat_auto_analysis(params->stop_us, params->stop_total_us);
++
+ 		if (params->trace_output) {
+ 			printf("  Saving trace to %s\n", params->trace_output);
+ 			save_trace_to_file(record->trace.inst, params->trace_output);
+@@ -701,6 +742,7 @@ int timerlat_top_main(int argc, char *argv[])
+ 	params->events = NULL;
+ out_free:
+ 	timerlat_free_top(top->data);
++	timerlat_aa_destroy();
+ 	osnoise_destroy_tool(record);
+ 	osnoise_destroy_tool(top);
+ 	free(params);
+-- 
+2.40.1
+
diff --git a/SOURCES/rtla-timerlat-Fix-Previous-IRQ-auto-analysis-line.patch b/SOURCES/rtla-timerlat-Fix-Previous-IRQ-auto-analysis-line.patch
new file mode 100644
index 0000000..9f92c84
--- /dev/null
+++ b/SOURCES/rtla-timerlat-Fix-Previous-IRQ-auto-analysis-line.patch
@@ -0,0 +1,48 @@
+From 82253a271aae9271fcf0aaa5e0ecc6dd38fb872b Mon Sep 17 00:00:00 2001
+From: Daniel Bristot de Oliveira <bristot@kernel.org>
+Date: Fri, 24 Mar 2023 20:26:42 +0100
+Subject: [PATCH 8/8] rtla/timerlat: Fix "Previous IRQ" auto analysis' line
+
+The "Previous IRQ interference" line is misaligned and without
+a \n, breaking the tool's output:
+
+ ## CPU 12 hit stop tracing, analyzing it ##
+  Previous IRQ interference:			up to      2.22 us  IRQ handler delay:		                	    18.06 us (0.00 %)
+  IRQ latency:						    18.52 us
+  Timerlat IRQ duration:				     4.41 us (0.00 %)
+  Blocking thread:					   216.93 us (0.03 %)
+
+Fix the output:
+
+ ## CPU 7 hit stop tracing, analyzing it ##
+  Previous IRQ interference:			 up to       8.93 us
+  IRQ handler delay:		                	     0.98 us (0.00 %)
+  IRQ latency:						     2.95 us
+  Timerlat IRQ duration:				    11.26 us (0.03 %)
+
+Link: https://lore.kernel.org/linux-trace-devel/8b5819077f15ccf24745c9bf3205451e16ee32d9.1679685525.git.bristot@kernel.org
+
+Fixes: 27e348b221f6 ("rtla/timerlat: Add auto-analysis core")
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/timerlat_aa.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/timerlat_aa.c b/src/timerlat_aa.c
+index ec4e0f4b0e6c..1843fff66da5 100644
+--- a/src/timerlat_aa.c
++++ b/src/timerlat_aa.c
+@@ -548,7 +548,7 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu,
+ 	exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay;
+ 
+ 	if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration)
+-		printf("  Previous IRQ interference:	\t	up to %9.2f us",
++		printf("  Previous IRQ interference:	\t\t up to  %9.2f us\n",
+ 			ns_to_usf(taa_data->prev_irq_duration));
+ 
+ 	/*
+-- 
+2.40.1
+
diff --git a/SOURCES/tools-tracing-rtla-osnoise_hist-display-average-with.patch b/SOURCES/tools-tracing-rtla-osnoise_hist-display-average-with.patch
new file mode 100644
index 0000000..0fff487
--- /dev/null
+++ b/SOURCES/tools-tracing-rtla-osnoise_hist-display-average-with.patch
@@ -0,0 +1,36 @@
+From 1fab1469b66baf7847298b205e5c4aff47c2ae8a Mon Sep 17 00:00:00 2001
+From: Andreas Ziegler <br015@umbiko.net>
+Date: Tue, 3 Jan 2023 11:34:00 +0100
+Subject: [PATCH 2/8] tools/tracing/rtla: osnoise_hist: display average with
+ two-digit precision
+
+Calculate average value in osnoise-hist summary with two-digit
+precision to avoid displaying too optimitic results.
+
+Link: https://lkml.kernel.org/r/20230103103400.275566-3-br015@umbiko.net
+
+Signed-off-by: Andreas Ziegler <br015@umbiko.net>
+Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/osnoise_hist.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/osnoise_hist.c b/src/osnoise_hist.c
+index fe34452fc4ec..13e1233690bb 100644
+--- a/src/osnoise_hist.c
++++ b/src/osnoise_hist.c
+@@ -335,8 +335,8 @@ osnoise_print_summary(struct osnoise_hist_params *params,
+ 			continue;
+ 
+ 		if (data->hist[cpu].count)
+-			trace_seq_printf(trace->seq, "%9llu ",
+-					data->hist[cpu].sum_sample / data->hist[cpu].count);
++			trace_seq_printf(trace->seq, "%9.2f ",
++				((double) data->hist[cpu].sum_sample) / data->hist[cpu].count);
+ 		else
+ 			trace_seq_printf(trace->seq, "        - ");
+ 	}
+-- 
+2.40.1
+
diff --git a/SOURCES/tools-tracing-rtla-osnoise_hist-use-total-duration-f.patch b/SOURCES/tools-tracing-rtla-osnoise_hist-use-total-duration-f.patch
new file mode 100644
index 0000000..f2d7df2
--- /dev/null
+++ b/SOURCES/tools-tracing-rtla-osnoise_hist-use-total-duration-f.patch
@@ -0,0 +1,51 @@
+From fe137a4fe0e77eb95396cfc5c3dd7df404421aa4 Mon Sep 17 00:00:00 2001
+From: Andreas Ziegler <br015@umbiko.net>
+Date: Tue, 3 Jan 2023 11:33:59 +0100
+Subject: [PATCH 1/8] tools/tracing/rtla: osnoise_hist: use total duration for
+ average calculation
+
+Sampled durations must be weighted by observed quantity, to arrive at a correct
+average duration value.
+
+Perform calculation of total duration by summing (duration * count).
+
+Link: https://lkml.kernel.org/r/20230103103400.275566-2-br015@umbiko.net
+
+Fixes: 829a6c0b5698 ("rtla/osnoise: Add the hist mode")
+
+Signed-off-by: Andreas Ziegler <br015@umbiko.net>
+Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+---
+ src/osnoise_hist.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/src/osnoise_hist.c b/src/osnoise_hist.c
+index 5d7ea479ac89..fe34452fc4ec 100644
+--- a/src/osnoise_hist.c
++++ b/src/osnoise_hist.c
+@@ -121,6 +121,7 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
+ {
+ 	struct osnoise_hist_params *params = tool->params;
+ 	struct osnoise_hist_data *data = tool->data;
++	unsigned long long total_duration;
+ 	int entries = data->entries;
+ 	int bucket;
+ 	int *hist;
+@@ -131,10 +132,12 @@ static void osnoise_hist_update_multiple(struct osnoise_tool *tool, int cpu,
+ 	if (data->bucket_size)
+ 		bucket = duration / data->bucket_size;
+ 
++	total_duration = duration * count;
++
+ 	hist = data->hist[cpu].samples;
+ 	data->hist[cpu].count += count;
+ 	update_min(&data->hist[cpu].min_sample, &duration);
+-	update_sum(&data->hist[cpu].sum_sample, &duration);
++	update_sum(&data->hist[cpu].sum_sample, &total_duration);
+ 	update_max(&data->hist[cpu].max_sample, &duration);
+ 
+ 	if (bucket < entries)
+-- 
+2.40.1
+
diff --git a/SPECS/rtla.spec b/SPECS/rtla.spec
index 39cc2ae..e352074 100644
--- a/SPECS/rtla.spec
+++ b/SPECS/rtla.spec
@@ -1,6 +1,6 @@
 Name:    rtla
 Version: 5.14.0
-Release: 4%{?dist}
+Release: 8%{?dist}
 Summary: Real-Time Linux Analysis tools
 
 License: GPLv2
@@ -19,6 +19,16 @@ Requires: libtracefs >= 1.3.1
 
 # Patches
 Patch1: rtla-Fix-exit-status-when-returning-from-calls-to-usage.patch
+Patch2: Documentation-rtla-Correct-command-line-example.patch
+Patch3: tools-tracing-rtla-osnoise_hist-use-total-duration-f.patch
+Patch4: tools-tracing-rtla-osnoise_hist-display-average-with.patch
+Patch5: rtla-timerlat-Add-auto-analysis-core.patch
+Patch6: rtla-timerlat-Add-auto-analysis-support-to-timerlat.patch
+Patch7: rtla-timerlat-Add-auto-analysis-only-option.patch
+Patch8: rtla-timerlat-Fix-Previous-IRQ-auto-analysis-line.patch
+Patch9: Documentation-rtla-Add-timerlat-top-auto-analysis-op.patch
+Patch10: rtla-Add-hwnoise-tool.patch
+Patch11: Documentation-rtla-Add-hwnoise-man-page.patch
 
 %description
 The rtla meta-tool includes a set of commands that aims to analyze
@@ -30,6 +40,16 @@ about the properties and root causes of unexpected results.
 %prep
 %setup -q -n %{name}-%{version}
 %patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
 
 
 %build
@@ -62,9 +82,30 @@ make DESTDIR=%{buildroot} -C Documentation install
 %{_mandir}/man1/rtla-timerlat-top.1.gz
 %{_mandir}/man1/rtla-timerlat.1.gz
 %{_mandir}/man1/rtla.1.gz
+%{_mandir}/man1/rtla-hwnoise.1.gz
 
 
 %changelog
+* Fri Jul 14 2023 John Kacur <jkacur@redhat.com> - 5.14.0-8
+- Add rtla hwnoise
+Resolves: bz2175295
+jiraProject ==  RHELPLAN-150659
+
+* Fri Jul 14 2023 John Kacur <jkacur@redhat.com> - 5.14.0-7
+- Add rtla timerlat auto analysis
+Resolves: rhbz#2175292
+jiraProject == RHELPLAN-150657
+
+* Tue May 09 2023 John Kacur <jkacur@redhat.com> - 5.14.0-6
+- Forward port the tests directory for ci testing
+Resolves: rhbz#2196611
+jiraProject == RHELPLAN-156801
+
+* Fri May 05 2023 John Kacur <jkacur@redhat.com> - 5.14.0-5
+- Correct commandline example
+Resolves: rhbz#2189440
+jiraProject ==  RHELPLAN-155623
+
 * Thu Jan 26 2023 John Kacur <jkacur@redhat.com> - 5.14.0-4
 - Add a gating test for rtla
 Resolves: rhbz#2164877