Release 2.4.1
Resolves: RHEL-15588 Signed-off-by: Pingfan Liu <piliu@redhat.com>
This commit is contained in:
parent
c695e02ad1
commit
d5bdaad69f
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
SOURCES/v2.2.tar.gz
|
SOURCES/v2.2.tar.gz
|
||||||
/v2.2.tar.gz
|
/v2.2.tar.gz
|
||||||
/v2.3.tar.gz
|
/v2.3.tar.gz
|
||||||
|
/v2.4.tar.gz
|
||||||
|
@ -1,42 +0,0 @@
|
|||||||
From 2715969f92f5e8d6c60488a65ccef73fef57fa6e Mon Sep 17 00:00:00 2001
|
|
||||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
Date: Mon, 31 Oct 2022 14:22:32 +0800
|
|
||||||
Subject: [PATCH 01/15] configure.ac : Fix build error when libnuma is missed
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
When libnuma is installed after running autogen.sh (which didn’t fail) it
|
|
||||||
silently fails linking with missing symbols. To avoid this issue just make
|
|
||||||
autoconf error out if libnuma is missing.
|
|
||||||
|
|
||||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
---
|
|
||||||
configure.ac | 4 ++--
|
|
||||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/configure.ac b/configure.ac
|
|
||||||
index fd945f4..36edcc5 100644
|
|
||||||
--- a/configure.ac
|
|
||||||
+++ b/configure.ac
|
|
||||||
@@ -2,7 +2,7 @@
|
|
||||||
# Process this file with autoconf to produce a configure script.
|
|
||||||
|
|
||||||
AC_PREREQ([2.69])
|
|
||||||
-AC_INIT([numatop], [v2.1], [yao.jin@intel.com])
|
|
||||||
+AC_INIT([numatop], [v2.3], [zhengjun.xing@intel.com])
|
|
||||||
AM_INIT_AUTOMAKE([-Wno-portability no-dist-gzip dist-xz foreign subdir-objects])
|
|
||||||
AC_CONFIG_SRCDIR([common/numatop.c])
|
|
||||||
AC_CONFIG_HEADERS([config.h])
|
|
||||||
@@ -18,7 +18,7 @@ AC_PROG_CC
|
|
||||||
AC_PROG_INSTALL
|
|
||||||
|
|
||||||
# Checks for libraries.
|
|
||||||
-AC_CHECK_LIB([numa], [numa_free])
|
|
||||||
+AC_CHECK_LIB([numa], [numa_free], [], [ AC_MSG_ERROR([numactl-devel or libnuma-dev(el) is required but was not found]) exit -1])
|
|
||||||
AC_CHECK_LIB([pthread], [pthread_create])
|
|
||||||
|
|
||||||
PKG_CHECK_MODULES([CHECK], [check])
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
|||||||
From b9157a8e3ba3a2a0af3d8f755a32a3b57cad04c9 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
Date: Mon, 31 Oct 2022 15:15:08 +0800
|
|
||||||
Subject: [PATCH 02/15] Update the error message
|
|
||||||
|
|
||||||
Update error message for cases that needs to increase ulimit.
|
|
||||||
For example, SPR needs to set the max open files to be more
|
|
||||||
than 1024, while on most systems the default is 1024.
|
|
||||||
|
|
||||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
---
|
|
||||||
common/os/os_perf.c | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
|
||||||
index 44263e8..f2f1104 100644
|
|
||||||
--- a/common/os/os_perf.c
|
|
||||||
+++ b/common/os/os_perf.c
|
|
||||||
@@ -650,7 +650,7 @@ os_profiling_start(perf_ctl_t *ctl, perf_task_t *task)
|
|
||||||
proc_ll_clear(NULL);
|
|
||||||
|
|
||||||
if (profiling_start(ctl, (task_profiling_t *)(task)) != 0) {
|
|
||||||
- exit_msg_put("Fail to setup perf (probably permission denied)!\n");
|
|
||||||
+ exit_msg_put("Fail to setup perf (probably permission denied or need to increase the ulimit)!\n");
|
|
||||||
debug_print(NULL, 2, "os_profiling_start failed\n");
|
|
||||||
perf_status_set(PERF_STATUS_PROFILING_FAILED);
|
|
||||||
return (-1);
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,56 +0,0 @@
|
|||||||
From ff75e35508183b5ed39d50122c71293e8e65a86f Mon Sep 17 00:00:00 2001
|
|
||||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
Date: Mon, 31 Oct 2022 15:25:04 +0800
|
|
||||||
Subject: [PATCH 03/15] Update README
|
|
||||||
|
|
||||||
Update README, add "check" to the Build Dependencies, add tips
|
|
||||||
for running NumaTOP.
|
|
||||||
|
|
||||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
|
||||||
---
|
|
||||||
README.md | 24 +++++++++++++++++++++++-
|
|
||||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/README.md b/README.md
|
|
||||||
index 6910c78..d16a486 100644
|
|
||||||
--- a/README.md
|
|
||||||
+++ b/README.md
|
|
||||||
@@ -12,12 +12,34 @@ the `mgen` program for help information.
|
|
||||||
|
|
||||||
## Build Dependencies
|
|
||||||
|
|
||||||
-NumaTOP requires following libraries:
|
|
||||||
+NumaTOP requires following libraries or packages:
|
|
||||||
|
|
||||||
* numactl-devel or libnuma-dev(el)
|
|
||||||
* libncurses
|
|
||||||
* libpthread
|
|
||||||
|
|
||||||
+* check
|
|
||||||
+
|
|
||||||
+## Run NumaTOP
|
|
||||||
+
|
|
||||||
+NumaTOP requires running as root.
|
|
||||||
+ # ./numatop
|
|
||||||
+
|
|
||||||
+In many systems, the default max open files are 1024, for platforms (like SPR)
|
|
||||||
+that have more CPUs, they require the system with the max open files should
|
|
||||||
+bigger than 1024, otherwise, the error can be "Fail to setup perf":
|
|
||||||
+
|
|
||||||
+ # ulimit -n
|
|
||||||
+ 1024 <------the max open files are 1024
|
|
||||||
+ # ./numatop
|
|
||||||
+ NumaTOP is starting ...
|
|
||||||
+ Fail to setup perf (probably permission denied)!
|
|
||||||
+
|
|
||||||
+Need to enlarge the max open files:
|
|
||||||
+
|
|
||||||
+ # ulimit -n 8192
|
|
||||||
+ # ulimit -n
|
|
||||||
+ 8192 <------now the max open files are 8192
|
|
||||||
|
|
||||||
## Supported Kernels
|
|
||||||
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,265 +0,0 @@
|
|||||||
From f39f29d200b83c568748afc4483feb544b4f6bd6 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Wed, 16 Feb 2022 16:01:18 +0530
|
|
||||||
Subject: [PATCH 04/15] x86: Prepare for multi-vendor support
|
|
||||||
|
|
||||||
In order to support x86 processors from other vendors, move
|
|
||||||
existing platform-specific code for Intel processors to the
|
|
||||||
new x86 directory and update the build files accordingly.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
Makefile.am | 36 ++++++++++++++++-----------------
|
|
||||||
README.md | 2 +-
|
|
||||||
common/include/os/plat.h | 2 +-
|
|
||||||
common/include/types.h | 2 +-
|
|
||||||
configure.ac | 2 +-
|
|
||||||
test/mgen/{intel => x86}/util.c | 0
|
|
||||||
{intel => x86}/bdw.c | 0
|
|
||||||
{intel => x86}/include/bdw.h | 0
|
|
||||||
{intel => x86}/include/nhm.h | 0
|
|
||||||
{intel => x86}/include/skl.h | 0
|
|
||||||
{intel => x86}/include/snb.h | 0
|
|
||||||
{intel => x86}/include/types.h | 6 +++---
|
|
||||||
{intel => x86}/include/util.h | 6 +++---
|
|
||||||
{intel => x86}/include/wsm.h | 0
|
|
||||||
{intel => x86}/nhm.c | 0
|
|
||||||
{intel => x86}/plat.c | 0
|
|
||||||
{intel => x86}/skl.c | 0
|
|
||||||
{intel => x86}/snb.c | 0
|
|
||||||
{intel => x86}/ui_perf_map.c | 0
|
|
||||||
{intel => x86}/util.c | 0
|
|
||||||
{intel => x86}/wsm.c | 0
|
|
||||||
21 files changed, 28 insertions(+), 28 deletions(-)
|
|
||||||
rename test/mgen/{intel => x86}/util.c (100%)
|
|
||||||
rename {intel => x86}/bdw.c (100%)
|
|
||||||
rename {intel => x86}/include/bdw.h (100%)
|
|
||||||
rename {intel => x86}/include/nhm.h (100%)
|
|
||||||
rename {intel => x86}/include/skl.h (100%)
|
|
||||||
rename {intel => x86}/include/snb.h (100%)
|
|
||||||
rename {intel => x86}/include/types.h (95%)
|
|
||||||
rename {intel => x86}/include/util.h (94%)
|
|
||||||
rename {intel => x86}/include/wsm.h (100%)
|
|
||||||
rename {intel => x86}/nhm.c (100%)
|
|
||||||
rename {intel => x86}/plat.c (100%)
|
|
||||||
rename {intel => x86}/skl.c (100%)
|
|
||||||
rename {intel => x86}/snb.c (100%)
|
|
||||||
rename {intel => x86}/ui_perf_map.c (100%)
|
|
||||||
rename {intel => x86}/util.c (100%)
|
|
||||||
rename {intel => x86}/wsm.c (100%)
|
|
||||||
|
|
||||||
diff --git a/Makefile.am b/Makefile.am
|
|
||||||
index 643704a..438a9fc 100644
|
|
||||||
--- a/Makefile.am
|
|
||||||
+++ b/Makefile.am
|
|
||||||
@@ -58,23 +58,23 @@ libnumatop_la_SOURCES = \
|
|
||||||
common/util.c \
|
|
||||||
common/win.c
|
|
||||||
|
|
||||||
-if CPU_INTEL
|
|
||||||
+if CPU_X86
|
|
||||||
libnumatop_la_SOURCES += \
|
|
||||||
- intel/include/bdw.h \
|
|
||||||
- intel/include/nhm.h \
|
|
||||||
- intel/include/skl.h \
|
|
||||||
- intel/include/snb.h \
|
|
||||||
- intel/include/types.h \
|
|
||||||
- intel/include/util.h \
|
|
||||||
- intel/include/wsm.h \
|
|
||||||
- intel/bdw.c \
|
|
||||||
- intel/nhm.c \
|
|
||||||
- intel/plat.c \
|
|
||||||
- intel/skl.c \
|
|
||||||
- intel/snb.c \
|
|
||||||
- intel/ui_perf_map.c \
|
|
||||||
- intel/util.c \
|
|
||||||
- intel/wsm.c
|
|
||||||
+ x86/include/bdw.h \
|
|
||||||
+ x86/include/nhm.h \
|
|
||||||
+ x86/include/skl.h \
|
|
||||||
+ x86/include/snb.h \
|
|
||||||
+ x86/include/types.h \
|
|
||||||
+ x86/include/util.h \
|
|
||||||
+ x86/include/wsm.h \
|
|
||||||
+ x86/bdw.c \
|
|
||||||
+ x86/nhm.c \
|
|
||||||
+ x86/plat.c \
|
|
||||||
+ x86/skl.c \
|
|
||||||
+ x86/snb.c \
|
|
||||||
+ x86/ui_perf_map.c \
|
|
||||||
+ x86/util.c \
|
|
||||||
+ x86/wsm.c
|
|
||||||
endif
|
|
||||||
|
|
||||||
if CPU_PPC
|
|
||||||
@@ -106,9 +106,9 @@ if CPU_PPC
|
|
||||||
mgen_SOURCES += \
|
|
||||||
test/mgen/powerpc/util.c
|
|
||||||
endif
|
|
||||||
-if CPU_INTEL
|
|
||||||
+if CPU_X86
|
|
||||||
mgen_SOURCES += \
|
|
||||||
- test/mgen/intel/util.c
|
|
||||||
+ test/mgen/x86/util.c
|
|
||||||
endif
|
|
||||||
|
|
||||||
TESTS = test/mgen.01.sh test/mgen.02.sh
|
|
||||||
diff --git a/README.md b/README.md
|
|
||||||
index d16a486..e96f0a8 100644
|
|
||||||
--- a/README.md
|
|
||||||
+++ b/README.md
|
|
||||||
@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864
|
|
||||||
|
|
||||||
common: common code for all platforms.
|
|
||||||
|
|
||||||
-intel : Intel platform-specific code.
|
|
||||||
+x86 : Intel platform-specific code.
|
|
||||||
|
|
||||||
powerpc: PowerPC platform-specific code.
|
|
||||||
|
|
||||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
|
||||||
index 35629dc..e35093d 100644
|
|
||||||
--- a/common/include/os/plat.h
|
|
||||||
+++ b/common/include/os/plat.h
|
|
||||||
@@ -35,7 +35,7 @@
|
|
||||||
#ifdef __powerpc64__
|
|
||||||
#include "../../../powerpc/include/types.h"
|
|
||||||
#else
|
|
||||||
-#include "../../../intel/include/types.h"
|
|
||||||
+#include "../../../x86/include/types.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
diff --git a/common/include/types.h b/common/include/types.h
|
|
||||||
index fc9c592..3e30f7c 100644
|
|
||||||
--- a/common/include/types.h
|
|
||||||
+++ b/common/include/types.h
|
|
||||||
@@ -34,7 +34,7 @@
|
|
||||||
#ifdef __powerpc64__
|
|
||||||
#include "../../powerpc/include/types.h"
|
|
||||||
#else
|
|
||||||
-#include "../../intel/include/types.h"
|
|
||||||
+#include "../../x86/include/types.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
diff --git a/configure.ac b/configure.ac
|
|
||||||
index 36edcc5..71fa92d 100644
|
|
||||||
--- a/configure.ac
|
|
||||||
+++ b/configure.ac
|
|
||||||
@@ -10,7 +10,7 @@ LT_INIT
|
|
||||||
AC_CONFIG_MACRO_DIRS([m4])
|
|
||||||
|
|
||||||
AC_CANONICAL_HOST
|
|
||||||
-AM_CONDITIONAL(CPU_INTEL, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686")
|
|
||||||
+AM_CONDITIONAL(CPU_X86, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686")
|
|
||||||
AM_CONDITIONAL(CPU_PPC, test "x$host_cpu" = "xpowerpc64" -o "x$host_cpu" = "xpowerpc64le")
|
|
||||||
|
|
||||||
# Checks for programs.
|
|
||||||
diff --git a/test/mgen/intel/util.c b/test/mgen/x86/util.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from test/mgen/intel/util.c
|
|
||||||
rename to test/mgen/x86/util.c
|
|
||||||
diff --git a/intel/bdw.c b/x86/bdw.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/bdw.c
|
|
||||||
rename to x86/bdw.c
|
|
||||||
diff --git a/intel/include/bdw.h b/x86/include/bdw.h
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/include/bdw.h
|
|
||||||
rename to x86/include/bdw.h
|
|
||||||
diff --git a/intel/include/nhm.h b/x86/include/nhm.h
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/include/nhm.h
|
|
||||||
rename to x86/include/nhm.h
|
|
||||||
diff --git a/intel/include/skl.h b/x86/include/skl.h
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/include/skl.h
|
|
||||||
rename to x86/include/skl.h
|
|
||||||
diff --git a/intel/include/snb.h b/x86/include/snb.h
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/include/snb.h
|
|
||||||
rename to x86/include/snb.h
|
|
||||||
diff --git a/intel/include/types.h b/x86/include/types.h
|
|
||||||
similarity index 95%
|
|
||||||
rename from intel/include/types.h
|
|
||||||
rename to x86/include/types.h
|
|
||||||
index 76c7ad3..1a15b3a 100644
|
|
||||||
--- a/intel/include/types.h
|
|
||||||
+++ b/x86/include/types.h
|
|
||||||
@@ -27,8 +27,8 @@
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
-#ifndef _NUMATOP_INTEL_TYPES_H
|
|
||||||
-#define _NUMATOP_INTEL_TYPES_H
|
|
||||||
+#ifndef _NUMATOP_X86_TYPES_H
|
|
||||||
+#define _NUMATOP_X86_TYPES_H
|
|
||||||
|
|
||||||
#include "../../common/include/types.h"
|
|
||||||
|
|
||||||
@@ -62,4 +62,4 @@ typedef enum {
|
|
||||||
|
|
||||||
#define PERF_COUNT_NUM 5
|
|
||||||
|
|
||||||
-#endif /* _NUMATOP_INTEL_TYPES_H */
|
|
||||||
+#endif /* _NUMATOP_X86_TYPES_H */
|
|
||||||
diff --git a/intel/include/util.h b/x86/include/util.h
|
|
||||||
similarity index 94%
|
|
||||||
rename from intel/include/util.h
|
|
||||||
rename to x86/include/util.h
|
|
||||||
index 7026e99..37a6300 100644
|
|
||||||
--- a/intel/include/util.h
|
|
||||||
+++ b/x86/include/util.h
|
|
||||||
@@ -27,8 +27,8 @@
|
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
-#ifndef _NUMATOP_INTEL_UTIL_H
|
|
||||||
-#define _NUMATOP_INTEL_UTIL_H
|
|
||||||
+#ifndef _NUMATOP_X86_UTIL_H
|
|
||||||
+#define _NUMATOP_X86_UTIL_H
|
|
||||||
|
|
||||||
#define CPU_FAMILY(eax) \
|
|
||||||
(((eax) & 0x0F00) >> 8)
|
|
||||||
@@ -39,4 +39,4 @@
|
|
||||||
#define CPU_EXT_MODEL(eax) \
|
|
||||||
(((eax) & 0xF0000) >> 16)
|
|
||||||
|
|
||||||
-#endif /* _NUMATOP_INTEL_UTIL_H */
|
|
||||||
+#endif /* _NUMATOP_X86_UTIL_H */
|
|
||||||
diff --git a/intel/include/wsm.h b/x86/include/wsm.h
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/include/wsm.h
|
|
||||||
rename to x86/include/wsm.h
|
|
||||||
diff --git a/intel/nhm.c b/x86/nhm.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/nhm.c
|
|
||||||
rename to x86/nhm.c
|
|
||||||
diff --git a/intel/plat.c b/x86/plat.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/plat.c
|
|
||||||
rename to x86/plat.c
|
|
||||||
diff --git a/intel/skl.c b/x86/skl.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/skl.c
|
|
||||||
rename to x86/skl.c
|
|
||||||
diff --git a/intel/snb.c b/x86/snb.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/snb.c
|
|
||||||
rename to x86/snb.c
|
|
||||||
diff --git a/intel/ui_perf_map.c b/x86/ui_perf_map.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/ui_perf_map.c
|
|
||||||
rename to x86/ui_perf_map.c
|
|
||||||
diff --git a/intel/util.c b/x86/util.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/util.c
|
|
||||||
rename to x86/util.c
|
|
||||||
diff --git a/intel/wsm.c b/x86/wsm.c
|
|
||||||
similarity index 100%
|
|
||||||
rename from intel/wsm.c
|
|
||||||
rename to x86/wsm.c
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,322 +0,0 @@
|
|||||||
From fdf9b3ce90d1f435fe837added7373e25e6045b2 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Wed, 16 Feb 2022 18:05:27 +0530
|
|
||||||
Subject: [PATCH 05/15] x86/zen: Add initial support
|
|
||||||
|
|
||||||
Add vendor and family identification as well as the relevant
|
|
||||||
events to count per-process memory accesses and CPU usage
|
|
||||||
on AMD Zen and Zen 2 family of processors.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
Makefile.am | 4 ++-
|
|
||||||
README.md | 2 +-
|
|
||||||
x86/include/types.h | 5 ++--
|
|
||||||
x86/include/util.h | 3 ++
|
|
||||||
x86/include/zen.h | 50 ++++++++++++++++++++++++++++++++
|
|
||||||
x86/plat.c | 29 +++++++++++++------
|
|
||||||
x86/zen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
7 files changed, 151 insertions(+), 12 deletions(-)
|
|
||||||
create mode 100644 x86/include/zen.h
|
|
||||||
create mode 100644 x86/zen.c
|
|
||||||
|
|
||||||
diff --git a/Makefile.am b/Makefile.am
|
|
||||||
index 438a9fc..ae11522 100644
|
|
||||||
--- a/Makefile.am
|
|
||||||
+++ b/Makefile.am
|
|
||||||
@@ -67,6 +67,7 @@ libnumatop_la_SOURCES += \
|
|
||||||
x86/include/types.h \
|
|
||||||
x86/include/util.h \
|
|
||||||
x86/include/wsm.h \
|
|
||||||
+ x86/include/zen.h \
|
|
||||||
x86/bdw.c \
|
|
||||||
x86/nhm.c \
|
|
||||||
x86/plat.c \
|
|
||||||
@@ -74,7 +75,8 @@ libnumatop_la_SOURCES += \
|
|
||||||
x86/snb.c \
|
|
||||||
x86/ui_perf_map.c \
|
|
||||||
x86/util.c \
|
|
||||||
- x86/wsm.c
|
|
||||||
+ x86/wsm.c \
|
|
||||||
+ x86/zen.c
|
|
||||||
endif
|
|
||||||
|
|
||||||
if CPU_PPC
|
|
||||||
diff --git a/README.md b/README.md
|
|
||||||
index e96f0a8..9908e92 100644
|
|
||||||
--- a/README.md
|
|
||||||
+++ b/README.md
|
|
||||||
@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864
|
|
||||||
|
|
||||||
common: common code for all platforms.
|
|
||||||
|
|
||||||
-x86 : Intel platform-specific code.
|
|
||||||
+x86 : Intel and AMD platform-specific code.
|
|
||||||
|
|
||||||
powerpc: PowerPC platform-specific code.
|
|
||||||
|
|
||||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
|
||||||
index 1a15b3a..0843bd1 100644
|
|
||||||
--- a/x86/include/types.h
|
|
||||||
+++ b/x86/include/types.h
|
|
||||||
@@ -46,10 +46,11 @@ typedef enum {
|
|
||||||
CPU_BDX,
|
|
||||||
CPU_SKX,
|
|
||||||
CPU_ICX,
|
|
||||||
- CPU_SPR
|
|
||||||
+ CPU_SPR,
|
|
||||||
+ CPU_ZEN
|
|
||||||
} cpu_type_t;
|
|
||||||
|
|
||||||
-#define CPU_TYPE_NUM 12
|
|
||||||
+#define CPU_TYPE_NUM 13
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
PERF_COUNT_INVALID = -1,
|
|
||||||
diff --git a/x86/include/util.h b/x86/include/util.h
|
|
||||||
index 37a6300..4d2534b 100644
|
|
||||||
--- a/x86/include/util.h
|
|
||||||
+++ b/x86/include/util.h
|
|
||||||
@@ -36,6 +36,9 @@
|
|
||||||
#define CPU_MODEL(eax) \
|
|
||||||
(((eax) & 0x00F0) >> 4)
|
|
||||||
|
|
||||||
+#define CPU_EXT_FAMILY(eax) \
|
|
||||||
+ (((eax) & 0x0FF00000) >> 20)
|
|
||||||
+
|
|
||||||
#define CPU_EXT_MODEL(eax) \
|
|
||||||
(((eax) & 0xF0000) >> 16)
|
|
||||||
|
|
||||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..be61324
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/x86/include/zen.h
|
|
||||||
@@ -0,0 +1,50 @@
|
|
||||||
+/*
|
|
||||||
+ * Copyright (c) 2023, AMD Corporation
|
|
||||||
+ *
|
|
||||||
+ * Redistribution and use in source and binary forms, with or without
|
|
||||||
+ * modification, are permitted provided that the following conditions are met:
|
|
||||||
+ *
|
|
||||||
+ * * Redistributions of source code must retain the above copyright notice,
|
|
||||||
+ * this list of conditions and the following disclaimer.
|
|
||||||
+ * * Redistributions in binary form must reproduce the above copyright
|
|
||||||
+ * notice, this list of conditions and the following disclaimer in the
|
|
||||||
+ * documentation and/or other materials provided with the distribution.
|
|
||||||
+ * * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
+ * may be used to endorse or promote products derived from this software
|
|
||||||
+ * without specific prior written permission.
|
|
||||||
+ *
|
|
||||||
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
+ * POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#ifndef _NUMATOP_AMD_ZEN_H
|
|
||||||
+#define _NUMATOP_AMD_ZEN_H
|
|
||||||
+
|
|
||||||
+#ifdef __cplusplus
|
|
||||||
+extern "C" {
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#include <sys/types.h>
|
|
||||||
+#include <inttypes.h>
|
|
||||||
+#include "../../common/include/types.h"
|
|
||||||
+
|
|
||||||
+struct _plat_event_config;
|
|
||||||
+
|
|
||||||
+extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
+extern void zen_ll_config(struct _plat_event_config *);
|
|
||||||
+extern int zen_offcore_num(void);
|
|
||||||
+
|
|
||||||
+#ifdef __cplusplus
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#endif /* _NUMATOP_AMD_ZEN_H */
|
|
||||||
diff --git a/x86/plat.c b/x86/plat.c
|
|
||||||
index abf3766..f79837a 100644
|
|
||||||
--- a/x86/plat.c
|
|
||||||
+++ b/x86/plat.c
|
|
||||||
@@ -36,6 +36,7 @@
|
|
||||||
#include "include/snb.h"
|
|
||||||
#include "include/bdw.h"
|
|
||||||
#include "include/skl.h"
|
|
||||||
+#include "include/zen.h"
|
|
||||||
|
|
||||||
pfn_plat_profiling_config_t
|
|
||||||
s_plat_profiling_config[CPU_TYPE_NUM] = {
|
|
||||||
@@ -50,7 +51,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
|
||||||
bdw_profiling_config,
|
|
||||||
skl_profiling_config,
|
|
||||||
icx_profiling_config,
|
|
||||||
- spr_profiling_config
|
|
||||||
+ spr_profiling_config,
|
|
||||||
+ zen_profiling_config
|
|
||||||
};
|
|
||||||
|
|
||||||
pfn_plat_ll_config_t
|
|
||||||
@@ -66,7 +68,8 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
|
||||||
bdw_ll_config,
|
|
||||||
skl_ll_config,
|
|
||||||
icx_ll_config,
|
|
||||||
- spr_ll_config
|
|
||||||
+ spr_ll_config,
|
|
||||||
+ zen_ll_config
|
|
||||||
};
|
|
||||||
|
|
||||||
pfn_plat_offcore_num_t
|
|
||||||
@@ -82,7 +85,8 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
|
||||||
bdw_offcore_num,
|
|
||||||
skl_offcore_num,
|
|
||||||
icx_offcore_num,
|
|
||||||
- spr_offcore_num
|
|
||||||
+ spr_offcore_num,
|
|
||||||
+ zen_offcore_num
|
|
||||||
};
|
|
||||||
|
|
||||||
/* ARGSUSED */
|
|
||||||
@@ -117,7 +121,7 @@ static cpu_type_t
|
|
||||||
cpu_type_get(void)
|
|
||||||
{
|
|
||||||
unsigned int eax, ebx, ecx, edx;
|
|
||||||
- int family, model, ext_model;
|
|
||||||
+ int family, model;
|
|
||||||
cpu_type_t type = CPU_UNSUP;
|
|
||||||
char vendor[16];
|
|
||||||
|
|
||||||
@@ -129,7 +133,8 @@ cpu_type_get(void)
|
|
||||||
(void) strncpy(&vendor[8], (char *)(&edx), 4);
|
|
||||||
vendor[12] = 0;
|
|
||||||
|
|
||||||
- if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0) {
|
|
||||||
+ if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0 &&
|
|
||||||
+ strncmp(vendor, "Auth" "cAMD" "enti", 12) != 0) {
|
|
||||||
return (CPU_UNSUP);
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -138,11 +143,16 @@ cpu_type_get(void)
|
|
||||||
|
|
||||||
family = CPU_FAMILY(eax);
|
|
||||||
model = CPU_MODEL(eax);
|
|
||||||
- ext_model = CPU_EXT_MODEL(eax);
|
|
||||||
|
|
||||||
- if (family == 6) {
|
|
||||||
- model = (ext_model << 4) + model;
|
|
||||||
+ /* Extended Model ID is considered only when Family ID is either 6 or 15 */
|
|
||||||
+ if (family == 6 || family == 15)
|
|
||||||
+ model += CPU_EXT_MODEL(eax) << 4;
|
|
||||||
+
|
|
||||||
+ /* Extended Family ID is considered only when Family ID is 15 */
|
|
||||||
+ if (family == 15)
|
|
||||||
+ family += CPU_EXT_FAMILY(eax);
|
|
||||||
|
|
||||||
+ if (family == 6) {
|
|
||||||
switch (model) {
|
|
||||||
case 26:
|
|
||||||
type = CPU_NHM_EP;
|
|
||||||
@@ -178,6 +188,8 @@ cpu_type_get(void)
|
|
||||||
type = CPU_SPR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
+ } else if (family == 23) {
|
|
||||||
+ type = CPU_ZEN;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (type);
|
|
||||||
@@ -217,6 +229,7 @@ plat_detect(void)
|
|
||||||
case CPU_SKX:
|
|
||||||
case CPU_ICX:
|
|
||||||
case CPU_SPR:
|
|
||||||
+ case CPU_ZEN:
|
|
||||||
ret = 0;
|
|
||||||
s_cpu_type = cpu_type;
|
|
||||||
break;
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..abf603a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -0,0 +1,70 @@
|
|
||||||
+/*
|
|
||||||
+ * Copyright (c) 2023, AMD Corporation
|
|
||||||
+ *
|
|
||||||
+ * Redistribution and use in source and binary forms, with or without
|
|
||||||
+ * modification, are permitted provided that the following conditions are met:
|
|
||||||
+ *
|
|
||||||
+ * * Redistributions of source code must retain the above copyright notice,
|
|
||||||
+ * this list of conditions and the following disclaimer.
|
|
||||||
+ * * Redistributions in binary form must reproduce the above copyright
|
|
||||||
+ * notice, this list of conditions and the following disclaimer in the
|
|
||||||
+ * documentation and/or other materials provided with the distribution.
|
|
||||||
+ * * Neither the name of Intel Corporation nor the names of its contributors
|
|
||||||
+ * may be used to endorse or promote products derived from this software
|
|
||||||
+ * without specific prior written permission.
|
|
||||||
+ *
|
|
||||||
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
+ * POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+/* This file contains the Zen platform specific functions. */
|
|
||||||
+
|
|
||||||
+#include <inttypes.h>
|
|
||||||
+#include <stdlib.h>
|
|
||||||
+#include <sys/types.h>
|
|
||||||
+#include <stdio.h>
|
|
||||||
+#include <unistd.h>
|
|
||||||
+#include <string.h>
|
|
||||||
+#include <strings.h>
|
|
||||||
+#include "../common/include/os/linux/perf_event.h"
|
|
||||||
+#include "../common/include/os/plat.h"
|
|
||||||
+#include "include/zen.h"
|
|
||||||
+
|
|
||||||
+static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static plat_event_config_t s_zen_ll = {
|
|
||||||
+ PERF_TYPE_RAW, 0, 0, 0, "Unsupported"
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
+{
|
|
||||||
+ plat_config_get(perf_count_id, cfg, s_zen_config);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void
|
|
||||||
+zen_ll_config(plat_event_config_t *cfg)
|
|
||||||
+{
|
|
||||||
+ memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t));
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+int
|
|
||||||
+zen_offcore_num(void)
|
|
||||||
+{
|
|
||||||
+ return (2);
|
|
||||||
+}
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,308 +0,0 @@
|
|||||||
From 4a8b8d47f4a240a95830dc05abd3c19e10b6d821 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Fri, 11 Nov 2022 13:56:09 +0530
|
|
||||||
Subject: [PATCH 06/15] common: Add sample period to platform event config
|
|
||||||
|
|
||||||
Precise PMU events are currently used for capturing memory
|
|
||||||
access statistics. The sample period used for such events is
|
|
||||||
currently hard-coded (LL_PERIOD) and a universal value may
|
|
||||||
not work well on all platforms due to microarchitectural
|
|
||||||
differences in the design of the precise PMU.
|
|
||||||
|
|
||||||
E.g. precise events are programmed through Instruction Based
|
|
||||||
Sampling (IBS) on AMD processors but that PMU does not have
|
|
||||||
the ability to tag only load-store operations. This leads to
|
|
||||||
the capture of many samples that are not relevant for the
|
|
||||||
current use-case. To get an appropriate amount of relevant
|
|
||||||
data, more samples need to be captured and then filtered.
|
|
||||||
This is achieved by increasing the sampling frequency.
|
|
||||||
|
|
||||||
Add sample period as an additional attribute to the platform
|
|
||||||
event config structure so that a customized sample period
|
|
||||||
that works well on a specific platform can be passed during
|
|
||||||
event programming. If not set, a default value is chosen.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/include/os/plat.h | 1 +
|
|
||||||
common/os/os_perf.c | 6 +++++-
|
|
||||||
powerpc/power8.c | 14 +++++++-------
|
|
||||||
powerpc/power9.c | 14 +++++++-------
|
|
||||||
x86/bdw.c | 12 ++++++------
|
|
||||||
x86/nhm.c | 12 ++++++------
|
|
||||||
x86/skl.c | 32 ++++++++++++++++----------------
|
|
||||||
x86/snb.c | 12 ++++++------
|
|
||||||
x86/wsm.c | 22 +++++++++++-----------
|
|
||||||
x86/zen.c | 2 +-
|
|
||||||
10 files changed, 66 insertions(+), 61 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
|
||||||
index e35093d..34535cd 100644
|
|
||||||
--- a/common/include/os/plat.h
|
|
||||||
+++ b/common/include/os/plat.h
|
|
||||||
@@ -53,6 +53,7 @@ typedef struct _plat_event_config {
|
|
||||||
uint64_t config;
|
|
||||||
uint64_t other_attr;
|
|
||||||
uint64_t extra_value;
|
|
||||||
+ uint64_t sample_period;
|
|
||||||
char desc[PLAT_EVENT_DESC_SIZE];
|
|
||||||
} plat_event_config_t;
|
|
||||||
|
|
||||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
|
||||||
index f2f1104..f1036a9 100644
|
|
||||||
--- a/common/os/os_perf.c
|
|
||||||
+++ b/common/os/os_perf.c
|
|
||||||
@@ -839,7 +839,11 @@ ll_init(pf_conf_t *conf)
|
|
||||||
conf->type = cfg.type;
|
|
||||||
conf->config = (cfg.config) | (cfg.other_attr << 16);
|
|
||||||
conf->config1 = cfg.extra_value;
|
|
||||||
- conf->sample_period = LL_PERIOD;
|
|
||||||
+ conf->sample_period = cfg.sample_period;
|
|
||||||
+
|
|
||||||
+ /* If sample period is not set, choose a default value */
|
|
||||||
+ if (!cfg.sample_period)
|
|
||||||
+ conf->sample_period = LL_PERIOD;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
diff --git a/powerpc/power8.c b/powerpc/power8.c
|
|
||||||
index b3cab75..a76851d 100644
|
|
||||||
--- a/powerpc/power8.c
|
|
||||||
+++ b/powerpc/power8.c
|
|
||||||
@@ -38,16 +38,16 @@
|
|
||||||
#include "include/power8.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_power8_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED"
|
|
||||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/powerpc/power9.c b/powerpc/power9.c
|
|
||||||
index c6f1cec..4b0bcfc 100644
|
|
||||||
--- a/powerpc/power9.c
|
|
||||||
+++ b/powerpc/power9.c
|
|
||||||
@@ -38,16 +38,16 @@
|
|
||||||
#include "include/power9.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_power9_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED"
|
|
||||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/bdw.c b/x86/bdw.c
|
|
||||||
index 97e33ea..5640f7b 100644
|
|
||||||
--- a/x86/bdw.c
|
|
||||||
+++ b/x86/bdw.c
|
|
||||||
@@ -40,15 +40,15 @@
|
|
||||||
#include "include/bdw.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_bdw_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/nhm.c b/x86/nhm.c
|
|
||||||
index bf8c14f..d29d396 100644
|
|
||||||
--- a/x86/nhm.c
|
|
||||||
+++ b/x86/nhm.c
|
|
||||||
@@ -41,15 +41,15 @@
|
|
||||||
#include "include/nhm.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_nhm_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
static void
|
|
||||||
diff --git a/x86/skl.c b/x86/skl.c
|
|
||||||
index ace0833..6f81298 100644
|
|
||||||
--- a/x86/skl.c
|
|
||||||
+++ b/x86/skl.c
|
|
||||||
@@ -40,31 +40,31 @@
|
|
||||||
#include "include/skl.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_skl_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/snb.c b/x86/snb.c
|
|
||||||
index eb89859..3d3185b 100644
|
|
||||||
--- a/x86/snb.c
|
|
||||||
+++ b/x86/snb.c
|
|
||||||
@@ -40,15 +40,15 @@
|
|
||||||
#include "include/snb.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_snb_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/wsm.c b/x86/wsm.c
|
|
||||||
index f4285c2..16f68e4 100644
|
|
||||||
--- a/x86/wsm.c
|
|
||||||
+++ b/x86/wsm.c
|
|
||||||
@@ -40,23 +40,23 @@
|
|
||||||
#include "include/wsm.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsm_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
index abf603a..c153a1a 100644
|
|
||||||
--- a/x86/zen.c
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -48,7 +48,7 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_zen_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0, 0, 0, "Unsupported"
|
|
||||||
+ PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,350 +0,0 @@
|
|||||||
From 9d665e4712f0dfa48603471c51ed3c87441030ad Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Tue, 13 Jun 2023 19:21:49 +0530
|
|
||||||
Subject: [PATCH 07/15] common: Add exclude guest to platform event config
|
|
||||||
|
|
||||||
Precise PMU events are currently used for capturing memory
|
|
||||||
access statistics. Currently, these events are programmed to
|
|
||||||
exclude guests and this does not work well on all platforms
|
|
||||||
due to differences in the design of the precise PMU.
|
|
||||||
|
|
||||||
E.g. precise events are programmed through Instruction Based
|
|
||||||
Sampling (IBS) on AMD processors but that PMU does not have
|
|
||||||
the ability to ignore guests unlike the Core PMU.
|
|
||||||
|
|
||||||
Add exclude guest as an additional attribute to the platform
|
|
||||||
event config structure so that precise events can be customized
|
|
||||||
to work on different platforms.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/include/os/pfwrapper.h | 1 +
|
|
||||||
common/include/os/plat.h | 1 +
|
|
||||||
common/include/types.h | 1 +
|
|
||||||
common/os/os_perf.c | 1 +
|
|
||||||
common/os/pfwrapper.c | 2 +-
|
|
||||||
powerpc/power8.c | 14 +++++++-------
|
|
||||||
powerpc/power9.c | 14 +++++++-------
|
|
||||||
x86/bdw.c | 12 ++++++------
|
|
||||||
x86/nhm.c | 12 ++++++------
|
|
||||||
x86/skl.c | 32 ++++++++++++++++----------------
|
|
||||||
x86/snb.c | 12 ++++++------
|
|
||||||
x86/wsm.c | 22 +++++++++++-----------
|
|
||||||
x86/zen.c | 12 ++++++------
|
|
||||||
13 files changed, 70 insertions(+), 66 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/common/include/os/pfwrapper.h b/common/include/os/pfwrapper.h
|
|
||||||
index 414d6af..1864a10 100644
|
|
||||||
--- a/common/include/os/pfwrapper.h
|
|
||||||
+++ b/common/include/os/pfwrapper.h
|
|
||||||
@@ -78,6 +78,7 @@ typedef struct _pf_conf {
|
|
||||||
uint64_t config;
|
|
||||||
uint64_t config1;
|
|
||||||
uint64_t sample_period;
|
|
||||||
+ bool exclude_guest;
|
|
||||||
} pf_conf_t;
|
|
||||||
|
|
||||||
typedef struct _pf_profiling_rec {
|
|
||||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
|
||||||
index 34535cd..ac4aac8 100644
|
|
||||||
--- a/common/include/os/plat.h
|
|
||||||
+++ b/common/include/os/plat.h
|
|
||||||
@@ -54,6 +54,7 @@ typedef struct _plat_event_config {
|
|
||||||
uint64_t other_attr;
|
|
||||||
uint64_t extra_value;
|
|
||||||
uint64_t sample_period;
|
|
||||||
+ bool exclude_guest;
|
|
||||||
char desc[PLAT_EVENT_DESC_SIZE];
|
|
||||||
} plat_event_config_t;
|
|
||||||
|
|
||||||
diff --git a/common/include/types.h b/common/include/types.h
|
|
||||||
index 3e30f7c..efe3055 100644
|
|
||||||
--- a/common/include/types.h
|
|
||||||
+++ b/common/include/types.h
|
|
||||||
@@ -30,6 +30,7 @@
|
|
||||||
#define _NUMATOP_TYPES_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
+#include <stdbool.h>
|
|
||||||
#include "./os/os_types.h"
|
|
||||||
#ifdef __powerpc64__
|
|
||||||
#include "../../powerpc/include/types.h"
|
|
||||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
|
||||||
index f1036a9..44ca43d 100644
|
|
||||||
--- a/common/os/os_perf.c
|
|
||||||
+++ b/common/os/os_perf.c
|
|
||||||
@@ -840,6 +840,7 @@ ll_init(pf_conf_t *conf)
|
|
||||||
conf->config = (cfg.config) | (cfg.other_attr << 16);
|
|
||||||
conf->config1 = cfg.extra_value;
|
|
||||||
conf->sample_period = cfg.sample_period;
|
|
||||||
+ conf->exclude_guest = cfg.exclude_guest;
|
|
||||||
|
|
||||||
/* If sample period is not set, choose a default value */
|
|
||||||
if (!cfg.sample_period)
|
|
||||||
diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c
|
|
||||||
index b4c4f17..e08ce07 100644
|
|
||||||
--- a/common/os/pfwrapper.c
|
|
||||||
+++ b/common/os/pfwrapper.c
|
|
||||||
@@ -432,7 +432,7 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf)
|
|
||||||
attr.config1 = conf->config1;
|
|
||||||
attr.sample_period = conf->sample_period;
|
|
||||||
attr.precise_ip = 1;
|
|
||||||
- attr.exclude_guest = 1;
|
|
||||||
+ attr.exclude_guest = conf->exclude_guest;
|
|
||||||
attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU |
|
|
||||||
PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN;
|
|
||||||
attr.disabled = 1;
|
|
||||||
diff --git a/powerpc/power8.c b/powerpc/power8.c
|
|
||||||
index a76851d..d8f4e01 100644
|
|
||||||
--- a/powerpc/power8.c
|
|
||||||
+++ b/powerpc/power8.c
|
|
||||||
@@ -38,16 +38,16 @@
|
|
||||||
#include "include/power8.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_power8_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
|
||||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/powerpc/power9.c b/powerpc/power9.c
|
|
||||||
index 4b0bcfc..9879ec7 100644
|
|
||||||
--- a/powerpc/power9.c
|
|
||||||
+++ b/powerpc/power9.c
|
|
||||||
@@ -38,16 +38,16 @@
|
|
||||||
#include "include/power9.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
|
||||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_power9_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
|
||||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/bdw.c b/x86/bdw.c
|
|
||||||
index 5640f7b..97eca67 100644
|
|
||||||
--- a/x86/bdw.c
|
|
||||||
+++ b/x86/bdw.c
|
|
||||||
@@ -40,15 +40,15 @@
|
|
||||||
#include "include/bdw.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_bdw_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/nhm.c b/x86/nhm.c
|
|
||||||
index d29d396..cf65705 100644
|
|
||||||
--- a/x86/nhm.c
|
|
||||||
+++ b/x86/nhm.c
|
|
||||||
@@ -41,15 +41,15 @@
|
|
||||||
#include "include/nhm.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_nhm_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
static void
|
|
||||||
diff --git a/x86/skl.c b/x86/skl.c
|
|
||||||
index 6f81298..a7bbc14 100644
|
|
||||||
--- a/x86/skl.c
|
|
||||||
+++ b/x86/skl.c
|
|
||||||
@@ -40,31 +40,31 @@
|
|
||||||
#include "include/skl.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_skl_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/snb.c b/x86/snb.c
|
|
||||||
index 3d3185b..135ee1c 100644
|
|
||||||
--- a/x86/snb.c
|
|
||||||
+++ b/x86/snb.c
|
|
||||||
@@ -40,15 +40,15 @@
|
|
||||||
#include "include/snb.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_snb_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/wsm.c b/x86/wsm.c
|
|
||||||
index 16f68e4..7b122fd 100644
|
|
||||||
--- a/x86/wsm.c
|
|
||||||
+++ b/x86/wsm.c
|
|
||||||
@@ -40,23 +40,23 @@
|
|
||||||
#include "include/wsm.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
|
||||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" }
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_wsm_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
|
||||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
index c153a1a..2f851a2 100644
|
|
||||||
--- a/x86/zen.c
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -40,15 +40,15 @@
|
|
||||||
#include "include/zen.h"
|
|
||||||
|
|
||||||
static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
- { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" },
|
|
||||||
- { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
|
||||||
};
|
|
||||||
|
|
||||||
static plat_event_config_t s_zen_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported"
|
|
||||||
+ PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,156 +0,0 @@
|
|||||||
From aefc85d7b956c4df998afb4cfe5c413e5fd5b062 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Thu, 10 Mar 2022 10:32:51 +0530
|
|
||||||
Subject: [PATCH 08/15] x86/zen: Add support for memory access stats
|
|
||||||
|
|
||||||
Add support for capturing memory access statistics on Zen
|
|
||||||
processors using Instruction Based Sampling (IBS).
|
|
||||||
|
|
||||||
IBS, by design, cannot tag specific types of ops and hence
|
|
||||||
cannot provide samples for only those ops that cause memory
|
|
||||||
access. Hence, additional post-processing is required for
|
|
||||||
filtering out irrelevant samples. To get an appropriate
|
|
||||||
volume of samples, the sampling frequency also needs to be
|
|
||||||
high.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/os/pfwrapper.c | 20 +++++++++++++++++---
|
|
||||||
x86/zen.c | 35 ++++++++++++++++++++++++++++++++++-
|
|
||||||
2 files changed, 51 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c
|
|
||||||
index e08ce07..d6102be 100644
|
|
||||||
--- a/common/os/pfwrapper.c
|
|
||||||
+++ b/common/os/pfwrapper.c
|
|
||||||
@@ -434,7 +434,8 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf)
|
|
||||||
attr.precise_ip = 1;
|
|
||||||
attr.exclude_guest = conf->exclude_guest;
|
|
||||||
attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU |
|
|
||||||
- PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN;
|
|
||||||
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN |
|
|
||||||
+ PERF_SAMPLE_DATA_SRC;
|
|
||||||
attr.disabled = 1;
|
|
||||||
|
|
||||||
if ((fds[0] = pf_event_open(&attr, -1, cpu->cpuid, -1, 0)) < 0) {
|
|
||||||
@@ -481,6 +482,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
|
||||||
pf_ll_rec_t *rec)
|
|
||||||
{
|
|
||||||
struct { uint32_t pid, tid; } id;
|
|
||||||
+ union perf_mem_data_src data_src;
|
|
||||||
uint64_t i, addr, cpu, weight, nr, value, *ips;
|
|
||||||
int j, ret = -1;
|
|
||||||
|
|
||||||
@@ -492,6 +494,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
|
||||||
* [ u64 nr; }
|
|
||||||
* { u64 ips[nr]; }
|
|
||||||
* { u64 weight; }
|
|
||||||
+ * { u64 data_src; }
|
|
||||||
* };
|
|
||||||
*/
|
|
||||||
if (mmap_buffer_read(mhdr, &id, sizeof (id)) == -1) {
|
|
||||||
@@ -551,7 +554,18 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
|
||||||
}
|
|
||||||
|
|
||||||
size -= sizeof (weight);
|
|
||||||
-
|
|
||||||
+
|
|
||||||
+ if (mmap_buffer_read(mhdr, &data_src, sizeof (data_src)) == -1) {
|
|
||||||
+ debug_print(NULL, 2, "ll_sample_read: read data_src failed.\n");
|
|
||||||
+ goto L_EXIT;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ size -= sizeof (data_src);
|
|
||||||
+
|
|
||||||
+ if (data_src.mem_op == PERF_MEM_OP_NA ||
|
|
||||||
+ data_src.mem_op == PERF_MEM_OP_EXEC)
|
|
||||||
+ addr = 0;
|
|
||||||
+
|
|
||||||
rec->ip_num = j;
|
|
||||||
rec->pid = id.pid;
|
|
||||||
rec->tid = id.tid;
|
|
||||||
@@ -575,7 +589,7 @@ ll_recbuf_update(pf_ll_rec_t *rec_arr, int *nrec, pf_ll_rec_t *rec)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
- if ((rec->pid == 0) || (rec->tid == 0)) {
|
|
||||||
+ if ((rec->pid == 0) || (rec->tid == 0) || (rec->addr == 0)) {
|
|
||||||
/* Just consider the user-land process/thread. */
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
index 2f851a2..67a425b 100644
|
|
||||||
--- a/x86/zen.c
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -30,7 +30,9 @@
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
+#include <sys/stat.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
+#include <fcntl.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <string.h>
|
|
||||||
@@ -39,6 +41,9 @@
|
|
||||||
#include "../common/include/os/plat.h"
|
|
||||||
#include "include/zen.h"
|
|
||||||
|
|
||||||
+#define IBS_OP_PMU_TYPE_PATH \
|
|
||||||
+ "/sys/bus/event_source/devices/ibs_op/type"
|
|
||||||
+
|
|
||||||
static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
{ PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
|
||||||
@@ -47,8 +52,13 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
{ PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
|
||||||
};
|
|
||||||
|
|
||||||
+/*
|
|
||||||
+ * Owing to the nature of IBS uop tagging, a higher sampling period is
|
|
||||||
+ * required to capture meaningful samples. All samples may not originate
|
|
||||||
+ * from a memory access instruction and require additional filtering.
|
|
||||||
+ */
|
|
||||||
static plat_event_config_t s_zen_ll = {
|
|
||||||
- PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported"
|
|
||||||
+ 0, 0x0000000000000000, 0, 0, LL_THRESH * 10, 0, "IbsOpCntCycles"
|
|
||||||
};
|
|
||||||
|
|
||||||
void
|
|
||||||
@@ -57,10 +67,33 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
plat_config_get(perf_count_id, cfg, s_zen_config);
|
|
||||||
}
|
|
||||||
|
|
||||||
+static int
|
|
||||||
+zen_ibs_op_pmu_type(void)
|
|
||||||
+{
|
|
||||||
+ int fd, type, i;
|
|
||||||
+ char buf[32];
|
|
||||||
+
|
|
||||||
+ if ((fd = open(IBS_OP_PMU_TYPE_PATH, O_RDONLY)) < 0)
|
|
||||||
+ return (-1);
|
|
||||||
+
|
|
||||||
+ if ((i = read(fd, buf, sizeof (buf) - 1)) <= 0) {
|
|
||||||
+ close(fd);
|
|
||||||
+ return (-1);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ close(fd);
|
|
||||||
+ buf[i] = 0;
|
|
||||||
+ if ((type = atoi(buf)) == 0)
|
|
||||||
+ return (-1);
|
|
||||||
+
|
|
||||||
+ return (type);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
void
|
|
||||||
zen_ll_config(plat_event_config_t *cfg)
|
|
||||||
{
|
|
||||||
memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t));
|
|
||||||
+ cfg->type = zen_ibs_op_pmu_type();
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,71 +0,0 @@
|
|||||||
From c149b054fe5b1851860fd01d54596ea75f5008d3 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Wed, 13 Apr 2022 11:45:08 +0530
|
|
||||||
Subject: [PATCH 09/15] x86: Fix clock frequency parsing
|
|
||||||
|
|
||||||
AMD processors do not advertise a base clock frequency as
|
|
||||||
a part of the "model name" in /proc/cpuinfo. The parsing
|
|
||||||
must fail in order to let os_calibrate() determine clock
|
|
||||||
speed from cpufreq information or from TSC instead.
|
|
||||||
|
|
||||||
Since the parser fails to find "@", strcspn() returns the
|
|
||||||
length of the line instead and sscanf() ends up scanning
|
|
||||||
garbage values beyond the null terminator that match the
|
|
||||||
format specifier. To avoid this, add an additional check
|
|
||||||
that makes the condition fail if "@" is not found.
|
|
||||||
|
|
||||||
Fixes: eaeed92 ("Powerpc: Fix CPU% utilization for PowerVMs")
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
x86/util.c | 19 ++++++++++++++-----
|
|
||||||
1 file changed, 14 insertions(+), 5 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/x86/util.c b/x86/util.c
|
|
||||||
index fdff877..655a677 100644
|
|
||||||
--- a/x86/util.c
|
|
||||||
+++ b/x86/util.c
|
|
||||||
@@ -67,27 +67,36 @@ rdtsc(void)
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check the cpu name in proc info. Intel CPUs always have @ x.y
|
|
||||||
- * Ghz and that is the TSC frequency.
|
|
||||||
+ * GHz and that is the TSC frequency. AMD CPUs do not advertise
|
|
||||||
+ * clock frequency as a part of the model name.
|
|
||||||
*/
|
|
||||||
int
|
|
||||||
arch__cpuinfo_freq(double *freq, char *unit)
|
|
||||||
{
|
|
||||||
FILE *f;
|
|
||||||
char *line = NULL;
|
|
||||||
- size_t len = 0;
|
|
||||||
+ size_t idx, len = 0;
|
|
||||||
int ret = -1;
|
|
||||||
|
|
||||||
if ((f = fopen(CPUINFO_PATH, "r")) == NULL) {
|
|
||||||
return (-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
- while (getline(&line, &len, f) > 0) {
|
|
||||||
+ while ((len = getline(&line, &len, f)) > 0) {
|
|
||||||
if (strncmp(line, "model name", sizeof ("model name") - 1) != 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
- if (sscanf(line + strcspn(line, "@") + 1, "%lf%10s",
|
|
||||||
- freq, unit) == 2) {
|
|
||||||
+ idx = strcspn(line, "@") + 1;
|
|
||||||
+
|
|
||||||
+ /*
|
|
||||||
+ * The model name will not change for other processors. So
|
|
||||||
+ * bail out if "@" is not found.
|
|
||||||
+ */
|
|
||||||
+ if (idx >= len)
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ if (sscanf(line + idx, "%lf%10s", freq, unit) == 2) {
|
|
||||||
if (strcasecmp(unit, "GHz") == 0) {
|
|
||||||
*freq *= GHZ;
|
|
||||||
} else if (strcasecmp(unit, "Mhz") == 0) {
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,135 +0,0 @@
|
|||||||
From e9bd7eaa767c987fcb8d6879e7c7509a24bcb17c Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Fri, 20 Jan 2023 13:56:37 +0530
|
|
||||||
Subject: [PATCH 10/15] x86/zen: Add Zen 3 support
|
|
||||||
|
|
||||||
Add vendor and family identification as well as the relevant
|
|
||||||
events to count per-process memory accesses and CPU usage
|
|
||||||
on AMD Zen 3 family of processors.
|
|
||||||
|
|
||||||
Key changes include the use of the LsAnyFillsFromSys event
|
|
||||||
instead of LsDmndFillsFromSys for counting local and remote
|
|
||||||
memory accesses. While LsDmndFillsFromSys covers only demand
|
|
||||||
cache fills, LsAnyFillsFromSys covers all cache fills
|
|
||||||
including prefetches.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
x86/include/types.h | 5 +++--
|
|
||||||
x86/include/zen.h | 1 +
|
|
||||||
x86/plat.c | 8 +++++++-
|
|
||||||
x86/zen.c | 14 ++++++++++++++
|
|
||||||
4 files changed, 25 insertions(+), 3 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
|
||||||
index 0843bd1..4aa5fa6 100644
|
|
||||||
--- a/x86/include/types.h
|
|
||||||
+++ b/x86/include/types.h
|
|
||||||
@@ -47,10 +47,11 @@ typedef enum {
|
|
||||||
CPU_SKX,
|
|
||||||
CPU_ICX,
|
|
||||||
CPU_SPR,
|
|
||||||
- CPU_ZEN
|
|
||||||
+ CPU_ZEN,
|
|
||||||
+ CPU_ZEN3
|
|
||||||
} cpu_type_t;
|
|
||||||
|
|
||||||
-#define CPU_TYPE_NUM 13
|
|
||||||
+#define CPU_TYPE_NUM 14
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
PERF_COUNT_INVALID = -1,
|
|
||||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
|
||||||
index be61324..b5c40f5 100644
|
|
||||||
--- a/x86/include/zen.h
|
|
||||||
+++ b/x86/include/zen.h
|
|
||||||
@@ -40,6 +40,7 @@ extern "C" {
|
|
||||||
struct _plat_event_config;
|
|
||||||
|
|
||||||
extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
+extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
extern void zen_ll_config(struct _plat_event_config *);
|
|
||||||
extern int zen_offcore_num(void);
|
|
||||||
|
|
||||||
diff --git a/x86/plat.c b/x86/plat.c
|
|
||||||
index f79837a..35561dc 100644
|
|
||||||
--- a/x86/plat.c
|
|
||||||
+++ b/x86/plat.c
|
|
||||||
@@ -52,7 +52,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
|
||||||
skl_profiling_config,
|
|
||||||
icx_profiling_config,
|
|
||||||
spr_profiling_config,
|
|
||||||
- zen_profiling_config
|
|
||||||
+ zen_profiling_config,
|
|
||||||
+ zen3_profiling_config
|
|
||||||
};
|
|
||||||
|
|
||||||
pfn_plat_ll_config_t
|
|
||||||
@@ -69,6 +70,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
|
||||||
skl_ll_config,
|
|
||||||
icx_ll_config,
|
|
||||||
spr_ll_config,
|
|
||||||
+ zen_ll_config,
|
|
||||||
zen_ll_config
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -86,6 +88,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
|
||||||
skl_offcore_num,
|
|
||||||
icx_offcore_num,
|
|
||||||
spr_offcore_num,
|
|
||||||
+ zen_offcore_num,
|
|
||||||
zen_offcore_num
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -190,6 +193,8 @@ cpu_type_get(void)
|
|
||||||
}
|
|
||||||
} else if (family == 23) {
|
|
||||||
type = CPU_ZEN;
|
|
||||||
+ } else if (family == 25) {
|
|
||||||
+ type = CPU_ZEN3;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (type);
|
|
||||||
@@ -230,6 +235,7 @@ plat_detect(void)
|
|
||||||
case CPU_ICX:
|
|
||||||
case CPU_SPR:
|
|
||||||
case CPU_ZEN:
|
|
||||||
+ case CPU_ZEN3:
|
|
||||||
ret = 0;
|
|
||||||
s_cpu_type = cpu_type;
|
|
||||||
break;
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
index 67a425b..dd37d03 100644
|
|
||||||
--- a/x86/zen.c
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -52,6 +52,14 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
|
||||||
{ PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
|
||||||
};
|
|
||||||
|
|
||||||
+static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = {
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* Owing to the nature of IBS uop tagging, a higher sampling period is
|
|
||||||
* required to capture meaningful samples. All samples may not originate
|
|
||||||
@@ -67,6 +75,12 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
plat_config_get(perf_count_id, cfg, s_zen_config);
|
|
||||||
}
|
|
||||||
|
|
||||||
+void
|
|
||||||
+zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
+{
|
|
||||||
+ plat_config_get(perf_count_id, cfg, s_zen3_config);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static int
|
|
||||||
zen_ibs_op_pmu_type(void)
|
|
||||||
{
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,145 +0,0 @@
|
|||||||
From 7fc232a4df2013089300b0c23490d7d07c9c0165 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Fri, 20 Jan 2023 14:19:29 +0530
|
|
||||||
Subject: [PATCH 11/15] x86/zen: Add Zen 4 support
|
|
||||||
|
|
||||||
Add vendor and family identification as well as the relevant
|
|
||||||
events to count per-process memory accesses and CPU usage
|
|
||||||
on AMD Zen 4 family of processors.
|
|
||||||
|
|
||||||
Key changes include the use of the LsNotHaltedP0Cyc event to
|
|
||||||
count cycles at P0 frequency. This improves the accuracy of
|
|
||||||
the utilization metrics as, unlike the typical cycles event,
|
|
||||||
this is clock frequency invariant.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
x86/include/types.h | 5 +++--
|
|
||||||
x86/include/zen.h | 1 +
|
|
||||||
x86/plat.c | 18 ++++++++++++++----
|
|
||||||
x86/zen.c | 14 ++++++++++++++
|
|
||||||
4 files changed, 32 insertions(+), 6 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
|
||||||
index 4aa5fa6..bb965f9 100644
|
|
||||||
--- a/x86/include/types.h
|
|
||||||
+++ b/x86/include/types.h
|
|
||||||
@@ -48,10 +48,11 @@ typedef enum {
|
|
||||||
CPU_ICX,
|
|
||||||
CPU_SPR,
|
|
||||||
CPU_ZEN,
|
|
||||||
- CPU_ZEN3
|
|
||||||
+ CPU_ZEN3,
|
|
||||||
+ CPU_ZEN4
|
|
||||||
} cpu_type_t;
|
|
||||||
|
|
||||||
-#define CPU_TYPE_NUM 14
|
|
||||||
+#define CPU_TYPE_NUM 15
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
PERF_COUNT_INVALID = -1,
|
|
||||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
|
||||||
index b5c40f5..cbdfcd8 100644
|
|
||||||
--- a/x86/include/zen.h
|
|
||||||
+++ b/x86/include/zen.h
|
|
||||||
@@ -41,6 +41,7 @@ struct _plat_event_config;
|
|
||||||
|
|
||||||
extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
+extern void zen4_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
|
||||||
extern void zen_ll_config(struct _plat_event_config *);
|
|
||||||
extern int zen_offcore_num(void);
|
|
||||||
|
|
||||||
diff --git a/x86/plat.c b/x86/plat.c
|
|
||||||
index 35561dc..fe2bf01 100644
|
|
||||||
--- a/x86/plat.c
|
|
||||||
+++ b/x86/plat.c
|
|
||||||
@@ -53,7 +53,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
|
||||||
icx_profiling_config,
|
|
||||||
spr_profiling_config,
|
|
||||||
zen_profiling_config,
|
|
||||||
- zen3_profiling_config
|
|
||||||
+ zen3_profiling_config,
|
|
||||||
+ zen4_profiling_config
|
|
||||||
};
|
|
||||||
|
|
||||||
pfn_plat_ll_config_t
|
|
||||||
@@ -71,6 +72,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
|
||||||
icx_ll_config,
|
|
||||||
spr_ll_config,
|
|
||||||
zen_ll_config,
|
|
||||||
+ zen_ll_config,
|
|
||||||
zen_ll_config
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -89,6 +91,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
|
||||||
icx_offcore_num,
|
|
||||||
spr_offcore_num,
|
|
||||||
zen_offcore_num,
|
|
||||||
+ zen_offcore_num,
|
|
||||||
zen_offcore_num
|
|
||||||
};
|
|
||||||
|
|
||||||
@@ -191,10 +194,16 @@ cpu_type_get(void)
|
|
||||||
type = CPU_SPR;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
- } else if (family == 23) {
|
|
||||||
+ } else if (family == 23) { /* Family 17h */
|
|
||||||
type = CPU_ZEN;
|
|
||||||
- } else if (family == 25) {
|
|
||||||
- type = CPU_ZEN3;
|
|
||||||
+ } else if (family == 25) { /* Family 19h */
|
|
||||||
+ if ((model >= 0x00 && model <= 0x0f) ||
|
|
||||||
+ (model >= 0x20 && model <= 0x2f) ||
|
|
||||||
+ (model >= 0x40 && model <= 0x5f)) {
|
|
||||||
+ type = CPU_ZEN3;
|
|
||||||
+ } else {
|
|
||||||
+ type = CPU_ZEN4;
|
|
||||||
+ }
|
|
||||||
}
|
|
||||||
|
|
||||||
return (type);
|
|
||||||
@@ -236,6 +245,7 @@ plat_detect(void)
|
|
||||||
case CPU_SPR:
|
|
||||||
case CPU_ZEN:
|
|
||||||
case CPU_ZEN3:
|
|
||||||
+ case CPU_ZEN4:
|
|
||||||
ret = 0;
|
|
||||||
s_cpu_type = cpu_type;
|
|
||||||
break;
|
|
||||||
diff --git a/x86/zen.c b/x86/zen.c
|
|
||||||
index dd37d03..c21eb1a 100644
|
|
||||||
--- a/x86/zen.c
|
|
||||||
+++ b/x86/zen.c
|
|
||||||
@@ -60,6 +60,14 @@ static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = {
|
|
||||||
{ PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
|
||||||
};
|
|
||||||
|
|
||||||
+static plat_event_config_t s_zen4_config[PERF_COUNT_NUM] = {
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000100000120, 0, 0, 0, 0, "LsNotHaltedP0Cyc.P0FreqCyc" },
|
|
||||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
|
||||||
+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
/*
|
|
||||||
* Owing to the nature of IBS uop tagging, a higher sampling period is
|
|
||||||
* required to capture meaningful samples. All samples may not originate
|
|
||||||
@@ -81,6 +89,12 @@ zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
plat_config_get(perf_count_id, cfg, s_zen3_config);
|
|
||||||
}
|
|
||||||
|
|
||||||
+void
|
|
||||||
+zen4_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
|
||||||
+{
|
|
||||||
+ plat_config_get(perf_count_id, cfg, s_zen4_config);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
static int
|
|
||||||
zen_ibs_op_pmu_type(void)
|
|
||||||
{
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,75 +0,0 @@
|
|||||||
From 34b5ee97323d0bc62ca2d0beae3e99b2213752c2 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Fri, 20 Jan 2023 14:39:28 +0530
|
|
||||||
Subject: [PATCH 12/15] x86: Add feature tracker
|
|
||||||
|
|
||||||
Add a file to keep track of features available on AMD and
|
|
||||||
Intel processors.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
x86/FEATURES | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
1 file changed, 52 insertions(+)
|
|
||||||
create mode 100755 x86/FEATURES
|
|
||||||
|
|
||||||
diff --git a/x86/FEATURES b/x86/FEATURES
|
|
||||||
new file mode 100755
|
|
||||||
index 0000000..7ece0d6
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/x86/FEATURES
|
|
||||||
@@ -0,0 +1,52 @@
|
|
||||||
+Features supported on X86:
|
|
||||||
+--------------------------
|
|
||||||
+
|
|
||||||
+Per process/thread:
|
|
||||||
+
|
|
||||||
+| Feature | Supported |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| | AMD | Intel |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| RMA | Y | Y |
|
|
||||||
+| LMA | Y | Y |
|
|
||||||
+| CPI | Y | Y |
|
|
||||||
+| CPU% | Y | Y |
|
|
||||||
+| Memory area ADDR | Y | Y |
|
|
||||||
+| Memory area SIZE | Y | Y |
|
|
||||||
+| Memory area ACCESS% | Y | Y |
|
|
||||||
+| Memory area LAT(ns) | Y | Y |
|
|
||||||
+| Memory area DESC | Y | Y |
|
|
||||||
+| Node ACCESS% | Y | Y |
|
|
||||||
+| Node LAT(ns) | Y | Y |
|
|
||||||
+| Call-chain when process generates RMA / LMA / CYCLES / IR | Y | Y |
|
|
||||||
+| Call-chain when process accesses the memory area | Y | Y |
|
|
||||||
+| PQOS CMT/MBM | N | Y |
|
|
||||||
+
|
|
||||||
+Per Node:
|
|
||||||
+
|
|
||||||
+| Feature | Supported |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| | AMD | Intel |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| RMA | Y | Y |
|
|
||||||
+| LMA | Y | Y |
|
|
||||||
+| CPU | Y | Y |
|
|
||||||
+| CPU% | Y | Y |
|
|
||||||
+| MEM total | Y | Y |
|
|
||||||
+| MEM free | Y | Y |
|
|
||||||
+| MEM active | Y | Y |
|
|
||||||
+| MEM inactive | Y | Y |
|
|
||||||
+| Dirty | Y | Y |
|
|
||||||
+| Writeback | Y | Y |
|
|
||||||
+| Mapped | Y | Y |
|
|
||||||
+| QPI/UPI 0 bandwidth | N | Y |
|
|
||||||
+| QPI/UPI 1 bandwidth | N | Y |
|
|
||||||
+| Memory controller bandwidth | N | Y |
|
|
||||||
+
|
|
||||||
+Other:
|
|
||||||
+
|
|
||||||
+| Feature | Supported |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| | AMD | Intel |
|
|
||||||
+|-----------------------------------------------------------|---------------|
|
|
||||||
+| mgen testcase | Y | Y |
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,58 +0,0 @@
|
|||||||
From b4543efe798bbc255519fdcec73484cbd43472d1 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Mon, 18 Apr 2022 10:59:26 +0530
|
|
||||||
Subject: [PATCH 13/15] common: Fix perf init for large systems
|
|
||||||
|
|
||||||
Large systems with hundreds of CPUs can run into issues
|
|
||||||
during perf event initialization because of the default
|
|
||||||
resource limits for file descriptors. Set RLIMIT_NOFILE
|
|
||||||
explicitly to a fairly large value to avoid them.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/os/os_perf.c | 16 ++++++++++++++++
|
|
||||||
1 file changed, 16 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
|
||||||
index 44ca43d..49fdaaa 100644
|
|
||||||
--- a/common/os/os_perf.c
|
|
||||||
+++ b/common/os/os_perf.c
|
|
||||||
@@ -28,6 +28,7 @@
|
|
||||||
|
|
||||||
#include <inttypes.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
+#include <sys/resource.h>
|
|
||||||
#include <sys/types.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <string.h>
|
|
||||||
@@ -850,12 +851,27 @@ ll_init(pf_conf_t *conf)
|
|
||||||
int
|
|
||||||
os_perf_init(void)
|
|
||||||
{
|
|
||||||
+ struct rlimit limit;
|
|
||||||
int ringsize, size;
|
|
||||||
|
|
||||||
s_profiling_recbuf = NULL;
|
|
||||||
s_ll_recbuf = NULL;
|
|
||||||
s_partpause_enabled = B_FALSE;
|
|
||||||
|
|
||||||
+ /*
|
|
||||||
+ * Depending on the number of available CPUs in the system, the
|
|
||||||
+ * default fd limit may be exceeded. Set it to a large value to
|
|
||||||
+ * avoid running into problems.
|
|
||||||
+ */
|
|
||||||
+ limit.rlim_cur = 32768;
|
|
||||||
+ limit.rlim_max = 32768;
|
|
||||||
+
|
|
||||||
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0) {
|
|
||||||
+ exit_msg_put("Failed to setup perf!\n");
|
|
||||||
+ debug_print(NULL, 2, "os_perf_init failed\n");
|
|
||||||
+ return (-1);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
ringsize = pf_ringsize_init();
|
|
||||||
size = ((ringsize / sizeof (pf_profiling_rbrec_t)) + 1) *
|
|
||||||
sizeof (pf_profiling_rec_t);
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,30 +0,0 @@
|
|||||||
From 6f6cc3b24d84c413556639b64a62aca6ad0b21cc Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Mon, 10 Oct 2022 14:55:45 +0530
|
|
||||||
Subject: [PATCH 14/15] common: Increase count of possible CPUs per-node
|
|
||||||
|
|
||||||
Upcoming AMD Zen 4 processors support up to 256 threads per
|
|
||||||
NUMA node in NPS1 configuration. Hence, increase the number
|
|
||||||
of possible CPUs per-node to 256.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/include/types.h | 2 +-
|
|
||||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
|
||||||
|
|
||||||
diff --git a/common/include/types.h b/common/include/types.h
|
|
||||||
index efe3055..05b411a 100644
|
|
||||||
--- a/common/include/types.h
|
|
||||||
+++ b/common/include/types.h
|
|
||||||
@@ -116,7 +116,7 @@ typedef enum {
|
|
||||||
#define UI_COUNT_NUM 5
|
|
||||||
|
|
||||||
#define NNODES_MAX 64
|
|
||||||
-#define NCPUS_NODE_MAX 128
|
|
||||||
+#define NCPUS_NODE_MAX 256
|
|
||||||
#define NCPUS_MAX (NNODES_MAX * NCPUS_NODE_MAX)
|
|
||||||
#define NPROCS_NAX 4096
|
|
||||||
#define LL_THRESH 128
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
@ -1,36 +0,0 @@
|
|||||||
From 8c3fe7b2debf74566a6017c92eebc7cb23f9deca Mon Sep 17 00:00:00 2001
|
|
||||||
From: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
Date: Thu, 10 Nov 2022 10:43:04 +0530
|
|
||||||
Subject: [PATCH 15/15] common: Fix some typos
|
|
||||||
|
|
||||||
Fix some typos in the messages shown when a user attempts
|
|
||||||
to monitor a process or thread that has already exited.
|
|
||||||
|
|
||||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
|
||||||
---
|
|
||||||
common/win.c | 4 ++--
|
|
||||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/common/win.c b/common/win.c
|
|
||||||
index 087efe5..d0a8f3b 100644
|
|
||||||
--- a/common/win.c
|
|
||||||
+++ b/common/win.c
|
|
||||||
@@ -3489,13 +3489,13 @@ win_warn_msg(warn_type_t warn_type)
|
|
||||||
break;
|
|
||||||
|
|
||||||
case WARN_INVALID_PID:
|
|
||||||
- (void) strncpy(content, "Process exists, "
|
|
||||||
+ (void) strncpy(content, "Process exited, "
|
|
||||||
"return to home window ...",
|
|
||||||
WIN_LINECHAR_MAX);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case WARN_INVALID_LWPID:
|
|
||||||
- (void) strncpy(content, "Thread exists, "
|
|
||||||
+ (void) strncpy(content, "Thread exited, "
|
|
||||||
"return to home window ...",
|
|
||||||
WIN_LINECHAR_MAX);
|
|
||||||
break;
|
|
||||||
--
|
|
||||||
2.31.1
|
|
||||||
|
|
23
numatop.spec
23
numatop.spec
@ -2,8 +2,8 @@
|
|||||||
%undefine _ld_as_needed
|
%undefine _ld_as_needed
|
||||||
|
|
||||||
Name: numatop
|
Name: numatop
|
||||||
Version: 2.3
|
Version: 2.4
|
||||||
Release: 2%{?dist}
|
Release: 1%{?dist}
|
||||||
Summary: Memory access locality characterization and analysis
|
Summary: Memory access locality characterization and analysis
|
||||||
|
|
||||||
License: BSD
|
License: BSD
|
||||||
@ -23,21 +23,6 @@ BuildRequires: numactl-devel
|
|||||||
ExclusiveArch: x86_64 ppc64le
|
ExclusiveArch: x86_64 ppc64le
|
||||||
|
|
||||||
#Patch001: v2.2-001-Initial-support-for-SPR.patch
|
#Patch001: v2.2-001-Initial-support-for-SPR.patch
|
||||||
Patch0001: 0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch
|
|
||||||
Patch0002: 0002-Update-the-error-message.patch
|
|
||||||
Patch0003: 0003-Update-README.patch
|
|
||||||
Patch0004: 0004-x86-Prepare-for-multi-vendor-support.patch
|
|
||||||
Patch0005: 0005-x86-zen-Add-initial-support.patch
|
|
||||||
Patch0006: 0006-common-Add-sample-period-to-platform-event-config.patch
|
|
||||||
Patch0007: 0007-common-Add-exclude-guest-to-platform-event-config.patch
|
|
||||||
Patch0008: 0008-x86-zen-Add-support-for-memory-access-stats.patch
|
|
||||||
Patch0009: 0009-x86-Fix-clock-frequency-parsing.patch
|
|
||||||
Patch0010: 0010-x86-zen-Add-Zen-3-support.patch
|
|
||||||
Patch0011: 0011-x86-zen-Add-Zen-4-support.patch
|
|
||||||
Patch0012: 0012-x86-Add-feature-tracker.patch
|
|
||||||
Patch0013: 0013-common-Fix-perf-init-for-large-systems.patch
|
|
||||||
Patch0014: 0014-common-Increase-count-of-possible-CPUs-per-node.patch
|
|
||||||
Patch0015: 0015-common-Fix-some-typos.patch
|
|
||||||
|
|
||||||
|
|
||||||
%description
|
%description
|
||||||
@ -76,6 +61,10 @@ autoreconf --force --install --symlink
|
|||||||
|
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Thu Feb 1 2024 Pingfan Liu <piliu@redhat.com> - 2.4-1
|
||||||
|
- Add EMR support
|
||||||
|
- Add Power10 support
|
||||||
|
|
||||||
* Wed Nov 2 2022 Pingfan Liu <piliu@redhat.com> - 2.2-3
|
* Wed Nov 2 2022 Pingfan Liu <piliu@redhat.com> - 2.2-3
|
||||||
- bump release version to 2.2-3
|
- bump release version to 2.2-3
|
||||||
|
|
||||||
|
2
sources
2
sources
@ -1 +1 @@
|
|||||||
SHA512 (v2.3.tar.gz) = 8d8483ba7ff0a82517df4dff7617b7899e19938460b26b0bf6dd04d5d498900f58bf30f9282c4d2b3525d84f028bc931602ce4dfd1eb48bf644e9fb4235c5859
|
SHA512 (v2.4.tar.gz) = eb500424f56a3bcd19375cdca5f1c0d1f4ffbd9817bb0d42bb8224f2929c4cf1cdbb4005adf6a148f6e669db9337175c7dfbd3076aa2d99bfb08f537850efaff
|
||||||
|
Loading…
Reference in New Issue
Block a user