add x86 zen support
Signed-off-by: Pingfan Liu <piliu@redhat.com>
This commit is contained in:
parent
cdbe99a244
commit
bb8dcb4304
@ -0,0 +1,42 @@
|
||||
From 2715969f92f5e8d6c60488a65ccef73fef57fa6e Mon Sep 17 00:00:00 2001
|
||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
Date: Mon, 31 Oct 2022 14:22:32 +0800
|
||||
Subject: [PATCH 01/15] configure.ac : Fix build error when libnuma is missed
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
When libnuma is installed after running autogen.sh (which didn’t fail) it
|
||||
silently fails linking with missing symbols. To avoid this issue just make
|
||||
autoconf error out if libnuma is missing.
|
||||
|
||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
---
|
||||
configure.ac | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index fd945f4..36edcc5 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -2,7 +2,7 @@
|
||||
# Process this file with autoconf to produce a configure script.
|
||||
|
||||
AC_PREREQ([2.69])
|
||||
-AC_INIT([numatop], [v2.1], [yao.jin@intel.com])
|
||||
+AC_INIT([numatop], [v2.3], [zhengjun.xing@intel.com])
|
||||
AM_INIT_AUTOMAKE([-Wno-portability no-dist-gzip dist-xz foreign subdir-objects])
|
||||
AC_CONFIG_SRCDIR([common/numatop.c])
|
||||
AC_CONFIG_HEADERS([config.h])
|
||||
@@ -18,7 +18,7 @@ AC_PROG_CC
|
||||
AC_PROG_INSTALL
|
||||
|
||||
# Checks for libraries.
|
||||
-AC_CHECK_LIB([numa], [numa_free])
|
||||
+AC_CHECK_LIB([numa], [numa_free], [], [ AC_MSG_ERROR([numactl-devel or libnuma-dev(el) is required but was not found]) exit -1])
|
||||
AC_CHECK_LIB([pthread], [pthread_create])
|
||||
|
||||
PKG_CHECK_MODULES([CHECK], [check])
|
||||
--
|
||||
2.31.1
|
||||
|
30
0002-Update-the-error-message.patch
Normal file
30
0002-Update-the-error-message.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From b9157a8e3ba3a2a0af3d8f755a32a3b57cad04c9 Mon Sep 17 00:00:00 2001
|
||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
Date: Mon, 31 Oct 2022 15:15:08 +0800
|
||||
Subject: [PATCH 02/15] Update the error message
|
||||
|
||||
Update error message for cases that need to increase ulimit.
|
||||
For example, SPR needs to set the max open files to be more
|
||||
than 1024, while on most systems the default is 1024.
|
||||
|
||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
---
|
||||
common/os/os_perf.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
||||
index 44263e8..f2f1104 100644
|
||||
--- a/common/os/os_perf.c
|
||||
+++ b/common/os/os_perf.c
|
||||
@@ -650,7 +650,7 @@ os_profiling_start(perf_ctl_t *ctl, perf_task_t *task)
|
||||
proc_ll_clear(NULL);
|
||||
|
||||
if (profiling_start(ctl, (task_profiling_t *)(task)) != 0) {
|
||||
- exit_msg_put("Fail to setup perf (probably permission denied)!\n");
|
||||
+ exit_msg_put("Fail to setup perf (probably permission denied or need to increase the ulimit)!\n");
|
||||
debug_print(NULL, 2, "os_profiling_start failed\n");
|
||||
perf_status_set(PERF_STATUS_PROFILING_FAILED);
|
||||
return (-1);
|
||||
--
|
||||
2.31.1
|
||||
|
56
0003-Update-README.patch
Normal file
56
0003-Update-README.patch
Normal file
@ -0,0 +1,56 @@
|
||||
From ff75e35508183b5ed39d50122c71293e8e65a86f Mon Sep 17 00:00:00 2001
|
||||
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
Date: Mon, 31 Oct 2022 15:25:04 +0800
|
||||
Subject: [PATCH 03/15] Update README
|
||||
|
||||
Update README, add "check" to the Build Dependencies, add tips
|
||||
for running NumaTOP.
|
||||
|
||||
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
|
||||
---
|
||||
README.md | 24 +++++++++++++++++++++++-
|
||||
1 file changed, 23 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/README.md b/README.md
|
||||
index 6910c78..d16a486 100644
|
||||
--- a/README.md
|
||||
+++ b/README.md
|
||||
@@ -12,12 +12,34 @@ the `mgen` program for help information.
|
||||
|
||||
## Build Dependencies
|
||||
|
||||
-NumaTOP requires following libraries:
|
||||
+NumaTOP requires following libraries or packages:
|
||||
|
||||
* numactl-devel or libnuma-dev(el)
|
||||
* libncurses
|
||||
* libpthread
|
||||
|
||||
+* check
|
||||
+
|
||||
+## Run NumaTOP
|
||||
+
|
||||
+NumaTOP requires running as root.
|
||||
+ # ./numatop
|
||||
+
|
||||
+In many systems, the default max open files are 1024, for platforms (like SPR)
|
||||
+that have more CPUs, they require the system with the max open files should
|
||||
+bigger than 1024, otherwise, the error can be "Fail to setup perf":
|
||||
+
|
||||
+ # ulimit -n
|
||||
+ 1024 <------the max open files are 1024
|
||||
+ # ./numatop
|
||||
+ NumaTOP is starting ...
|
||||
+ Fail to setup perf (probably permission denied)!
|
||||
+
|
||||
+Need to enlarge the max open files:
|
||||
+
|
||||
+ # ulimit -n 8192
|
||||
+ # ulimit -n
|
||||
+ 8192 <------now the max open files are 8192
|
||||
|
||||
## Supported Kernels
|
||||
|
||||
--
|
||||
2.31.1
|
||||
|
265
0004-x86-Prepare-for-multi-vendor-support.patch
Normal file
265
0004-x86-Prepare-for-multi-vendor-support.patch
Normal file
@ -0,0 +1,265 @@
|
||||
From f39f29d200b83c568748afc4483feb544b4f6bd6 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Wed, 16 Feb 2022 16:01:18 +0530
|
||||
Subject: [PATCH 04/15] x86: Prepare for multi-vendor support
|
||||
|
||||
In order to support x86 processors from other vendors, move
|
||||
existing platform-specific code for Intel processors to the
|
||||
new x86 directory and update the build files accordingly.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
Makefile.am | 36 ++++++++++++++++-----------------
|
||||
README.md | 2 +-
|
||||
common/include/os/plat.h | 2 +-
|
||||
common/include/types.h | 2 +-
|
||||
configure.ac | 2 +-
|
||||
test/mgen/{intel => x86}/util.c | 0
|
||||
{intel => x86}/bdw.c | 0
|
||||
{intel => x86}/include/bdw.h | 0
|
||||
{intel => x86}/include/nhm.h | 0
|
||||
{intel => x86}/include/skl.h | 0
|
||||
{intel => x86}/include/snb.h | 0
|
||||
{intel => x86}/include/types.h | 6 +++---
|
||||
{intel => x86}/include/util.h | 6 +++---
|
||||
{intel => x86}/include/wsm.h | 0
|
||||
{intel => x86}/nhm.c | 0
|
||||
{intel => x86}/plat.c | 0
|
||||
{intel => x86}/skl.c | 0
|
||||
{intel => x86}/snb.c | 0
|
||||
{intel => x86}/ui_perf_map.c | 0
|
||||
{intel => x86}/util.c | 0
|
||||
{intel => x86}/wsm.c | 0
|
||||
21 files changed, 28 insertions(+), 28 deletions(-)
|
||||
rename test/mgen/{intel => x86}/util.c (100%)
|
||||
rename {intel => x86}/bdw.c (100%)
|
||||
rename {intel => x86}/include/bdw.h (100%)
|
||||
rename {intel => x86}/include/nhm.h (100%)
|
||||
rename {intel => x86}/include/skl.h (100%)
|
||||
rename {intel => x86}/include/snb.h (100%)
|
||||
rename {intel => x86}/include/types.h (95%)
|
||||
rename {intel => x86}/include/util.h (94%)
|
||||
rename {intel => x86}/include/wsm.h (100%)
|
||||
rename {intel => x86}/nhm.c (100%)
|
||||
rename {intel => x86}/plat.c (100%)
|
||||
rename {intel => x86}/skl.c (100%)
|
||||
rename {intel => x86}/snb.c (100%)
|
||||
rename {intel => x86}/ui_perf_map.c (100%)
|
||||
rename {intel => x86}/util.c (100%)
|
||||
rename {intel => x86}/wsm.c (100%)
|
||||
|
||||
diff --git a/Makefile.am b/Makefile.am
|
||||
index 643704a..438a9fc 100644
|
||||
--- a/Makefile.am
|
||||
+++ b/Makefile.am
|
||||
@@ -58,23 +58,23 @@ libnumatop_la_SOURCES = \
|
||||
common/util.c \
|
||||
common/win.c
|
||||
|
||||
-if CPU_INTEL
|
||||
+if CPU_X86
|
||||
libnumatop_la_SOURCES += \
|
||||
- intel/include/bdw.h \
|
||||
- intel/include/nhm.h \
|
||||
- intel/include/skl.h \
|
||||
- intel/include/snb.h \
|
||||
- intel/include/types.h \
|
||||
- intel/include/util.h \
|
||||
- intel/include/wsm.h \
|
||||
- intel/bdw.c \
|
||||
- intel/nhm.c \
|
||||
- intel/plat.c \
|
||||
- intel/skl.c \
|
||||
- intel/snb.c \
|
||||
- intel/ui_perf_map.c \
|
||||
- intel/util.c \
|
||||
- intel/wsm.c
|
||||
+ x86/include/bdw.h \
|
||||
+ x86/include/nhm.h \
|
||||
+ x86/include/skl.h \
|
||||
+ x86/include/snb.h \
|
||||
+ x86/include/types.h \
|
||||
+ x86/include/util.h \
|
||||
+ x86/include/wsm.h \
|
||||
+ x86/bdw.c \
|
||||
+ x86/nhm.c \
|
||||
+ x86/plat.c \
|
||||
+ x86/skl.c \
|
||||
+ x86/snb.c \
|
||||
+ x86/ui_perf_map.c \
|
||||
+ x86/util.c \
|
||||
+ x86/wsm.c
|
||||
endif
|
||||
|
||||
if CPU_PPC
|
||||
@@ -106,9 +106,9 @@ if CPU_PPC
|
||||
mgen_SOURCES += \
|
||||
test/mgen/powerpc/util.c
|
||||
endif
|
||||
-if CPU_INTEL
|
||||
+if CPU_X86
|
||||
mgen_SOURCES += \
|
||||
- test/mgen/intel/util.c
|
||||
+ test/mgen/x86/util.c
|
||||
endif
|
||||
|
||||
TESTS = test/mgen.01.sh test/mgen.02.sh
|
||||
diff --git a/README.md b/README.md
|
||||
index d16a486..e96f0a8 100644
|
||||
--- a/README.md
|
||||
+++ b/README.md
|
||||
@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864
|
||||
|
||||
common: common code for all platforms.
|
||||
|
||||
-intel : Intel platform-specific code.
|
||||
+x86 : Intel platform-specific code.
|
||||
|
||||
powerpc: PowerPC platform-specific code.
|
||||
|
||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
||||
index 35629dc..e35093d 100644
|
||||
--- a/common/include/os/plat.h
|
||||
+++ b/common/include/os/plat.h
|
||||
@@ -35,7 +35,7 @@
|
||||
#ifdef __powerpc64__
|
||||
#include "../../../powerpc/include/types.h"
|
||||
#else
|
||||
-#include "../../../intel/include/types.h"
|
||||
+#include "../../../x86/include/types.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
diff --git a/common/include/types.h b/common/include/types.h
|
||||
index fc9c592..3e30f7c 100644
|
||||
--- a/common/include/types.h
|
||||
+++ b/common/include/types.h
|
||||
@@ -34,7 +34,7 @@
|
||||
#ifdef __powerpc64__
|
||||
#include "../../powerpc/include/types.h"
|
||||
#else
|
||||
-#include "../../intel/include/types.h"
|
||||
+#include "../../x86/include/types.h"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index 36edcc5..71fa92d 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -10,7 +10,7 @@ LT_INIT
|
||||
AC_CONFIG_MACRO_DIRS([m4])
|
||||
|
||||
AC_CANONICAL_HOST
|
||||
-AM_CONDITIONAL(CPU_INTEL, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686")
|
||||
+AM_CONDITIONAL(CPU_X86, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686")
|
||||
AM_CONDITIONAL(CPU_PPC, test "x$host_cpu" = "xpowerpc64" -o "x$host_cpu" = "xpowerpc64le")
|
||||
|
||||
# Checks for programs.
|
||||
diff --git a/test/mgen/intel/util.c b/test/mgen/x86/util.c
|
||||
similarity index 100%
|
||||
rename from test/mgen/intel/util.c
|
||||
rename to test/mgen/x86/util.c
|
||||
diff --git a/intel/bdw.c b/x86/bdw.c
|
||||
similarity index 100%
|
||||
rename from intel/bdw.c
|
||||
rename to x86/bdw.c
|
||||
diff --git a/intel/include/bdw.h b/x86/include/bdw.h
|
||||
similarity index 100%
|
||||
rename from intel/include/bdw.h
|
||||
rename to x86/include/bdw.h
|
||||
diff --git a/intel/include/nhm.h b/x86/include/nhm.h
|
||||
similarity index 100%
|
||||
rename from intel/include/nhm.h
|
||||
rename to x86/include/nhm.h
|
||||
diff --git a/intel/include/skl.h b/x86/include/skl.h
|
||||
similarity index 100%
|
||||
rename from intel/include/skl.h
|
||||
rename to x86/include/skl.h
|
||||
diff --git a/intel/include/snb.h b/x86/include/snb.h
|
||||
similarity index 100%
|
||||
rename from intel/include/snb.h
|
||||
rename to x86/include/snb.h
|
||||
diff --git a/intel/include/types.h b/x86/include/types.h
|
||||
similarity index 95%
|
||||
rename from intel/include/types.h
|
||||
rename to x86/include/types.h
|
||||
index 76c7ad3..1a15b3a 100644
|
||||
--- a/intel/include/types.h
|
||||
+++ b/x86/include/types.h
|
||||
@@ -27,8 +27,8 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
-#ifndef _NUMATOP_INTEL_TYPES_H
|
||||
-#define _NUMATOP_INTEL_TYPES_H
|
||||
+#ifndef _NUMATOP_X86_TYPES_H
|
||||
+#define _NUMATOP_X86_TYPES_H
|
||||
|
||||
#include "../../common/include/types.h"
|
||||
|
||||
@@ -62,4 +62,4 @@ typedef enum {
|
||||
|
||||
#define PERF_COUNT_NUM 5
|
||||
|
||||
-#endif /* _NUMATOP_INTEL_TYPES_H */
|
||||
+#endif /* _NUMATOP_X86_TYPES_H */
|
||||
diff --git a/intel/include/util.h b/x86/include/util.h
|
||||
similarity index 94%
|
||||
rename from intel/include/util.h
|
||||
rename to x86/include/util.h
|
||||
index 7026e99..37a6300 100644
|
||||
--- a/intel/include/util.h
|
||||
+++ b/x86/include/util.h
|
||||
@@ -27,8 +27,8 @@
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
-#ifndef _NUMATOP_INTEL_UTIL_H
|
||||
-#define _NUMATOP_INTEL_UTIL_H
|
||||
+#ifndef _NUMATOP_X86_UTIL_H
|
||||
+#define _NUMATOP_X86_UTIL_H
|
||||
|
||||
#define CPU_FAMILY(eax) \
|
||||
(((eax) & 0x0F00) >> 8)
|
||||
@@ -39,4 +39,4 @@
|
||||
#define CPU_EXT_MODEL(eax) \
|
||||
(((eax) & 0xF0000) >> 16)
|
||||
|
||||
-#endif /* _NUMATOP_INTEL_UTIL_H */
|
||||
+#endif /* _NUMATOP_X86_UTIL_H */
|
||||
diff --git a/intel/include/wsm.h b/x86/include/wsm.h
|
||||
similarity index 100%
|
||||
rename from intel/include/wsm.h
|
||||
rename to x86/include/wsm.h
|
||||
diff --git a/intel/nhm.c b/x86/nhm.c
|
||||
similarity index 100%
|
||||
rename from intel/nhm.c
|
||||
rename to x86/nhm.c
|
||||
diff --git a/intel/plat.c b/x86/plat.c
|
||||
similarity index 100%
|
||||
rename from intel/plat.c
|
||||
rename to x86/plat.c
|
||||
diff --git a/intel/skl.c b/x86/skl.c
|
||||
similarity index 100%
|
||||
rename from intel/skl.c
|
||||
rename to x86/skl.c
|
||||
diff --git a/intel/snb.c b/x86/snb.c
|
||||
similarity index 100%
|
||||
rename from intel/snb.c
|
||||
rename to x86/snb.c
|
||||
diff --git a/intel/ui_perf_map.c b/x86/ui_perf_map.c
|
||||
similarity index 100%
|
||||
rename from intel/ui_perf_map.c
|
||||
rename to x86/ui_perf_map.c
|
||||
diff --git a/intel/util.c b/x86/util.c
|
||||
similarity index 100%
|
||||
rename from intel/util.c
|
||||
rename to x86/util.c
|
||||
diff --git a/intel/wsm.c b/x86/wsm.c
|
||||
similarity index 100%
|
||||
rename from intel/wsm.c
|
||||
rename to x86/wsm.c
|
||||
--
|
||||
2.31.1
|
||||
|
322
0005-x86-zen-Add-initial-support.patch
Normal file
322
0005-x86-zen-Add-initial-support.patch
Normal file
@ -0,0 +1,322 @@
|
||||
From fdf9b3ce90d1f435fe837added7373e25e6045b2 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Wed, 16 Feb 2022 18:05:27 +0530
|
||||
Subject: [PATCH 05/15] x86/zen: Add initial support
|
||||
|
||||
Add vendor and family identification as well as the relevant
|
||||
events to count per-process memory accesses and CPU usage
|
||||
on AMD Zen and Zen 2 family of processors.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
Makefile.am | 4 ++-
|
||||
README.md | 2 +-
|
||||
x86/include/types.h | 5 ++--
|
||||
x86/include/util.h | 3 ++
|
||||
x86/include/zen.h | 50 ++++++++++++++++++++++++++++++++
|
||||
x86/plat.c | 29 +++++++++++++------
|
||||
x86/zen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++
|
||||
7 files changed, 151 insertions(+), 12 deletions(-)
|
||||
create mode 100644 x86/include/zen.h
|
||||
create mode 100644 x86/zen.c
|
||||
|
||||
diff --git a/Makefile.am b/Makefile.am
|
||||
index 438a9fc..ae11522 100644
|
||||
--- a/Makefile.am
|
||||
+++ b/Makefile.am
|
||||
@@ -67,6 +67,7 @@ libnumatop_la_SOURCES += \
|
||||
x86/include/types.h \
|
||||
x86/include/util.h \
|
||||
x86/include/wsm.h \
|
||||
+ x86/include/zen.h \
|
||||
x86/bdw.c \
|
||||
x86/nhm.c \
|
||||
x86/plat.c \
|
||||
@@ -74,7 +75,8 @@ libnumatop_la_SOURCES += \
|
||||
x86/snb.c \
|
||||
x86/ui_perf_map.c \
|
||||
x86/util.c \
|
||||
- x86/wsm.c
|
||||
+ x86/wsm.c \
|
||||
+ x86/zen.c
|
||||
endif
|
||||
|
||||
if CPU_PPC
|
||||
diff --git a/README.md b/README.md
|
||||
index e96f0a8..9908e92 100644
|
||||
--- a/README.md
|
||||
+++ b/README.md
|
||||
@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864
|
||||
|
||||
common: common code for all platforms.
|
||||
|
||||
-x86 : Intel platform-specific code.
|
||||
+x86 : Intel and AMD platform-specific code.
|
||||
|
||||
powerpc: PowerPC platform-specific code.
|
||||
|
||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
||||
index 1a15b3a..0843bd1 100644
|
||||
--- a/x86/include/types.h
|
||||
+++ b/x86/include/types.h
|
||||
@@ -46,10 +46,11 @@ typedef enum {
|
||||
CPU_BDX,
|
||||
CPU_SKX,
|
||||
CPU_ICX,
|
||||
- CPU_SPR
|
||||
+ CPU_SPR,
|
||||
+ CPU_ZEN
|
||||
} cpu_type_t;
|
||||
|
||||
-#define CPU_TYPE_NUM 12
|
||||
+#define CPU_TYPE_NUM 13
|
||||
|
||||
typedef enum {
|
||||
PERF_COUNT_INVALID = -1,
|
||||
diff --git a/x86/include/util.h b/x86/include/util.h
|
||||
index 37a6300..4d2534b 100644
|
||||
--- a/x86/include/util.h
|
||||
+++ b/x86/include/util.h
|
||||
@@ -36,6 +36,9 @@
|
||||
#define CPU_MODEL(eax) \
|
||||
(((eax) & 0x00F0) >> 4)
|
||||
|
||||
+#define CPU_EXT_FAMILY(eax) \
|
||||
+ (((eax) & 0x0FF00000) >> 20)
|
||||
+
|
||||
#define CPU_EXT_MODEL(eax) \
|
||||
(((eax) & 0xF0000) >> 16)
|
||||
|
||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
||||
new file mode 100644
|
||||
index 0000000..be61324
|
||||
--- /dev/null
|
||||
+++ b/x86/include/zen.h
|
||||
@@ -0,0 +1,50 @@
|
||||
+/*
|
||||
+ * Copyright (c) 2023, AMD Corporation
|
||||
+ *
|
||||
+ * Redistribution and use in source and binary forms, with or without
|
||||
+ * modification, are permitted provided that the following conditions are met:
|
||||
+ *
|
||||
+ * * Redistributions of source code must retain the above copyright notice,
|
||||
+ * this list of conditions and the following disclaimer.
|
||||
+ * * Redistributions in binary form must reproduce the above copyright
|
||||
+ * notice, this list of conditions and the following disclaimer in the
|
||||
+ * documentation and/or other materials provided with the distribution.
|
||||
+ * * Neither the name of Intel Corporation nor the names of its contributors
|
||||
+ * may be used to endorse or promote products derived from this software
|
||||
+ * without specific prior written permission.
|
||||
+ *
|
||||
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
+ * POSSIBILITY OF SUCH DAMAGE.
|
||||
+ */
|
||||
+
|
||||
+#ifndef _NUMATOP_AMD_ZEN_H
|
||||
+#define _NUMATOP_AMD_ZEN_H
|
||||
+
|
||||
+#ifdef __cplusplus
|
||||
+extern "C" {
|
||||
+#endif
|
||||
+
|
||||
+#include <sys/types.h>
|
||||
+#include <inttypes.h>
|
||||
+#include "../../common/include/types.h"
|
||||
+
|
||||
+struct _plat_event_config;
|
||||
+
|
||||
+extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
+extern void zen_ll_config(struct _plat_event_config *);
|
||||
+extern int zen_offcore_num(void);
|
||||
+
|
||||
+#ifdef __cplusplus
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+#endif /* _NUMATOP_AMD_ZEN_H */
|
||||
diff --git a/x86/plat.c b/x86/plat.c
|
||||
index abf3766..f79837a 100644
|
||||
--- a/x86/plat.c
|
||||
+++ b/x86/plat.c
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "include/snb.h"
|
||||
#include "include/bdw.h"
|
||||
#include "include/skl.h"
|
||||
+#include "include/zen.h"
|
||||
|
||||
pfn_plat_profiling_config_t
|
||||
s_plat_profiling_config[CPU_TYPE_NUM] = {
|
||||
@@ -50,7 +51,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
||||
bdw_profiling_config,
|
||||
skl_profiling_config,
|
||||
icx_profiling_config,
|
||||
- spr_profiling_config
|
||||
+ spr_profiling_config,
|
||||
+ zen_profiling_config
|
||||
};
|
||||
|
||||
pfn_plat_ll_config_t
|
||||
@@ -66,7 +68,8 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
||||
bdw_ll_config,
|
||||
skl_ll_config,
|
||||
icx_ll_config,
|
||||
- spr_ll_config
|
||||
+ spr_ll_config,
|
||||
+ zen_ll_config
|
||||
};
|
||||
|
||||
pfn_plat_offcore_num_t
|
||||
@@ -82,7 +85,8 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
||||
bdw_offcore_num,
|
||||
skl_offcore_num,
|
||||
icx_offcore_num,
|
||||
- spr_offcore_num
|
||||
+ spr_offcore_num,
|
||||
+ zen_offcore_num
|
||||
};
|
||||
|
||||
/* ARGSUSED */
|
||||
@@ -117,7 +121,7 @@ static cpu_type_t
|
||||
cpu_type_get(void)
|
||||
{
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
- int family, model, ext_model;
|
||||
+ int family, model;
|
||||
cpu_type_t type = CPU_UNSUP;
|
||||
char vendor[16];
|
||||
|
||||
@@ -129,7 +133,8 @@ cpu_type_get(void)
|
||||
(void) strncpy(&vendor[8], (char *)(&edx), 4);
|
||||
vendor[12] = 0;
|
||||
|
||||
- if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0) {
|
||||
+ if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0 &&
|
||||
+ strncmp(vendor, "Auth" "cAMD" "enti", 12) != 0) {
|
||||
return (CPU_UNSUP);
|
||||
}
|
||||
|
||||
@@ -138,11 +143,16 @@ cpu_type_get(void)
|
||||
|
||||
family = CPU_FAMILY(eax);
|
||||
model = CPU_MODEL(eax);
|
||||
- ext_model = CPU_EXT_MODEL(eax);
|
||||
|
||||
- if (family == 6) {
|
||||
- model = (ext_model << 4) + model;
|
||||
+ /* Extended Model ID is considered only when Family ID is either 6 or 15 */
|
||||
+ if (family == 6 || family == 15)
|
||||
+ model += CPU_EXT_MODEL(eax) << 4;
|
||||
+
|
||||
+ /* Extended Family ID is considered only when Family ID is 15 */
|
||||
+ if (family == 15)
|
||||
+ family += CPU_EXT_FAMILY(eax);
|
||||
|
||||
+ if (family == 6) {
|
||||
switch (model) {
|
||||
case 26:
|
||||
type = CPU_NHM_EP;
|
||||
@@ -178,6 +188,8 @@ cpu_type_get(void)
|
||||
type = CPU_SPR;
|
||||
break;
|
||||
}
|
||||
+ } else if (family == 23) {
|
||||
+ type = CPU_ZEN;
|
||||
}
|
||||
|
||||
return (type);
|
||||
@@ -217,6 +229,7 @@ plat_detect(void)
|
||||
case CPU_SKX:
|
||||
case CPU_ICX:
|
||||
case CPU_SPR:
|
||||
+ case CPU_ZEN:
|
||||
ret = 0;
|
||||
s_cpu_type = cpu_type;
|
||||
break;
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
new file mode 100644
|
||||
index 0000000..abf603a
|
||||
--- /dev/null
|
||||
+++ b/x86/zen.c
|
||||
@@ -0,0 +1,70 @@
|
||||
+/*
|
||||
+ * Copyright (c) 2023, AMD Corporation
|
||||
+ *
|
||||
+ * Redistribution and use in source and binary forms, with or without
|
||||
+ * modification, are permitted provided that the following conditions are met:
|
||||
+ *
|
||||
+ * * Redistributions of source code must retain the above copyright notice,
|
||||
+ * this list of conditions and the following disclaimer.
|
||||
+ * * Redistributions in binary form must reproduce the above copyright
|
||||
+ * notice, this list of conditions and the following disclaimer in the
|
||||
+ * documentation and/or other materials provided with the distribution.
|
||||
+ * * Neither the name of Intel Corporation nor the names of its contributors
|
||||
+ * may be used to endorse or promote products derived from this software
|
||||
+ * without specific prior written permission.
|
||||
+ *
|
||||
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
+ * POSSIBILITY OF SUCH DAMAGE.
|
||||
+ */
|
||||
+
|
||||
+/* This file contains the Zen platform specific functions. */
|
||||
+
|
||||
+#include <inttypes.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <sys/types.h>
|
||||
+#include <stdio.h>
|
||||
+#include <unistd.h>
|
||||
+#include <string.h>
|
||||
+#include <strings.h>
|
||||
+#include "../common/include/os/linux/perf_event.h"
|
||||
+#include "../common/include/os/plat.h"
|
||||
+#include "include/zen.h"
|
||||
+
|
||||
+static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
||||
+};
|
||||
+
|
||||
+static plat_event_config_t s_zen_ll = {
|
||||
+ PERF_TYPE_RAW, 0, 0, 0, "Unsupported"
|
||||
+};
|
||||
+
|
||||
+void
|
||||
+zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
+{
|
||||
+ plat_config_get(perf_count_id, cfg, s_zen_config);
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+zen_ll_config(plat_event_config_t *cfg)
|
||||
+{
|
||||
+ memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t));
|
||||
+}
|
||||
+
|
||||
+int
|
||||
+zen_offcore_num(void)
|
||||
+{
|
||||
+ return (2);
|
||||
+}
|
||||
--
|
||||
2.31.1
|
||||
|
308
0006-common-Add-sample-period-to-platform-event-config.patch
Normal file
308
0006-common-Add-sample-period-to-platform-event-config.patch
Normal file
@ -0,0 +1,308 @@
|
||||
From 4a8b8d47f4a240a95830dc05abd3c19e10b6d821 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Fri, 11 Nov 2022 13:56:09 +0530
|
||||
Subject: [PATCH 06/15] common: Add sample period to platform event config
|
||||
|
||||
Precise PMU events are currently used for capturing memory
|
||||
access statistics. The sample period used for such events is
|
||||
currently hard-coded (LL_PERIOD) and a universal value may
|
||||
not work well on all platforms due to microarchitectural
|
||||
differences in the design of the precise PMU.
|
||||
|
||||
E.g. precise events are programmed through Instruction Based
|
||||
Sampling (IBS) on AMD processors but that PMU does not have
|
||||
the ability to tag only load-store operations. This leads to
|
||||
the capture of many samples that are not relevant for the
|
||||
current use-case. To get an appropriate amount of relevant
|
||||
data, more samples need to be captured and then filtered.
|
||||
This is achieved by increasing the sampling frequency.
|
||||
|
||||
Add sample period as an additional attribute to the platform
|
||||
event config structure so that a customized sample period
|
||||
that works well on a specific platform can be passed during
|
||||
event programming. If not set, a default value is chosen.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/include/os/plat.h | 1 +
|
||||
common/os/os_perf.c | 6 +++++-
|
||||
powerpc/power8.c | 14 +++++++-------
|
||||
powerpc/power9.c | 14 +++++++-------
|
||||
x86/bdw.c | 12 ++++++------
|
||||
x86/nhm.c | 12 ++++++------
|
||||
x86/skl.c | 32 ++++++++++++++++----------------
|
||||
x86/snb.c | 12 ++++++------
|
||||
x86/wsm.c | 22 +++++++++++-----------
|
||||
x86/zen.c | 2 +-
|
||||
10 files changed, 66 insertions(+), 61 deletions(-)
|
||||
|
||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
||||
index e35093d..34535cd 100644
|
||||
--- a/common/include/os/plat.h
|
||||
+++ b/common/include/os/plat.h
|
||||
@@ -53,6 +53,7 @@ typedef struct _plat_event_config {
|
||||
uint64_t config;
|
||||
uint64_t other_attr;
|
||||
uint64_t extra_value;
|
||||
+ uint64_t sample_period;
|
||||
char desc[PLAT_EVENT_DESC_SIZE];
|
||||
} plat_event_config_t;
|
||||
|
||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
||||
index f2f1104..f1036a9 100644
|
||||
--- a/common/os/os_perf.c
|
||||
+++ b/common/os/os_perf.c
|
||||
@@ -839,7 +839,11 @@ ll_init(pf_conf_t *conf)
|
||||
conf->type = cfg.type;
|
||||
conf->config = (cfg.config) | (cfg.other_attr << 16);
|
||||
conf->config1 = cfg.extra_value;
|
||||
- conf->sample_period = LL_PERIOD;
|
||||
+ conf->sample_period = cfg.sample_period;
|
||||
+
|
||||
+ /* If sample period is not set, choose a default value */
|
||||
+ if (!cfg.sample_period)
|
||||
+ conf->sample_period = LL_PERIOD;
|
||||
}
|
||||
|
||||
int
|
||||
diff --git a/powerpc/power8.c b/powerpc/power8.c
|
||||
index b3cab75..a76851d 100644
|
||||
--- a/powerpc/power8.c
|
||||
+++ b/powerpc/power8.c
|
||||
@@ -38,16 +38,16 @@
|
||||
#include "include/power8.h"
|
||||
|
||||
static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
};
|
||||
|
||||
static plat_event_config_t s_power8_ll = {
|
||||
- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED"
|
||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/powerpc/power9.c b/powerpc/power9.c
|
||||
index c6f1cec..4b0bcfc 100644
|
||||
--- a/powerpc/power9.c
|
||||
+++ b/powerpc/power9.c
|
||||
@@ -38,16 +38,16 @@
|
||||
#include "include/power9.h"
|
||||
|
||||
static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
};
|
||||
|
||||
static plat_event_config_t s_power9_ll = {
|
||||
- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED"
|
||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/bdw.c b/x86/bdw.c
|
||||
index 97e33ea..5640f7b 100644
|
||||
--- a/x86/bdw.c
|
||||
+++ b/x86/bdw.c
|
||||
@@ -40,15 +40,15 @@
|
||||
#include "include/bdw.h"
|
||||
|
||||
static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_bdw_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/nhm.c b/x86/nhm.c
|
||||
index bf8c14f..d29d396 100644
|
||||
--- a/x86/nhm.c
|
||||
+++ b/x86/nhm.c
|
||||
@@ -41,15 +41,15 @@
|
||||
#include "include/nhm.h"
|
||||
|
||||
static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_nhm_ll = {
|
||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
static void
|
||||
diff --git a/x86/skl.c b/x86/skl.c
|
||||
index ace0833..6f81298 100644
|
||||
--- a/x86/skl.c
|
||||
+++ b/x86/skl.c
|
||||
@@ -40,31 +40,31 @@
|
||||
#include "include/skl.h"
|
||||
|
||||
static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_skl_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/snb.c b/x86/snb.c
|
||||
index eb89859..3d3185b 100644
|
||||
--- a/x86/snb.c
|
||||
+++ b/x86/snb.c
|
||||
@@ -40,15 +40,15 @@
|
||||
#include "include/snb.h"
|
||||
|
||||
static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_snb_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/wsm.c b/x86/wsm.c
|
||||
index f4285c2..16f68e4 100644
|
||||
--- a/x86/wsm.c
|
||||
+++ b/x86/wsm.c
|
||||
@@ -40,23 +40,23 @@
|
||||
#include "include/wsm.h"
|
||||
|
||||
static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_wsm_ll = {
|
||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
index abf603a..c153a1a 100644
|
||||
--- a/x86/zen.c
|
||||
+++ b/x86/zen.c
|
||||
@@ -48,7 +48,7 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
};
|
||||
|
||||
static plat_event_config_t s_zen_ll = {
|
||||
- PERF_TYPE_RAW, 0, 0, 0, "Unsupported"
|
||||
+ PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported"
|
||||
};
|
||||
|
||||
void
|
||||
--
|
||||
2.31.1
|
||||
|
350
0007-common-Add-exclude-guest-to-platform-event-config.patch
Normal file
350
0007-common-Add-exclude-guest-to-platform-event-config.patch
Normal file
@ -0,0 +1,350 @@
|
||||
From 9d665e4712f0dfa48603471c51ed3c87441030ad Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Tue, 13 Jun 2023 19:21:49 +0530
|
||||
Subject: [PATCH 07/15] common: Add exclude guest to platform event config
|
||||
|
||||
Precise PMU events are currently used for capturing memory
|
||||
access statistics. Currently, these events are programmed to
|
||||
exclude guests and this does not work well on all platforms
|
||||
due to differences in the design of the precise PMU.
|
||||
|
||||
E.g. precise events are programmed through Instruction Based
|
||||
Sampling (IBS) on AMD processors but that PMU does not have
|
||||
the ability to ignore guests unlike the Core PMU.
|
||||
|
||||
Add exclude guest as an additional attribute to the platform
|
||||
event config structure so that precise events can be customized
|
||||
to work on different platforms.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/include/os/pfwrapper.h | 1 +
|
||||
common/include/os/plat.h | 1 +
|
||||
common/include/types.h | 1 +
|
||||
common/os/os_perf.c | 1 +
|
||||
common/os/pfwrapper.c | 2 +-
|
||||
powerpc/power8.c | 14 +++++++-------
|
||||
powerpc/power9.c | 14 +++++++-------
|
||||
x86/bdw.c | 12 ++++++------
|
||||
x86/nhm.c | 12 ++++++------
|
||||
x86/skl.c | 32 ++++++++++++++++----------------
|
||||
x86/snb.c | 12 ++++++------
|
||||
x86/wsm.c | 22 +++++++++++-----------
|
||||
x86/zen.c | 12 ++++++------
|
||||
13 files changed, 70 insertions(+), 66 deletions(-)
|
||||
|
||||
diff --git a/common/include/os/pfwrapper.h b/common/include/os/pfwrapper.h
|
||||
index 414d6af..1864a10 100644
|
||||
--- a/common/include/os/pfwrapper.h
|
||||
+++ b/common/include/os/pfwrapper.h
|
||||
@@ -78,6 +78,7 @@ typedef struct _pf_conf {
|
||||
uint64_t config;
|
||||
uint64_t config1;
|
||||
uint64_t sample_period;
|
||||
+ bool exclude_guest;
|
||||
} pf_conf_t;
|
||||
|
||||
typedef struct _pf_profiling_rec {
|
||||
diff --git a/common/include/os/plat.h b/common/include/os/plat.h
|
||||
index 34535cd..ac4aac8 100644
|
||||
--- a/common/include/os/plat.h
|
||||
+++ b/common/include/os/plat.h
|
||||
@@ -54,6 +54,7 @@ typedef struct _plat_event_config {
|
||||
uint64_t other_attr;
|
||||
uint64_t extra_value;
|
||||
uint64_t sample_period;
|
||||
+ bool exclude_guest;
|
||||
char desc[PLAT_EVENT_DESC_SIZE];
|
||||
} plat_event_config_t;
|
||||
|
||||
diff --git a/common/include/types.h b/common/include/types.h
|
||||
index 3e30f7c..efe3055 100644
|
||||
--- a/common/include/types.h
|
||||
+++ b/common/include/types.h
|
||||
@@ -30,6 +30,7 @@
|
||||
#define _NUMATOP_TYPES_H
|
||||
|
||||
#include <stdint.h>
|
||||
+#include <stdbool.h>
|
||||
#include "./os/os_types.h"
|
||||
#ifdef __powerpc64__
|
||||
#include "../../powerpc/include/types.h"
|
||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
||||
index f1036a9..44ca43d 100644
|
||||
--- a/common/os/os_perf.c
|
||||
+++ b/common/os/os_perf.c
|
||||
@@ -840,6 +840,7 @@ ll_init(pf_conf_t *conf)
|
||||
conf->config = (cfg.config) | (cfg.other_attr << 16);
|
||||
conf->config1 = cfg.extra_value;
|
||||
conf->sample_period = cfg.sample_period;
|
||||
+ conf->exclude_guest = cfg.exclude_guest;
|
||||
|
||||
/* If sample period is not set, choose a default value */
|
||||
if (!cfg.sample_period)
|
||||
diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c
|
||||
index b4c4f17..e08ce07 100644
|
||||
--- a/common/os/pfwrapper.c
|
||||
+++ b/common/os/pfwrapper.c
|
||||
@@ -432,7 +432,7 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf)
|
||||
attr.config1 = conf->config1;
|
||||
attr.sample_period = conf->sample_period;
|
||||
attr.precise_ip = 1;
|
||||
- attr.exclude_guest = 1;
|
||||
+ attr.exclude_guest = conf->exclude_guest;
|
||||
attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU |
|
||||
PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN;
|
||||
attr.disabled = 1;
|
||||
diff --git a/powerpc/power8.c b/powerpc/power8.c
|
||||
index a76851d..d8f4e01 100644
|
||||
--- a/powerpc/power8.c
|
||||
+++ b/powerpc/power8.c
|
||||
@@ -38,16 +38,16 @@
|
||||
#include "include/power8.h"
|
||||
|
||||
static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
};
|
||||
|
||||
static plat_event_config_t s_power8_ll = {
|
||||
- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/powerpc/power9.c b/powerpc/power9.c
|
||||
index 4b0bcfc..9879ec7 100644
|
||||
--- a/powerpc/power9.c
|
||||
+++ b/powerpc/power9.c
|
||||
@@ -38,16 +38,16 @@
|
||||
#include "include/power9.h"
|
||||
|
||||
static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" },
|
||||
- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" },
|
||||
+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" },
|
||||
+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" },
|
||||
+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" },
|
||||
};
|
||||
|
||||
static plat_event_config_t s_power9_ll = {
|
||||
- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED"
|
||||
+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/bdw.c b/x86/bdw.c
|
||||
index 5640f7b..97eca67 100644
|
||||
--- a/x86/bdw.c
|
||||
+++ b/x86/bdw.c
|
||||
@@ -40,15 +40,15 @@
|
||||
#include "include/bdw.h"
|
||||
|
||||
static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_bdw_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/nhm.c b/x86/nhm.c
|
||||
index d29d396..cf65705 100644
|
||||
--- a/x86/nhm.c
|
||||
+++ b/x86/nhm.c
|
||||
@@ -41,15 +41,15 @@
|
||||
#include "include/nhm.h"
|
||||
|
||||
static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_nhm_ll = {
|
||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
static void
|
||||
diff --git a/x86/skl.c b/x86/skl.c
|
||||
index 6f81298..a7bbc14 100644
|
||||
--- a/x86/skl.c
|
||||
+++ b/x86/skl.c
|
||||
@@ -40,31 +40,31 @@
|
||||
#include "include/skl.h"
|
||||
|
||||
static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_skl_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/snb.c b/x86/snb.c
|
||||
index 3d3185b..135ee1c 100644
|
||||
--- a/x86/snb.c
|
||||
+++ b/x86/snb.c
|
||||
@@ -40,15 +40,15 @@
|
||||
#include "include/snb.h"
|
||||
|
||||
static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_snb_ll = {
|
||||
- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/wsm.c b/x86/wsm.c
|
||||
index 16f68e4..7b122fd 100644
|
||||
--- a/x86/wsm.c
|
||||
+++ b/x86/wsm.c
|
||||
@@ -40,23 +40,23 @@
|
||||
#include "include/wsm.h"
|
||||
|
||||
static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" },
|
||||
- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" },
|
||||
- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" }
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" },
|
||||
+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" },
|
||||
+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" }
|
||||
};
|
||||
|
||||
static plat_event_config_t s_wsm_ll = {
|
||||
- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold"
|
||||
+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold"
|
||||
};
|
||||
|
||||
void
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
index c153a1a..2f851a2 100644
|
||||
--- a/x86/zen.c
|
||||
+++ b/x86/zen.c
|
||||
@@ -40,15 +40,15 @@
|
||||
#include "include/zen.h"
|
||||
|
||||
static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
- { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" },
|
||||
- { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
||||
};
|
||||
|
||||
static plat_event_config_t s_zen_ll = {
|
||||
- PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported"
|
||||
+ PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported"
|
||||
};
|
||||
|
||||
void
|
||||
--
|
||||
2.31.1
|
||||
|
156
0008-x86-zen-Add-support-for-memory-access-stats.patch
Normal file
156
0008-x86-zen-Add-support-for-memory-access-stats.patch
Normal file
@ -0,0 +1,156 @@
|
||||
From aefc85d7b956c4df998afb4cfe5c413e5fd5b062 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Thu, 10 Mar 2022 10:32:51 +0530
|
||||
Subject: [PATCH 08/15] x86/zen: Add support for memory access stats
|
||||
|
||||
Add support for capturing memory access statistics on Zen
|
||||
processors using Instruction Based Sampling (IBS).
|
||||
|
||||
IBS, by design, cannot tag specific types of ops and hence
|
||||
cannot provide samples for only those ops that cause memory
|
||||
access. Hence, additional post-processing is required for
|
||||
filtering out irrelevant samples. To get an appropriate
|
||||
volume of samples, the sampling frequency also needs to be
|
||||
high.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/os/pfwrapper.c | 20 +++++++++++++++++---
|
||||
x86/zen.c | 35 ++++++++++++++++++++++++++++++++++-
|
||||
2 files changed, 51 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c
|
||||
index e08ce07..d6102be 100644
|
||||
--- a/common/os/pfwrapper.c
|
||||
+++ b/common/os/pfwrapper.c
|
||||
@@ -434,7 +434,8 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf)
|
||||
attr.precise_ip = 1;
|
||||
attr.exclude_guest = conf->exclude_guest;
|
||||
attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU |
|
||||
- PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN;
|
||||
+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN |
|
||||
+ PERF_SAMPLE_DATA_SRC;
|
||||
attr.disabled = 1;
|
||||
|
||||
if ((fds[0] = pf_event_open(&attr, -1, cpu->cpuid, -1, 0)) < 0) {
|
||||
@@ -481,6 +482,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
||||
pf_ll_rec_t *rec)
|
||||
{
|
||||
struct { uint32_t pid, tid; } id;
|
||||
+ union perf_mem_data_src data_src;
|
||||
uint64_t i, addr, cpu, weight, nr, value, *ips;
|
||||
int j, ret = -1;
|
||||
|
||||
@@ -492,6 +494,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
||||
* [ u64 nr; }
|
||||
* { u64 ips[nr]; }
|
||||
* { u64 weight; }
|
||||
+ * { u64 data_src; }
|
||||
* };
|
||||
*/
|
||||
if (mmap_buffer_read(mhdr, &id, sizeof (id)) == -1) {
|
||||
@@ -551,7 +554,18 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size,
|
||||
}
|
||||
|
||||
size -= sizeof (weight);
|
||||
-
|
||||
+
|
||||
+ if (mmap_buffer_read(mhdr, &data_src, sizeof (data_src)) == -1) {
|
||||
+ debug_print(NULL, 2, "ll_sample_read: read data_src failed.\n");
|
||||
+ goto L_EXIT;
|
||||
+ }
|
||||
+
|
||||
+ size -= sizeof (data_src);
|
||||
+
|
||||
+ if (data_src.mem_op == PERF_MEM_OP_NA ||
|
||||
+ data_src.mem_op == PERF_MEM_OP_EXEC)
|
||||
+ addr = 0;
|
||||
+
|
||||
rec->ip_num = j;
|
||||
rec->pid = id.pid;
|
||||
rec->tid = id.tid;
|
||||
@@ -575,7 +589,7 @@ ll_recbuf_update(pf_ll_rec_t *rec_arr, int *nrec, pf_ll_rec_t *rec)
|
||||
{
|
||||
int i;
|
||||
|
||||
- if ((rec->pid == 0) || (rec->tid == 0)) {
|
||||
+ if ((rec->pid == 0) || (rec->tid == 0) || (rec->addr == 0)) {
|
||||
/* Just consider the user-land process/thread. */
|
||||
return;
|
||||
}
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
index 2f851a2..67a425b 100644
|
||||
--- a/x86/zen.c
|
||||
+++ b/x86/zen.c
|
||||
@@ -30,7 +30,9 @@
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
+#include <sys/stat.h>
|
||||
#include <sys/types.h>
|
||||
+#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
@@ -39,6 +41,9 @@
|
||||
#include "../common/include/os/plat.h"
|
||||
#include "include/zen.h"
|
||||
|
||||
+#define IBS_OP_PMU_TYPE_PATH \
|
||||
+ "/sys/bus/event_source/devices/ibs_op/type"
|
||||
+
|
||||
static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
{ PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
{ PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" },
|
||||
@@ -47,8 +52,13 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
{ PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
||||
};
|
||||
|
||||
+/*
|
||||
+ * Owing to the nature of IBS uop tagging, a higher sampling period is
|
||||
+ * required to capture meaningful samples. All samples may not originate
|
||||
+ * from a memory access instruction and require additional filtering.
|
||||
+ */
|
||||
static plat_event_config_t s_zen_ll = {
|
||||
- PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported"
|
||||
+ 0, 0x0000000000000000, 0, 0, LL_THRESH * 10, 0, "IbsOpCntCycles"
|
||||
};
|
||||
|
||||
void
|
||||
@@ -57,10 +67,33 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
plat_config_get(perf_count_id, cfg, s_zen_config);
|
||||
}
|
||||
|
||||
+static int
|
||||
+zen_ibs_op_pmu_type(void)
|
||||
+{
|
||||
+ int fd, type, i;
|
||||
+ char buf[32];
|
||||
+
|
||||
+ if ((fd = open(IBS_OP_PMU_TYPE_PATH, O_RDONLY)) < 0)
|
||||
+ return (-1);
|
||||
+
|
||||
+ if ((i = read(fd, buf, sizeof (buf) - 1)) <= 0) {
|
||||
+ close(fd);
|
||||
+ return (-1);
|
||||
+ }
|
||||
+
|
||||
+ close(fd);
|
||||
+ buf[i] = 0;
|
||||
+ if ((type = atoi(buf)) == 0)
|
||||
+ return (-1);
|
||||
+
|
||||
+ return (type);
|
||||
+}
|
||||
+
|
||||
void
|
||||
zen_ll_config(plat_event_config_t *cfg)
|
||||
{
|
||||
memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t));
|
||||
+ cfg->type = zen_ibs_op_pmu_type();
|
||||
}
|
||||
|
||||
int
|
||||
--
|
||||
2.31.1
|
||||
|
71
0009-x86-Fix-clock-frequency-parsing.patch
Normal file
71
0009-x86-Fix-clock-frequency-parsing.patch
Normal file
@ -0,0 +1,71 @@
|
||||
From c149b054fe5b1851860fd01d54596ea75f5008d3 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Wed, 13 Apr 2022 11:45:08 +0530
|
||||
Subject: [PATCH 09/15] x86: Fix clock frequency parsing
|
||||
|
||||
AMD processors do not advertise a base clock frequency as
|
||||
a part of the "model name" in /proc/cpuinfo. The parsing
|
||||
must fail in order to let os_calibrate() determine clock
|
||||
speed from cpufreq information or from TSC instead.
|
||||
|
||||
Since the parser fails to find "@", strcspn() returns the
|
||||
length of the line instead and sscanf() ends up scanning
|
||||
garbage values beyond the null terminator that match the
|
||||
format specifier. To avoid this, add an additional check
|
||||
that makes the condition fail if "@" is not found.
|
||||
|
||||
Fixes: eaeed92 ("Powerpc: Fix CPU% utilization for PowerVMs")
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
x86/util.c | 19 ++++++++++++++-----
|
||||
1 file changed, 14 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/x86/util.c b/x86/util.c
|
||||
index fdff877..655a677 100644
|
||||
--- a/x86/util.c
|
||||
+++ b/x86/util.c
|
||||
@@ -67,27 +67,36 @@ rdtsc(void)
|
||||
|
||||
/*
|
||||
* Check the cpu name in proc info. Intel CPUs always have @ x.y
|
||||
- * Ghz and that is the TSC frequency.
|
||||
+ * GHz and that is the TSC frequency. AMD CPUs do not advertise
|
||||
+ * clock frequency as a part of the model name.
|
||||
*/
|
||||
int
|
||||
arch__cpuinfo_freq(double *freq, char *unit)
|
||||
{
|
||||
FILE *f;
|
||||
char *line = NULL;
|
||||
- size_t len = 0;
|
||||
+ size_t idx, len = 0;
|
||||
int ret = -1;
|
||||
|
||||
if ((f = fopen(CPUINFO_PATH, "r")) == NULL) {
|
||||
return (-1);
|
||||
}
|
||||
|
||||
- while (getline(&line, &len, f) > 0) {
|
||||
+ while ((len = getline(&line, &len, f)) > 0) {
|
||||
if (strncmp(line, "model name", sizeof ("model name") - 1) != 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
- if (sscanf(line + strcspn(line, "@") + 1, "%lf%10s",
|
||||
- freq, unit) == 2) {
|
||||
+ idx = strcspn(line, "@") + 1;
|
||||
+
|
||||
+ /*
|
||||
+ * The model name will not change for other processors. So
|
||||
+ * bail out if "@" is not found.
|
||||
+ */
|
||||
+ if (idx >= len)
|
||||
+ break;
|
||||
+
|
||||
+ if (sscanf(line + idx, "%lf%10s", freq, unit) == 2) {
|
||||
if (strcasecmp(unit, "GHz") == 0) {
|
||||
*freq *= GHZ;
|
||||
} else if (strcasecmp(unit, "Mhz") == 0) {
|
||||
--
|
||||
2.31.1
|
||||
|
135
0010-x86-zen-Add-Zen-3-support.patch
Normal file
135
0010-x86-zen-Add-Zen-3-support.patch
Normal file
@ -0,0 +1,135 @@
|
||||
From e9bd7eaa767c987fcb8d6879e7c7509a24bcb17c Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Fri, 20 Jan 2023 13:56:37 +0530
|
||||
Subject: [PATCH 10/15] x86/zen: Add Zen 3 support
|
||||
|
||||
Add vendor and family identification as well as the relevant
|
||||
events to count per-process memory accesses and CPU usage
|
||||
on AMD Zen 3 family of processors.
|
||||
|
||||
Key changes include the use of the LsAnyFillsFromSys event
|
||||
instead of LsDmndFillsFromSys for counting local and remote
|
||||
memory accesses. While LsDmndFillsFromSys covers only demand
|
||||
cache fills, LsAnyFillsFromSys covers all cache fills
|
||||
including prefetches.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
x86/include/types.h | 5 +++--
|
||||
x86/include/zen.h | 1 +
|
||||
x86/plat.c | 8 +++++++-
|
||||
x86/zen.c | 14 ++++++++++++++
|
||||
4 files changed, 25 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
||||
index 0843bd1..4aa5fa6 100644
|
||||
--- a/x86/include/types.h
|
||||
+++ b/x86/include/types.h
|
||||
@@ -47,10 +47,11 @@ typedef enum {
|
||||
CPU_SKX,
|
||||
CPU_ICX,
|
||||
CPU_SPR,
|
||||
- CPU_ZEN
|
||||
+ CPU_ZEN,
|
||||
+ CPU_ZEN3
|
||||
} cpu_type_t;
|
||||
|
||||
-#define CPU_TYPE_NUM 13
|
||||
+#define CPU_TYPE_NUM 14
|
||||
|
||||
typedef enum {
|
||||
PERF_COUNT_INVALID = -1,
|
||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
||||
index be61324..b5c40f5 100644
|
||||
--- a/x86/include/zen.h
|
||||
+++ b/x86/include/zen.h
|
||||
@@ -40,6 +40,7 @@ extern "C" {
|
||||
struct _plat_event_config;
|
||||
|
||||
extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
+extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
extern void zen_ll_config(struct _plat_event_config *);
|
||||
extern int zen_offcore_num(void);
|
||||
|
||||
diff --git a/x86/plat.c b/x86/plat.c
|
||||
index f79837a..35561dc 100644
|
||||
--- a/x86/plat.c
|
||||
+++ b/x86/plat.c
|
||||
@@ -52,7 +52,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
||||
skl_profiling_config,
|
||||
icx_profiling_config,
|
||||
spr_profiling_config,
|
||||
- zen_profiling_config
|
||||
+ zen_profiling_config,
|
||||
+ zen3_profiling_config
|
||||
};
|
||||
|
||||
pfn_plat_ll_config_t
|
||||
@@ -69,6 +70,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
||||
skl_ll_config,
|
||||
icx_ll_config,
|
||||
spr_ll_config,
|
||||
+ zen_ll_config,
|
||||
zen_ll_config
|
||||
};
|
||||
|
||||
@@ -86,6 +88,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
||||
skl_offcore_num,
|
||||
icx_offcore_num,
|
||||
spr_offcore_num,
|
||||
+ zen_offcore_num,
|
||||
zen_offcore_num
|
||||
};
|
||||
|
||||
@@ -190,6 +193,8 @@ cpu_type_get(void)
|
||||
}
|
||||
} else if (family == 23) {
|
||||
type = CPU_ZEN;
|
||||
+ } else if (family == 25) {
|
||||
+ type = CPU_ZEN3;
|
||||
}
|
||||
|
||||
return (type);
|
||||
@@ -230,6 +235,7 @@ plat_detect(void)
|
||||
case CPU_ICX:
|
||||
case CPU_SPR:
|
||||
case CPU_ZEN:
|
||||
+ case CPU_ZEN3:
|
||||
ret = 0;
|
||||
s_cpu_type = cpu_type;
|
||||
break;
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
index 67a425b..dd37d03 100644
|
||||
--- a/x86/zen.c
|
||||
+++ b/x86/zen.c
|
||||
@@ -52,6 +52,14 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = {
|
||||
{ PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" },
|
||||
};
|
||||
|
||||
+static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = {
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
||||
+};
|
||||
+
|
||||
/*
|
||||
* Owing to the nature of IBS uop tagging, a higher sampling period is
|
||||
* required to capture meaningful samples. All samples may not originate
|
||||
@@ -67,6 +75,12 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
plat_config_get(perf_count_id, cfg, s_zen_config);
|
||||
}
|
||||
|
||||
+void
|
||||
+zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
+{
|
||||
+ plat_config_get(perf_count_id, cfg, s_zen3_config);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
zen_ibs_op_pmu_type(void)
|
||||
{
|
||||
--
|
||||
2.31.1
|
||||
|
145
0011-x86-zen-Add-Zen-4-support.patch
Normal file
145
0011-x86-zen-Add-Zen-4-support.patch
Normal file
@ -0,0 +1,145 @@
|
||||
From 7fc232a4df2013089300b0c23490d7d07c9c0165 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Fri, 20 Jan 2023 14:19:29 +0530
|
||||
Subject: [PATCH 11/15] x86/zen: Add Zen 4 support
|
||||
|
||||
Add vendor and family identification as well as the relevant
|
||||
events to count per-process memory accesses and CPU usage
|
||||
on AMD Zen 4 family of processors.
|
||||
|
||||
Key changes include the use of the LsNotHaltedP0Cyc event to
|
||||
count cycles at P0 frequency. This improves the accuracy of
|
||||
the utilization metrics as, unlike the typical cycles event,
|
||||
this is clock frequency invariant.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
x86/include/types.h | 5 +++--
|
||||
x86/include/zen.h | 1 +
|
||||
x86/plat.c | 18 ++++++++++++++----
|
||||
x86/zen.c | 14 ++++++++++++++
|
||||
4 files changed, 32 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/x86/include/types.h b/x86/include/types.h
|
||||
index 4aa5fa6..bb965f9 100644
|
||||
--- a/x86/include/types.h
|
||||
+++ b/x86/include/types.h
|
||||
@@ -48,10 +48,11 @@ typedef enum {
|
||||
CPU_ICX,
|
||||
CPU_SPR,
|
||||
CPU_ZEN,
|
||||
- CPU_ZEN3
|
||||
+ CPU_ZEN3,
|
||||
+ CPU_ZEN4
|
||||
} cpu_type_t;
|
||||
|
||||
-#define CPU_TYPE_NUM 14
|
||||
+#define CPU_TYPE_NUM 15
|
||||
|
||||
typedef enum {
|
||||
PERF_COUNT_INVALID = -1,
|
||||
diff --git a/x86/include/zen.h b/x86/include/zen.h
|
||||
index b5c40f5..cbdfcd8 100644
|
||||
--- a/x86/include/zen.h
|
||||
+++ b/x86/include/zen.h
|
||||
@@ -41,6 +41,7 @@ struct _plat_event_config;
|
||||
|
||||
extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
+extern void zen4_profiling_config(perf_count_id_t, struct _plat_event_config *);
|
||||
extern void zen_ll_config(struct _plat_event_config *);
|
||||
extern int zen_offcore_num(void);
|
||||
|
||||
diff --git a/x86/plat.c b/x86/plat.c
|
||||
index 35561dc..fe2bf01 100644
|
||||
--- a/x86/plat.c
|
||||
+++ b/x86/plat.c
|
||||
@@ -53,7 +53,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = {
|
||||
icx_profiling_config,
|
||||
spr_profiling_config,
|
||||
zen_profiling_config,
|
||||
- zen3_profiling_config
|
||||
+ zen3_profiling_config,
|
||||
+ zen4_profiling_config
|
||||
};
|
||||
|
||||
pfn_plat_ll_config_t
|
||||
@@ -71,6 +72,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = {
|
||||
icx_ll_config,
|
||||
spr_ll_config,
|
||||
zen_ll_config,
|
||||
+ zen_ll_config,
|
||||
zen_ll_config
|
||||
};
|
||||
|
||||
@@ -89,6 +91,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = {
|
||||
icx_offcore_num,
|
||||
spr_offcore_num,
|
||||
zen_offcore_num,
|
||||
+ zen_offcore_num,
|
||||
zen_offcore_num
|
||||
};
|
||||
|
||||
@@ -191,10 +194,16 @@ cpu_type_get(void)
|
||||
type = CPU_SPR;
|
||||
break;
|
||||
}
|
||||
- } else if (family == 23) {
|
||||
+ } else if (family == 23) { /* Family 17h */
|
||||
type = CPU_ZEN;
|
||||
- } else if (family == 25) {
|
||||
- type = CPU_ZEN3;
|
||||
+ } else if (family == 25) { /* Family 19h */
|
||||
+ if ((model >= 0x00 && model <= 0x0f) ||
|
||||
+ (model >= 0x20 && model <= 0x2f) ||
|
||||
+ (model >= 0x40 && model <= 0x5f)) {
|
||||
+ type = CPU_ZEN3;
|
||||
+ } else {
|
||||
+ type = CPU_ZEN4;
|
||||
+ }
|
||||
}
|
||||
|
||||
return (type);
|
||||
@@ -236,6 +245,7 @@ plat_detect(void)
|
||||
case CPU_SPR:
|
||||
case CPU_ZEN:
|
||||
case CPU_ZEN3:
|
||||
+ case CPU_ZEN4:
|
||||
ret = 0;
|
||||
s_cpu_type = cpu_type;
|
||||
break;
|
||||
diff --git a/x86/zen.c b/x86/zen.c
|
||||
index dd37d03..c21eb1a 100644
|
||||
--- a/x86/zen.c
|
||||
+++ b/x86/zen.c
|
||||
@@ -60,6 +60,14 @@ static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = {
|
||||
{ PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
||||
};
|
||||
|
||||
+static plat_event_config_t s_zen4_config[PERF_COUNT_NUM] = {
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000100000120, 0, 0, 0, 0, "LsNotHaltedP0Cyc.P0FreqCyc" },
|
||||
+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" },
|
||||
+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" },
|
||||
+};
|
||||
+
|
||||
/*
|
||||
* Owing to the nature of IBS uop tagging, a higher sampling period is
|
||||
* required to capture meaningful samples. All samples may not originate
|
||||
@@ -81,6 +89,12 @@ zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
plat_config_get(perf_count_id, cfg, s_zen3_config);
|
||||
}
|
||||
|
||||
+void
|
||||
+zen4_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg)
|
||||
+{
|
||||
+ plat_config_get(perf_count_id, cfg, s_zen4_config);
|
||||
+}
|
||||
+
|
||||
static int
|
||||
zen_ibs_op_pmu_type(void)
|
||||
{
|
||||
--
|
||||
2.31.1
|
||||
|
75
0012-x86-Add-feature-tracker.patch
Normal file
75
0012-x86-Add-feature-tracker.patch
Normal file
@ -0,0 +1,75 @@
|
||||
From 34b5ee97323d0bc62ca2d0beae3e99b2213752c2 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Fri, 20 Jan 2023 14:39:28 +0530
|
||||
Subject: [PATCH 12/15] x86: Add feature tracker
|
||||
|
||||
Add a file to keep track of features available on AMD and
|
||||
Intel processors.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
x86/FEATURES | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 52 insertions(+)
|
||||
create mode 100755 x86/FEATURES
|
||||
|
||||
diff --git a/x86/FEATURES b/x86/FEATURES
|
||||
new file mode 100755
|
||||
index 0000000..7ece0d6
|
||||
--- /dev/null
|
||||
+++ b/x86/FEATURES
|
||||
@@ -0,0 +1,52 @@
|
||||
+Features supported on X86:
|
||||
+--------------------------
|
||||
+
|
||||
+Per process/thread:
|
||||
+
|
||||
+| Feature | Supported |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| | AMD | Intel |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| RMA | Y | Y |
|
||||
+| LMA | Y | Y |
|
||||
+| CPI | Y | Y |
|
||||
+| CPU% | Y | Y |
|
||||
+| Memory area ADDR | Y | Y |
|
||||
+| Memory area SIZE | Y | Y |
|
||||
+| Memory area ACCESS% | Y | Y |
|
||||
+| Memory area LAT(ns) | Y | Y |
|
||||
+| Memory area DESC | Y | Y |
|
||||
+| Node ACCESS% | Y | Y |
|
||||
+| Node LAT(ns) | Y | Y |
|
||||
+| Call-chain when process generates RMA / LMA / CYCLES / IR | Y | Y |
|
||||
+| Call-chain when process accesses the memory area | Y | Y |
|
||||
+| PQOS CMT/MBM | N | Y |
|
||||
+
|
||||
+Per Node:
|
||||
+
|
||||
+| Feature | Supported |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| | AMD | Intel |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| RMA | Y | Y |
|
||||
+| LMA | Y | Y |
|
||||
+| CPU | Y | Y |
|
||||
+| CPU% | Y | Y |
|
||||
+| MEM total | Y | Y |
|
||||
+| MEM free | Y | Y |
|
||||
+| MEM active | Y | Y |
|
||||
+| MEM inactive | Y | Y |
|
||||
+| Dirty | Y | Y |
|
||||
+| Writeback | Y | Y |
|
||||
+| Mapped | Y | Y |
|
||||
+| QPI/UPI 0 bandwidth | N | Y |
|
||||
+| QPI/UPI 1 bandwidth | N | Y |
|
||||
+| Memory controller bandwidth | N | Y |
|
||||
+
|
||||
+Other:
|
||||
+
|
||||
+| Feature | Supported |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| | AMD | Intel |
|
||||
+|-----------------------------------------------------------|---------------|
|
||||
+| mgen testcase | Y | Y |
|
||||
--
|
||||
2.31.1
|
||||
|
58
0013-common-Fix-perf-init-for-large-systems.patch
Normal file
58
0013-common-Fix-perf-init-for-large-systems.patch
Normal file
@ -0,0 +1,58 @@
|
||||
From b4543efe798bbc255519fdcec73484cbd43472d1 Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Mon, 18 Apr 2022 10:59:26 +0530
|
||||
Subject: [PATCH 13/15] common: Fix perf init for large systems
|
||||
|
||||
Large systems with hundreds of CPUs can run into issues
|
||||
during perf event initialization because of the default
|
||||
resource limits for file descriptors. Set RLIMIT_NOFILE
|
||||
explicitly to a fairly large value to avoid them.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/os/os_perf.c | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/common/os/os_perf.c b/common/os/os_perf.c
|
||||
index 44ca43d..49fdaaa 100644
|
||||
--- a/common/os/os_perf.c
|
||||
+++ b/common/os/os_perf.c
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
+#include <sys/resource.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
@@ -850,12 +851,27 @@ ll_init(pf_conf_t *conf)
|
||||
int
|
||||
os_perf_init(void)
|
||||
{
|
||||
+ struct rlimit limit;
|
||||
int ringsize, size;
|
||||
|
||||
s_profiling_recbuf = NULL;
|
||||
s_ll_recbuf = NULL;
|
||||
s_partpause_enabled = B_FALSE;
|
||||
|
||||
+ /*
|
||||
+ * Depending on the number of available CPUs in the system, the
|
||||
+ * default fd limit may be exceeded. Set it to a large value to
|
||||
+ * avoid running into problems.
|
||||
+ */
|
||||
+ limit.rlim_cur = 32768;
|
||||
+ limit.rlim_max = 32768;
|
||||
+
|
||||
+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0) {
|
||||
+ exit_msg_put("Failed to setup perf!\n");
|
||||
+ debug_print(NULL, 2, "os_perf_init failed\n");
|
||||
+ return (-1);
|
||||
+ }
|
||||
+
|
||||
ringsize = pf_ringsize_init();
|
||||
size = ((ringsize / sizeof (pf_profiling_rbrec_t)) + 1) *
|
||||
sizeof (pf_profiling_rec_t);
|
||||
--
|
||||
2.31.1
|
||||
|
30
0014-common-Increase-count-of-possible-CPUs-per-node.patch
Normal file
30
0014-common-Increase-count-of-possible-CPUs-per-node.patch
Normal file
@ -0,0 +1,30 @@
|
||||
From 6f6cc3b24d84c413556639b64a62aca6ad0b21cc Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Mon, 10 Oct 2022 14:55:45 +0530
|
||||
Subject: [PATCH 14/15] common: Increase count of possible CPUs per-node
|
||||
|
||||
Upcoming AMD Zen 4 processors support up to 256 threads per
|
||||
NUMA node in NPS1 configuration. Hence, increase the number
|
||||
of possible CPUs per-node to 256.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/include/types.h | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/common/include/types.h b/common/include/types.h
|
||||
index efe3055..05b411a 100644
|
||||
--- a/common/include/types.h
|
||||
+++ b/common/include/types.h
|
||||
@@ -116,7 +116,7 @@ typedef enum {
|
||||
#define UI_COUNT_NUM 5
|
||||
|
||||
#define NNODES_MAX 64
|
||||
-#define NCPUS_NODE_MAX 128
|
||||
+#define NCPUS_NODE_MAX 256
|
||||
#define NCPUS_MAX (NNODES_MAX * NCPUS_NODE_MAX)
|
||||
#define NPROCS_NAX 4096
|
||||
#define LL_THRESH 128
|
||||
--
|
||||
2.31.1
|
||||
|
36
0015-common-Fix-some-typos.patch
Normal file
36
0015-common-Fix-some-typos.patch
Normal file
@ -0,0 +1,36 @@
|
||||
From 8c3fe7b2debf74566a6017c92eebc7cb23f9deca Mon Sep 17 00:00:00 2001
|
||||
From: Sandipan Das <sandipan.das@amd.com>
|
||||
Date: Thu, 10 Nov 2022 10:43:04 +0530
|
||||
Subject: [PATCH 15/15] common: Fix some typos
|
||||
|
||||
Fix some typos in the messages shown when a user attempts
|
||||
to monitor a process or thread that has already exited.
|
||||
|
||||
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
|
||||
---
|
||||
common/win.c | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/common/win.c b/common/win.c
|
||||
index 087efe5..d0a8f3b 100644
|
||||
--- a/common/win.c
|
||||
+++ b/common/win.c
|
||||
@@ -3489,13 +3489,13 @@ win_warn_msg(warn_type_t warn_type)
|
||||
break;
|
||||
|
||||
case WARN_INVALID_PID:
|
||||
- (void) strncpy(content, "Process exists, "
|
||||
+ (void) strncpy(content, "Process exited, "
|
||||
"return to home window ...",
|
||||
WIN_LINECHAR_MAX);
|
||||
break;
|
||||
|
||||
case WARN_INVALID_LWPID:
|
||||
- (void) strncpy(content, "Thread exists, "
|
||||
+ (void) strncpy(content, "Thread exited, "
|
||||
"return to home window ...",
|
||||
WIN_LINECHAR_MAX);
|
||||
break;
|
||||
--
|
||||
2.31.1
|
||||
|
16
numatop.spec
16
numatop.spec
@ -24,6 +24,22 @@ BuildRequires: numactl-devel
|
||||
ExclusiveArch: x86_64 ppc64le
|
||||
|
||||
#Patch001: v2.2-001-Initial-support-for-SPR.patch
|
||||
Patch0001: 0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch
|
||||
Patch0002: 0002-Update-the-error-message.patch
|
||||
Patch0003: 0003-Update-README.patch
|
||||
Patch0004: 0004-x86-Prepare-for-multi-vendor-support.patch
|
||||
Patch0005: 0005-x86-zen-Add-initial-support.patch
|
||||
Patch0006: 0006-common-Add-sample-period-to-platform-event-config.patch
|
||||
Patch0007: 0007-common-Add-exclude-guest-to-platform-event-config.patch
|
||||
Patch0008: 0008-x86-zen-Add-support-for-memory-access-stats.patch
|
||||
Patch0009: 0009-x86-Fix-clock-frequency-parsing.patch
|
||||
Patch0010: 0010-x86-zen-Add-Zen-3-support.patch
|
||||
Patch0011: 0011-x86-zen-Add-Zen-4-support.patch
|
||||
Patch0012: 0012-x86-Add-feature-tracker.patch
|
||||
Patch0013: 0013-common-Fix-perf-init-for-large-systems.patch
|
||||
Patch0014: 0014-common-Increase-count-of-possible-CPUs-per-node.patch
|
||||
Patch0015: 0015-common-Fix-some-typos.patch
|
||||
|
||||
|
||||
%description
|
||||
NumaTOP is an observation tool for runtime memory locality characterization and
|
||||
|
Loading…
Reference in New Issue
Block a user