diff --git a/.gitignore b/.gitignore index 6b7189b..66109e9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ numatop*.tar.?z /v2.3.tar.gz +/v2.4.tar.gz diff --git a/.numatop.metadata b/.numatop.metadata new file mode 100644 index 0000000..541cbef --- /dev/null +++ b/.numatop.metadata @@ -0,0 +1 @@ +ce5205379830715d004d8e129e43128e001dfdc6 v2.4.tar.gz diff --git a/0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch b/0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch deleted file mode 100644 index 7d5cdad..0000000 --- a/0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch +++ /dev/null @@ -1,42 +0,0 @@ -From 2715969f92f5e8d6c60488a65ccef73fef57fa6e Mon Sep 17 00:00:00 2001 -From: Zhengjun Xing -Date: Mon, 31 Oct 2022 14:22:32 +0800 -Subject: [PATCH 01/15] configure.ac : Fix build error when libnuma is missed -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When libnuma is installed after running autogen.sh (which didn’t fails) it -silently fails linking with missing symbols. To avoid this issue just make -autoconf error out if libnuma is missing. - -Signed-off-by: Zhengjun Xing ---- - configure.ac | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/configure.ac b/configure.ac -index fd945f4..36edcc5 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -2,7 +2,7 @@ - # Process this file with autoconf to produce a configure script. - - AC_PREREQ([2.69]) --AC_INIT([numatop], [v2.1], [yao.jin@intel.com]) -+AC_INIT([numatop], [v2.3], [zhengjun.xing@intel.com]) - AM_INIT_AUTOMAKE([-Wno-portability no-dist-gzip dist-xz foreign subdir-objects]) - AC_CONFIG_SRCDIR([common/numatop.c]) - AC_CONFIG_HEADERS([config.h]) -@@ -18,7 +18,7 @@ AC_PROG_CC - AC_PROG_INSTALL - - # Checks for libraries. --AC_CHECK_LIB([numa], [numa_free]) -+AC_CHECK_LIB([numa], [numa_free], [], [ AC_MSG_ERROR([numactl-devel or libnuma-dev(el) is required but was not found]) exit -1]) - AC_CHECK_LIB([pthread], [pthread_create]) - - PKG_CHECK_MODULES([CHECK], [check]) --- -2.31.1 - diff --git a/0002-Update-the-error-message.patch b/0002-Update-the-error-message.patch deleted file mode 100644 index c840c75..0000000 --- a/0002-Update-the-error-message.patch +++ /dev/null @@ -1,30 +0,0 @@ -From b9157a8e3ba3a2a0af3d8f755a32a3b57cad04c9 Mon Sep 17 00:00:00 2001 -From: Zhengjun Xing -Date: Mon, 31 Oct 2022 15:15:08 +0800 -Subject: [PATCH 02/15] Update the error message - -Update error message for cases that needs to increase ulimit. -For example, SPR needs to set the max open files to be more -than 1024, while in the most system, the default is 1024. - -Signed-off-by: Zhengjun Xing ---- - common/os/os_perf.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/common/os/os_perf.c b/common/os/os_perf.c -index 44263e8..f2f1104 100644 ---- a/common/os/os_perf.c -+++ b/common/os/os_perf.c -@@ -650,7 +650,7 @@ os_profiling_start(perf_ctl_t *ctl, perf_task_t *task) - proc_ll_clear(NULL); - - if (profiling_start(ctl, (task_profiling_t *)(task)) != 0) { -- exit_msg_put("Fail to setup perf (probably permission denied)!\n"); -+ exit_msg_put("Fail to setup perf (probably permission denied or need to increase the ulimit)!\n"); - debug_print(NULL, 2, "os_profiling_start failed\n"); - perf_status_set(PERF_STATUS_PROFILING_FAILED); - return (-1); --- -2.31.1 - diff --git a/0003-Update-README.patch b/0003-Update-README.patch deleted file mode 100644 index 80f5abc..0000000 --- a/0003-Update-README.patch +++ /dev/null @@ -1,56 +0,0 @@ -From ff75e35508183b5ed39d50122c71293e8e65a86f Mon Sep 17 00:00:00 2001 -From: Zhengjun Xing -Date: Mon, 31 Oct 2022 15:25:04 +0800 -Subject: [PATCH 03/15] Update README - -Update README, add "check" for build Build Dependencies, add tips -for running NumaTOP. - -Signed-off-by: Zhengjun Xing ---- - README.md | 24 +++++++++++++++++++++++- - 1 file changed, 23 insertions(+), 1 deletion(-) - -diff --git a/README.md b/README.md -index 6910c78..d16a486 100644 ---- a/README.md -+++ b/README.md -@@ -12,12 +12,34 @@ the `mgen` program for help information. - - ## Build Dependencies - --NumaTOP requires following libraries: -+NumaTOP requires following libraries or packages: - - * numactl-devel or libnuma-dev(el) - * libncurses - * libpthread - -+* check -+ -+## Run NumaTOP -+ -+NumaTOP requires running as root. -+ # ./numatop -+ -+In many systems, the default max open files are 1024, for platforms (like SPR) -+that have more CPUs, they require the system with the max open files should -+bigger than 1024, otherwise, the error can be "Fail to setup perf": -+ -+ # ulimit -n -+ 1024 <------the max open files are 1024 -+ # ./numatop -+ NumaTOP is starting ... -+ Fail to setup perf (probably permission denied)! -+ -+Need to enlarge the max open files: -+ -+ # ulimit -n 8192 -+ # ulimit -n -+ 8192 <------now the max open files are 8192 - - ## Supported Kernels - --- -2.31.1 - diff --git a/0004-x86-Prepare-for-multi-vendor-support.patch b/0004-x86-Prepare-for-multi-vendor-support.patch deleted file mode 100644 index e18094a..0000000 --- a/0004-x86-Prepare-for-multi-vendor-support.patch +++ /dev/null @@ -1,265 +0,0 @@ -From f39f29d200b83c568748afc4483feb544b4f6bd6 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Wed, 16 Feb 2022 16:01:18 +0530 -Subject: [PATCH 04/15] x86: Prepare for multi-vendor support - -In order to support x86 processors from other vendors, move -existing platform-specific code for Intel processors to the -new x86 directory and update the build files accordingly. - -Signed-off-by: Sandipan Das ---- - Makefile.am | 36 ++++++++++++++++----------------- - README.md | 2 +- - common/include/os/plat.h | 2 +- - common/include/types.h | 2 +- - configure.ac | 2 +- - test/mgen/{intel => x86}/util.c | 0 - {intel => x86}/bdw.c | 0 - {intel => x86}/include/bdw.h | 0 - {intel => x86}/include/nhm.h | 0 - {intel => x86}/include/skl.h | 0 - {intel => x86}/include/snb.h | 0 - {intel => x86}/include/types.h | 6 +++--- - {intel => x86}/include/util.h | 6 +++--- - {intel => x86}/include/wsm.h | 0 - {intel => x86}/nhm.c | 0 - {intel => x86}/plat.c | 0 - {intel => x86}/skl.c | 0 - {intel => x86}/snb.c | 0 - {intel => x86}/ui_perf_map.c | 0 - {intel => x86}/util.c | 0 - {intel => x86}/wsm.c | 0 - 21 files changed, 28 insertions(+), 28 deletions(-) - rename test/mgen/{intel => x86}/util.c (100%) - rename {intel => x86}/bdw.c (100%) - rename {intel => x86}/include/bdw.h (100%) - rename {intel => x86}/include/nhm.h (100%) - rename {intel => x86}/include/skl.h (100%) - rename {intel => x86}/include/snb.h (100%) - rename {intel => x86}/include/types.h (95%) - rename {intel => x86}/include/util.h (94%) - rename {intel => x86}/include/wsm.h (100%) - rename {intel => x86}/nhm.c (100%) - rename {intel => x86}/plat.c (100%) - rename {intel => x86}/skl.c (100%) - rename {intel => x86}/snb.c (100%) - rename {intel => x86}/ui_perf_map.c (100%) - rename {intel => x86}/util.c (100%) - rename {intel => x86}/wsm.c (100%) - -diff --git a/Makefile.am b/Makefile.am -index 643704a..438a9fc 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -58,23 +58,23 @@ libnumatop_la_SOURCES = \ - common/util.c \ - common/win.c - --if CPU_INTEL -+if CPU_X86 - libnumatop_la_SOURCES += \ -- intel/include/bdw.h \ -- intel/include/nhm.h \ -- intel/include/skl.h \ -- intel/include/snb.h \ -- intel/include/types.h \ -- intel/include/util.h \ -- intel/include/wsm.h \ -- intel/bdw.c \ -- intel/nhm.c \ -- intel/plat.c \ -- intel/skl.c \ -- intel/snb.c \ -- intel/ui_perf_map.c \ -- intel/util.c \ -- intel/wsm.c -+ x86/include/bdw.h \ -+ x86/include/nhm.h \ -+ x86/include/skl.h \ -+ x86/include/snb.h \ -+ x86/include/types.h \ -+ x86/include/util.h \ -+ x86/include/wsm.h \ -+ x86/bdw.c \ -+ x86/nhm.c \ -+ x86/plat.c \ -+ x86/skl.c \ -+ x86/snb.c \ -+ x86/ui_perf_map.c \ -+ x86/util.c \ -+ x86/wsm.c - endif - - if CPU_PPC -@@ -106,9 +106,9 @@ if CPU_PPC - mgen_SOURCES += \ - test/mgen/powerpc/util.c - endif --if CPU_INTEL -+if CPU_X86 - mgen_SOURCES += \ -- test/mgen/intel/util.c -+ test/mgen/x86/util.c - endif - - TESTS = test/mgen.01.sh test/mgen.02.sh -diff --git a/README.md b/README.md -index d16a486..e96f0a8 100644 ---- a/README.md -+++ b/README.md -@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864 - - common: common code for all platforms. - --intel : Intel platform-specific code. -+x86 : Intel platform-specific code. - - powerpc: PowerPC platform-specific code. - -diff --git a/common/include/os/plat.h b/common/include/os/plat.h -index 35629dc..e35093d 100644 ---- a/common/include/os/plat.h -+++ b/common/include/os/plat.h -@@ -35,7 +35,7 @@ - #ifdef __powerpc64__ - #include "../../../powerpc/include/types.h" - #else --#include "../../../intel/include/types.h" -+#include "../../../x86/include/types.h" - #endif - - #ifdef __cplusplus -diff --git a/common/include/types.h b/common/include/types.h -index fc9c592..3e30f7c 100644 ---- a/common/include/types.h -+++ b/common/include/types.h -@@ -34,7 +34,7 @@ - #ifdef __powerpc64__ - #include "../../powerpc/include/types.h" - #else --#include "../../intel/include/types.h" -+#include "../../x86/include/types.h" - #endif - - #ifdef __cplusplus -diff --git a/configure.ac b/configure.ac -index 36edcc5..71fa92d 100644 ---- a/configure.ac -+++ b/configure.ac -@@ -10,7 +10,7 @@ LT_INIT - AC_CONFIG_MACRO_DIRS([m4]) - - AC_CANONICAL_HOST --AM_CONDITIONAL(CPU_INTEL, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686") -+AM_CONDITIONAL(CPU_X86, test "x$host_cpu" = "xx86_64" -o "x$host_cpu" = "xi686") - AM_CONDITIONAL(CPU_PPC, test "x$host_cpu" = "xpowerpc64" -o "x$host_cpu" = "xpowerpc64le") - - # Checks for programs. -diff --git a/test/mgen/intel/util.c b/test/mgen/x86/util.c -similarity index 100% -rename from test/mgen/intel/util.c -rename to test/mgen/x86/util.c -diff --git a/intel/bdw.c b/x86/bdw.c -similarity index 100% -rename from intel/bdw.c -rename to x86/bdw.c -diff --git a/intel/include/bdw.h b/x86/include/bdw.h -similarity index 100% -rename from intel/include/bdw.h -rename to x86/include/bdw.h -diff --git a/intel/include/nhm.h b/x86/include/nhm.h -similarity index 100% -rename from intel/include/nhm.h -rename to x86/include/nhm.h -diff --git a/intel/include/skl.h b/x86/include/skl.h -similarity index 100% -rename from intel/include/skl.h -rename to x86/include/skl.h -diff --git a/intel/include/snb.h b/x86/include/snb.h -similarity index 100% -rename from intel/include/snb.h -rename to x86/include/snb.h -diff --git a/intel/include/types.h b/x86/include/types.h -similarity index 95% -rename from intel/include/types.h -rename to x86/include/types.h -index 76c7ad3..1a15b3a 100644 ---- a/intel/include/types.h -+++ b/x86/include/types.h -@@ -27,8 +27,8 @@ - * POSSIBILITY OF SUCH DAMAGE. - */ - --#ifndef _NUMATOP_INTEL_TYPES_H --#define _NUMATOP_INTEL_TYPES_H -+#ifndef _NUMATOP_X86_TYPES_H -+#define _NUMATOP_X86_TYPES_H - - #include "../../common/include/types.h" - -@@ -62,4 +62,4 @@ typedef enum { - - #define PERF_COUNT_NUM 5 - --#endif /* _NUMATOP_INTEL_TYPES_H */ -+#endif /* _NUMATOP_X86_TYPES_H */ -diff --git a/intel/include/util.h b/x86/include/util.h -similarity index 94% -rename from intel/include/util.h -rename to x86/include/util.h -index 7026e99..37a6300 100644 ---- a/intel/include/util.h -+++ b/x86/include/util.h -@@ -27,8 +27,8 @@ - * POSSIBILITY OF SUCH DAMAGE. - */ - --#ifndef _NUMATOP_INTEL_UTIL_H --#define _NUMATOP_INTEL_UTIL_H -+#ifndef _NUMATOP_X86_UTIL_H -+#define _NUMATOP_X86_UTIL_H - - #define CPU_FAMILY(eax) \ - (((eax) & 0x0F00) >> 8) -@@ -39,4 +39,4 @@ - #define CPU_EXT_MODEL(eax) \ - (((eax) & 0xF0000) >> 16) - --#endif /* _NUMATOP_INTEL_UTIL_H */ -+#endif /* _NUMATOP_X86_UTIL_H */ -diff --git a/intel/include/wsm.h b/x86/include/wsm.h -similarity index 100% -rename from intel/include/wsm.h -rename to x86/include/wsm.h -diff --git a/intel/nhm.c b/x86/nhm.c -similarity index 100% -rename from intel/nhm.c -rename to x86/nhm.c -diff --git a/intel/plat.c b/x86/plat.c -similarity index 100% -rename from intel/plat.c -rename to x86/plat.c -diff --git a/intel/skl.c b/x86/skl.c -similarity index 100% -rename from intel/skl.c -rename to x86/skl.c -diff --git a/intel/snb.c b/x86/snb.c -similarity index 100% -rename from intel/snb.c -rename to x86/snb.c -diff --git a/intel/ui_perf_map.c b/x86/ui_perf_map.c -similarity index 100% -rename from intel/ui_perf_map.c -rename to x86/ui_perf_map.c -diff --git a/intel/util.c b/x86/util.c -similarity index 100% -rename from intel/util.c -rename to x86/util.c -diff --git a/intel/wsm.c b/x86/wsm.c -similarity index 100% -rename from intel/wsm.c -rename to x86/wsm.c --- -2.31.1 - diff --git a/0005-x86-zen-Add-initial-support.patch b/0005-x86-zen-Add-initial-support.patch deleted file mode 100644 index 7a4adaf..0000000 --- a/0005-x86-zen-Add-initial-support.patch +++ /dev/null @@ -1,322 +0,0 @@ -From fdf9b3ce90d1f435fe837added7373e25e6045b2 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Wed, 16 Feb 2022 18:05:27 +0530 -Subject: [PATCH 05/15] x86/zen: Add initial support - -Add vendor and family identification as well as the relevant -events to count per-process memory accesseses and CPU usage -on AMD Zen and Zen 2 family of processors. - -Signed-off-by: Sandipan Das ---- - Makefile.am | 4 ++- - README.md | 2 +- - x86/include/types.h | 5 ++-- - x86/include/util.h | 3 ++ - x86/include/zen.h | 50 ++++++++++++++++++++++++++++++++ - x86/plat.c | 29 +++++++++++++------ - x86/zen.c | 70 +++++++++++++++++++++++++++++++++++++++++++++ - 7 files changed, 151 insertions(+), 12 deletions(-) - create mode 100644 x86/include/zen.h - create mode 100644 x86/zen.c - -diff --git a/Makefile.am b/Makefile.am -index 438a9fc..ae11522 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -67,6 +67,7 @@ libnumatop_la_SOURCES += \ - x86/include/types.h \ - x86/include/util.h \ - x86/include/wsm.h \ -+ x86/include/zen.h \ - x86/bdw.c \ - x86/nhm.c \ - x86/plat.c \ -@@ -74,7 +75,8 @@ libnumatop_la_SOURCES += \ - x86/snb.c \ - x86/ui_perf_map.c \ - x86/util.c \ -- x86/wsm.c -+ x86/wsm.c \ -+ x86/zen.c - endif - - if CPU_PPC -diff --git a/README.md b/README.md -index e96f0a8..9908e92 100644 ---- a/README.md -+++ b/README.md -@@ -58,7 +58,7 @@ http://www.gossamer-threads.com/lists/linux/kernel/1964864 - - common: common code for all platforms. - --x86 : Intel platform-specific code. -+x86 : Intel and AMD platform-specific code. - - powerpc: PowerPC platform-specific code. - -diff --git a/x86/include/types.h b/x86/include/types.h -index 1a15b3a..0843bd1 100644 ---- a/x86/include/types.h -+++ b/x86/include/types.h -@@ -46,10 +46,11 @@ typedef enum { - CPU_BDX, - CPU_SKX, - CPU_ICX, -- CPU_SPR -+ CPU_SPR, -+ CPU_ZEN - } cpu_type_t; - --#define CPU_TYPE_NUM 12 -+#define CPU_TYPE_NUM 13 - - typedef enum { - PERF_COUNT_INVALID = -1, -diff --git a/x86/include/util.h b/x86/include/util.h -index 37a6300..4d2534b 100644 ---- a/x86/include/util.h -+++ b/x86/include/util.h -@@ -36,6 +36,9 @@ - #define CPU_MODEL(eax) \ - (((eax) & 0x00F0) >> 4) - -+#define CPU_EXT_FAMILY(eax) \ -+ (((eax) & 0x0FF00000) >> 20) -+ - #define CPU_EXT_MODEL(eax) \ - (((eax) & 0xF0000) >> 16) - -diff --git a/x86/include/zen.h b/x86/include/zen.h -new file mode 100644 -index 0000000..be61324 ---- /dev/null -+++ b/x86/include/zen.h -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (c) 2023, AMD Corporation -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * -+ * * Redistributions of source code must retain the above copyright notice, -+ * this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Intel Corporation nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#ifndef _NUMATOP_AMD_ZEN_H -+#define _NUMATOP_AMD_ZEN_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+#include "../../common/include/types.h" -+ -+struct _plat_event_config; -+ -+extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *); -+extern void zen_ll_config(struct _plat_event_config *); -+extern int zen_offcore_num(void); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _NUMATOP_AMD_ZEN_H */ -diff --git a/x86/plat.c b/x86/plat.c -index abf3766..f79837a 100644 ---- a/x86/plat.c -+++ b/x86/plat.c -@@ -36,6 +36,7 @@ - #include "include/snb.h" - #include "include/bdw.h" - #include "include/skl.h" -+#include "include/zen.h" - - pfn_plat_profiling_config_t - s_plat_profiling_config[CPU_TYPE_NUM] = { -@@ -50,7 +51,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = { - bdw_profiling_config, - skl_profiling_config, - icx_profiling_config, -- spr_profiling_config -+ spr_profiling_config, -+ zen_profiling_config - }; - - pfn_plat_ll_config_t -@@ -66,7 +68,8 @@ s_plat_ll_config[CPU_TYPE_NUM] = { - bdw_ll_config, - skl_ll_config, - icx_ll_config, -- spr_ll_config -+ spr_ll_config, -+ zen_ll_config - }; - - pfn_plat_offcore_num_t -@@ -82,7 +85,8 @@ s_plat_offcore_num[CPU_TYPE_NUM] = { - bdw_offcore_num, - skl_offcore_num, - icx_offcore_num, -- spr_offcore_num -+ spr_offcore_num, -+ zen_offcore_num - }; - - /* ARGSUSED */ -@@ -117,7 +121,7 @@ static cpu_type_t - cpu_type_get(void) - { - unsigned int eax, ebx, ecx, edx; -- int family, model, ext_model; -+ int family, model; - cpu_type_t type = CPU_UNSUP; - char vendor[16]; - -@@ -129,7 +133,8 @@ cpu_type_get(void) - (void) strncpy(&vendor[8], (char *)(&edx), 4); - vendor[12] = 0; - -- if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0) { -+ if (strncmp(vendor, "Genu" "ntel" "ineI", 12) != 0 && -+ strncmp(vendor, "Auth" "cAMD" "enti", 12) != 0) { - return (CPU_UNSUP); - } - -@@ -138,11 +143,16 @@ cpu_type_get(void) - - family = CPU_FAMILY(eax); - model = CPU_MODEL(eax); -- ext_model = CPU_EXT_MODEL(eax); - -- if (family == 6) { -- model = (ext_model << 4) + model; -+ /* Extended Model ID is considered only when Family ID is either 6 or 15 */ -+ if (family == 6 || family == 15) -+ model += CPU_EXT_MODEL(eax) << 4; -+ -+ /* Extended Family ID is considered only when Family ID is 15 */ -+ if (family == 15) -+ family += CPU_EXT_FAMILY(eax); - -+ if (family == 6) { - switch (model) { - case 26: - type = CPU_NHM_EP; -@@ -178,6 +188,8 @@ cpu_type_get(void) - type = CPU_SPR; - break; - } -+ } else if (family == 23) { -+ type = CPU_ZEN; - } - - return (type); -@@ -217,6 +229,7 @@ plat_detect(void) - case CPU_SKX: - case CPU_ICX: - case CPU_SPR: -+ case CPU_ZEN: - ret = 0; - s_cpu_type = cpu_type; - break; -diff --git a/x86/zen.c b/x86/zen.c -new file mode 100644 -index 0000000..abf603a ---- /dev/null -+++ b/x86/zen.c -@@ -0,0 +1,70 @@ -+/* -+ * Copyright (c) 2023, AMD Corporation -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * -+ * * Redistributions of source code must retain the above copyright notice, -+ * this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Intel Corporation nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+/* This file contains the Zen platform specific functions. */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../common/include/os/linux/perf_event.h" -+#include "../common/include/os/plat.h" -+#include "include/zen.h" -+ -+static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" }, -+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" }, -+}; -+ -+static plat_event_config_t s_zen_ll = { -+ PERF_TYPE_RAW, 0, 0, 0, "Unsupported" -+}; -+ -+void -+zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) -+{ -+ plat_config_get(perf_count_id, cfg, s_zen_config); -+} -+ -+void -+zen_ll_config(plat_event_config_t *cfg) -+{ -+ memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t)); -+} -+ -+int -+zen_offcore_num(void) -+{ -+ return (2); -+} --- -2.31.1 - diff --git a/0006-common-Add-sample-period-to-platform-event-config.patch b/0006-common-Add-sample-period-to-platform-event-config.patch deleted file mode 100644 index d2558e3..0000000 --- a/0006-common-Add-sample-period-to-platform-event-config.patch +++ /dev/null @@ -1,308 +0,0 @@ -From 4a8b8d47f4a240a95830dc05abd3c19e10b6d821 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Fri, 11 Nov 2022 13:56:09 +0530 -Subject: [PATCH 06/15] common: Add sample period to platform event config - -Precise PMU events are currently used for capturing memory -access statistics. The sample period used for such events is -currently hard-coded (LL_THRESH) and an universal value may -not work well on all platforms due to microarchitectural -differences in the design of the precise PMU. - -E.g. precise events are programmed through Instruction Based -Sampling (IBS) on AMD processors but that PMU does not have -the ability to tag only load-store operations. This leads to -the capture of many samples that are not relevant for the -current use-case. To get an appropriate amount of relevant -data, more samples need to be captured and then filtered. -This is achieved by increasing the sampling frequency. - -Add sample period as an additional attribute to the platform -event config structure so that a customized sample period -that works well on a specific platform can be passed during -event programming. If not set, a default value is chosen. - -Signed-off-by: Sandipan Das ---- - common/include/os/plat.h | 1 + - common/os/os_perf.c | 6 +++++- - powerpc/power8.c | 14 +++++++------- - powerpc/power9.c | 14 +++++++------- - x86/bdw.c | 12 ++++++------ - x86/nhm.c | 12 ++++++------ - x86/skl.c | 32 ++++++++++++++++---------------- - x86/snb.c | 12 ++++++------ - x86/wsm.c | 22 +++++++++++----------- - x86/zen.c | 2 +- - 10 files changed, 66 insertions(+), 61 deletions(-) - -diff --git a/common/include/os/plat.h b/common/include/os/plat.h -index e35093d..34535cd 100644 ---- a/common/include/os/plat.h -+++ b/common/include/os/plat.h -@@ -53,6 +53,7 @@ typedef struct _plat_event_config { - uint64_t config; - uint64_t other_attr; - uint64_t extra_value; -+ uint64_t sample_period; - char desc[PLAT_EVENT_DESC_SIZE]; - } plat_event_config_t; - -diff --git a/common/os/os_perf.c b/common/os/os_perf.c -index f2f1104..f1036a9 100644 ---- a/common/os/os_perf.c -+++ b/common/os/os_perf.c -@@ -839,7 +839,11 @@ ll_init(pf_conf_t *conf) - conf->type = cfg.type; - conf->config = (cfg.config) | (cfg.other_attr << 16); - conf->config1 = cfg.extra_value; -- conf->sample_period = LL_PERIOD; -+ conf->sample_period = cfg.sample_period; -+ -+ /* If sample period is not set, choose a default value */ -+ if (!cfg.sample_period) -+ conf->sample_period = LL_PERIOD; - } - - int -diff --git a/powerpc/power8.c b/powerpc/power8.c -index b3cab75..a76851d 100644 ---- a/powerpc/power8.c -+++ b/powerpc/power8.c -@@ -38,16 +38,16 @@ - #include "include/power8.h" - - static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" }, -- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" }, -- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" }, -- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" }, -- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" }, -- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" }, -+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" }, -+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" }, -+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" }, -+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" }, - }; - - static plat_event_config_t s_power8_ll = { -- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED" -+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED" - }; - - void -diff --git a/powerpc/power9.c b/powerpc/power9.c -index c6f1cec..4b0bcfc 100644 ---- a/powerpc/power9.c -+++ b/powerpc/power9.c -@@ -38,16 +38,16 @@ - #include "include/power9.h" - - static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_RAW, 0x600f4, 0, 0, "PM_RUN_CYC" }, -- { PERF_TYPE_RAW, 0x4c04c, 0, 0, "PM_DATA_FROM_DMEM" }, -- { PERF_TYPE_RAW, 0x1001e, 0, 0, "PM_CYC" }, -- { PERF_TYPE_RAW, 0x500fa, 0, 0, "PM_RUN_INST_CMPL" }, -- { PERF_TYPE_RAW, 0x2c048, 0, 0, "PM_DATA_FROM_LMEM" }, -- { PERF_TYPE_RAW, 0x3c04a, 0, 0, "PM_DATA_FROM_RMEM" }, -+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" }, -+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" }, -+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" }, -+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" }, - }; - - static plat_event_config_t s_power9_ll = { -- PERF_TYPE_RAW, 0x0000, 0, 0, "PM_SUSPENDED" -+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED" - }; - - void -diff --git a/x86/bdw.c b/x86/bdw.c -index 97e33ea..5640f7b 100644 ---- a/x86/bdw.c -+++ b/x86/bdw.c -@@ -40,15 +40,15 @@ - #include "include/bdw.h" - - static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_bdw_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/nhm.c b/x86/nhm.c -index bf8c14f..d29d396 100644 ---- a/x86/nhm.c -+++ b/x86/nhm.c -@@ -41,15 +41,15 @@ - #include "include/nhm.h" - - static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_nhm_ll = { -- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold" - }; - - static void -diff --git a/x86/skl.c b/x86/skl.c -index ace0833..6f81298 100644 ---- a/x86/skl.c -+++ b/x86/skl.c -@@ -40,31 +40,31 @@ - #include "include/skl.h" - - static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_skl_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/snb.c b/x86/snb.c -index eb89859..3d3185b 100644 ---- a/x86/snb.c -+++ b/x86/snb.c -@@ -40,15 +40,15 @@ - #include "include/snb.h" - - static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_snb_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/wsm.c b/x86/wsm.c -index f4285c2..16f68e4 100644 ---- a/x86/wsm.c -+++ b/x86/wsm.c -@@ -40,23 +40,23 @@ - #include "include/wsm.h" - - static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_wsm_ll = { -- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, "mem_inst_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold" - }; - - void -diff --git a/x86/zen.c b/x86/zen.c -index abf603a..c153a1a 100644 ---- a/x86/zen.c -+++ b/x86/zen.c -@@ -48,7 +48,7 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { - }; - - static plat_event_config_t s_zen_ll = { -- PERF_TYPE_RAW, 0, 0, 0, "Unsupported" -+ PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported" - }; - - void --- -2.31.1 - diff --git a/0007-common-Add-exclude-guest-to-platform-event-config.patch b/0007-common-Add-exclude-guest-to-platform-event-config.patch deleted file mode 100644 index 3b24427..0000000 --- a/0007-common-Add-exclude-guest-to-platform-event-config.patch +++ /dev/null @@ -1,350 +0,0 @@ -From 9d665e4712f0dfa48603471c51ed3c87441030ad Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Tue, 13 Jun 2023 19:21:49 +0530 -Subject: [PATCH 07/15] common: Add exclude guest to platform event config - -Precise PMU events are currently used for capturing memory -access statistics. Currently, these events are programmed to -exclude guests and this does not work well on all platforms -due to differences in the design of the precise PMU. - -E.g. precise events are programmed through Instruction Based -Sampling (IBS) on AMD processors but that PMU does not have -the ability to ignore guests unlike the Core PMU. - -Add exclude guest as an additional attribute to the platform -event config structure so that precise events can be customized -to work on different platforms. - -Signed-off-by: Sandipan Das ---- - common/include/os/pfwrapper.h | 1 + - common/include/os/plat.h | 1 + - common/include/types.h | 1 + - common/os/os_perf.c | 1 + - common/os/pfwrapper.c | 2 +- - powerpc/power8.c | 14 +++++++------- - powerpc/power9.c | 14 +++++++------- - x86/bdw.c | 12 ++++++------ - x86/nhm.c | 12 ++++++------ - x86/skl.c | 32 ++++++++++++++++---------------- - x86/snb.c | 12 ++++++------ - x86/wsm.c | 22 +++++++++++----------- - x86/zen.c | 12 ++++++------ - 13 files changed, 70 insertions(+), 66 deletions(-) - -diff --git a/common/include/os/pfwrapper.h b/common/include/os/pfwrapper.h -index 414d6af..1864a10 100644 ---- a/common/include/os/pfwrapper.h -+++ b/common/include/os/pfwrapper.h -@@ -78,6 +78,7 @@ typedef struct _pf_conf { - uint64_t config; - uint64_t config1; - uint64_t sample_period; -+ bool exclude_guest; - } pf_conf_t; - - typedef struct _pf_profiling_rec { -diff --git a/common/include/os/plat.h b/common/include/os/plat.h -index 34535cd..ac4aac8 100644 ---- a/common/include/os/plat.h -+++ b/common/include/os/plat.h -@@ -54,6 +54,7 @@ typedef struct _plat_event_config { - uint64_t other_attr; - uint64_t extra_value; - uint64_t sample_period; -+ bool exclude_guest; - char desc[PLAT_EVENT_DESC_SIZE]; - } plat_event_config_t; - -diff --git a/common/include/types.h b/common/include/types.h -index 3e30f7c..efe3055 100644 ---- a/common/include/types.h -+++ b/common/include/types.h -@@ -30,6 +30,7 @@ - #define _NUMATOP_TYPES_H - - #include -+#include - #include "./os/os_types.h" - #ifdef __powerpc64__ - #include "../../powerpc/include/types.h" -diff --git a/common/os/os_perf.c b/common/os/os_perf.c -index f1036a9..44ca43d 100644 ---- a/common/os/os_perf.c -+++ b/common/os/os_perf.c -@@ -840,6 +840,7 @@ ll_init(pf_conf_t *conf) - conf->config = (cfg.config) | (cfg.other_attr << 16); - conf->config1 = cfg.extra_value; - conf->sample_period = cfg.sample_period; -+ conf->exclude_guest = cfg.exclude_guest; - - /* If sample period is not set, choose a default value */ - if (!cfg.sample_period) -diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c -index b4c4f17..e08ce07 100644 ---- a/common/os/pfwrapper.c -+++ b/common/os/pfwrapper.c -@@ -432,7 +432,7 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf) - attr.config1 = conf->config1; - attr.sample_period = conf->sample_period; - attr.precise_ip = 1; -- attr.exclude_guest = 1; -+ attr.exclude_guest = conf->exclude_guest; - attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU | - PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN; - attr.disabled = 1; -diff --git a/powerpc/power8.c b/powerpc/power8.c -index a76851d..d8f4e01 100644 ---- a/powerpc/power8.c -+++ b/powerpc/power8.c -@@ -38,16 +38,16 @@ - #include "include/power8.h" - - static plat_event_config_t s_power8_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" }, -- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" }, -- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" }, -- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" }, -+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" }, -+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" }, -+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" }, -+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" }, - }; - - static plat_event_config_t s_power8_ll = { -- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED" -+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED" - }; - - void -diff --git a/powerpc/power9.c b/powerpc/power9.c -index 4b0bcfc..9879ec7 100644 ---- a/powerpc/power9.c -+++ b/powerpc/power9.c -@@ -38,16 +38,16 @@ - #include "include/power9.h" - - static plat_event_config_t s_power9_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, "PM_RUN_CYC" }, -- { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -- { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, "PM_CYC" }, -- { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, "PM_RUN_INST_CMPL" }, -- { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -- { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, "PM_DATA_FROM_RMEM" }, -+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" }, -+ { PERF_TYPE_RAW, 0x4c04c, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -+ { PERF_TYPE_RAW, 0x1001e, 0, 0, 0, 0, "PM_CYC" }, -+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" }, -+ { PERF_TYPE_RAW, 0x2c048, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -+ { PERF_TYPE_RAW, 0x3c04a, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" }, - }; - - static plat_event_config_t s_power9_ll = { -- PERF_TYPE_RAW, 0x0000, 0, 0, 0, "PM_SUSPENDED" -+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED" - }; - - void -diff --git a/x86/bdw.c b/x86/bdw.c -index 5640f7b..97eca67 100644 ---- a/x86/bdw.c -+++ b/x86/bdw.c -@@ -40,15 +40,15 @@ - #include "include/bdw.h" - - static plat_event_config_t s_bdw_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x604000001, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_bdw_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/nhm.c b/x86/nhm.c -index d29d396..cf65705 100644 ---- a/x86/nhm.c -+++ b/x86/nhm.c -@@ -41,15 +41,15 @@ - #include "include/nhm.h" - - static plat_event_config_t s_nhm_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, INVALID_CODE_UMASK, 0, 0, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_nhm_ll = { -- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold" - }; - - static void -diff --git a/x86/skl.c b/x86/skl.c -index 6f81298..a7bbc14 100644 ---- a/x86/skl.c -+++ b/x86/skl.c -@@ -40,31 +40,31 @@ - #include "include/skl.h" - - static plat_event_config_t s_skl_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x638000001, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x1f84000001, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_icx_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x730000001, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x104000001, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_spr_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x012A, 0x53, 0x730000001, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x012B, 0x53, 0x104000001, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_skl_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/snb.c b/x86/snb.c -index 3d3185b..135ee1c 100644 ---- a/x86/snb.c -+++ b/x86/snb.c -@@ -40,15 +40,15 @@ - #include "include/snb.h" - - static plat_event_config_t s_snb_ep_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x67f800001, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x600400001, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_snb_ll = { -- PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, "mem_trans_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x01CD, 0x53, LL_THRESH, 0, 1, "mem_trans_retired.latency_above_threshold" - }; - - void -diff --git a/x86/wsm.c b/x86/wsm.c -index 16f68e4..7b122fd 100644 ---- a/x86/wsm.c -+++ b/x86/wsm.c -@@ -40,23 +40,23 @@ - #include "include/wsm.h" - - static plat_event_config_t s_wsmex_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_wsmep_profiling[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.core" }, -- { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, "off_core_response_0" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, "cpu_clk_unhalted.ref" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "instr_retired.any" }, -- { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, "off_core_response_1" } -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.core" }, -+ { PERF_TYPE_RAW, 0x01B7, 0x53, 0x2011, 0, 0, "off_core_response_0" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES, 0x53, 0, 0, 0, "cpu_clk_unhalted.ref" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "instr_retired.any" }, -+ { PERF_TYPE_RAW, 0x01BB, 0x53, 0x5011, 0, 0, "off_core_response_1" } - }; - - static plat_event_config_t s_wsm_ll = { -- PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, "mem_inst_retired.latency_above_threshold" -+ PERF_TYPE_RAW, 0x100B, 0x53, LL_THRESH, 0, 1, "mem_inst_retired.latency_above_threshold" - }; - - void -diff --git a/x86/zen.c b/x86/zen.c -index c153a1a..2f851a2 100644 ---- a/x86/zen.c -+++ b/x86/zen.c -@@ -40,15 +40,15 @@ - #include "include/zen.h" - - static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" }, -- { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, "LsNotHaltedCyc" }, -- { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, "ExRetOps" }, -- { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" }, -+ { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" }, - }; - - static plat_event_config_t s_zen_ll = { -- PERF_TYPE_RAW, 0, 0, 0, 0, "Unsupported" -+ PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported" - }; - - void --- -2.31.1 - diff --git a/0008-x86-zen-Add-support-for-memory-access-stats.patch b/0008-x86-zen-Add-support-for-memory-access-stats.patch deleted file mode 100644 index 0e5bbed..0000000 --- a/0008-x86-zen-Add-support-for-memory-access-stats.patch +++ /dev/null @@ -1,156 +0,0 @@ -From aefc85d7b956c4df998afb4cfe5c413e5fd5b062 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Thu, 10 Mar 2022 10:32:51 +0530 -Subject: [PATCH 08/15] x86/zen: Add support for memory access stats - -Add support for capturing memory access statistics on Zen -processors using Instruction Based Sampling (IBS). - -IBS, by design, cannot tag specific types of ops and hence -cannot provide samples for only those ops that cause memory -access. Hence, additional post-processing is required for -filtering out irrelevant samples. To get an appropriate -volume of samples, the sampling frequency also needs to be -high. - -Signed-off-by: Sandipan Das ---- - common/os/pfwrapper.c | 20 +++++++++++++++++--- - x86/zen.c | 35 ++++++++++++++++++++++++++++++++++- - 2 files changed, 51 insertions(+), 4 deletions(-) - -diff --git a/common/os/pfwrapper.c b/common/os/pfwrapper.c -index e08ce07..d6102be 100644 ---- a/common/os/pfwrapper.c -+++ b/common/os/pfwrapper.c -@@ -434,7 +434,8 @@ pf_ll_setup(struct _perf_cpu *cpu, pf_conf_t *conf) - attr.precise_ip = 1; - attr.exclude_guest = conf->exclude_guest; - attr.sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | PERF_SAMPLE_CPU | -- PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN; -+ PERF_SAMPLE_WEIGHT | PERF_SAMPLE_CALLCHAIN | -+ PERF_SAMPLE_DATA_SRC; - attr.disabled = 1; - - if ((fds[0] = pf_event_open(&attr, -1, cpu->cpuid, -1, 0)) < 0) { -@@ -481,6 +482,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size, - pf_ll_rec_t *rec) - { - struct { uint32_t pid, tid; } id; -+ union perf_mem_data_src data_src; - uint64_t i, addr, cpu, weight, nr, value, *ips; - int j, ret = -1; - -@@ -492,6 +494,7 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size, - * [ u64 nr; } - * { u64 ips[nr]; } - * { u64 weight; } -+ * { u64 data_src; } - * }; - */ - if (mmap_buffer_read(mhdr, &id, sizeof (id)) == -1) { -@@ -551,7 +554,18 @@ ll_sample_read(struct perf_event_mmap_page *mhdr, int size, - } - - size -= sizeof (weight); -- -+ -+ if (mmap_buffer_read(mhdr, &data_src, sizeof (data_src)) == -1) { -+ debug_print(NULL, 2, "ll_sample_read: read data_src failed.\n"); -+ goto L_EXIT; -+ } -+ -+ size -= sizeof (data_src); -+ -+ if (data_src.mem_op == PERF_MEM_OP_NA || -+ data_src.mem_op == PERF_MEM_OP_EXEC) -+ addr = 0; -+ - rec->ip_num = j; - rec->pid = id.pid; - rec->tid = id.tid; -@@ -575,7 +589,7 @@ ll_recbuf_update(pf_ll_rec_t *rec_arr, int *nrec, pf_ll_rec_t *rec) - { - int i; - -- if ((rec->pid == 0) || (rec->tid == 0)) { -+ if ((rec->pid == 0) || (rec->tid == 0) || (rec->addr == 0)) { - /* Just consider the user-land process/thread. */ - return; - } -diff --git a/x86/zen.c b/x86/zen.c -index 2f851a2..67a425b 100644 ---- a/x86/zen.c -+++ b/x86/zen.c -@@ -30,7 +30,9 @@ - - #include - #include -+#include - #include -+#include - #include - #include - #include -@@ -39,6 +41,9 @@ - #include "../common/include/os/plat.h" - #include "include/zen.h" - -+#define IBS_OP_PMU_TYPE_PATH \ -+ "/sys/bus/event_source/devices/ibs_op/type" -+ - static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { - { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, - { PERF_TYPE_RAW, 0x0000000000004043, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Far" }, -@@ -47,8 +52,13 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { - { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" }, - }; - -+/* -+ * Owing to the nature of IBS uop tagging, a higher sampling period is -+ * required to capture meaningful samples. All samples may not originate -+ * from a memory access instruction and require additional filtering. -+ */ - static plat_event_config_t s_zen_ll = { -- PERF_TYPE_RAW, 0, 0, 0, 0, 0, "Unsupported" -+ 0, 0x0000000000000000, 0, 0, LL_THRESH * 10, 0, "IbsOpCntCycles" - }; - - void -@@ -57,10 +67,33 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) - plat_config_get(perf_count_id, cfg, s_zen_config); - } - -+static int -+zen_ibs_op_pmu_type(void) -+{ -+ int fd, type, i; -+ char buf[32]; -+ -+ if ((fd = open(IBS_OP_PMU_TYPE_PATH, O_RDONLY)) < 0) -+ return (-1); -+ -+ if ((i = read(fd, buf, sizeof (buf) - 1)) <= 0) { -+ close(fd); -+ return (-1); -+ } -+ -+ close(fd); -+ buf[i] = 0; -+ if ((type = atoi(buf)) == 0) -+ return (-1); -+ -+ return (type); -+} -+ - void - zen_ll_config(plat_event_config_t *cfg) - { - memcpy(cfg, &s_zen_ll, sizeof (plat_event_config_t)); -+ cfg->type = zen_ibs_op_pmu_type(); - } - - int --- -2.31.1 - diff --git a/0009-x86-Fix-clock-frequency-parsing.patch b/0009-x86-Fix-clock-frequency-parsing.patch deleted file mode 100644 index 0bb3399..0000000 --- a/0009-x86-Fix-clock-frequency-parsing.patch +++ /dev/null @@ -1,71 +0,0 @@ -From c149b054fe5b1851860fd01d54596ea75f5008d3 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Wed, 13 Apr 2022 11:45:08 +0530 -Subject: [PATCH 09/15] x86: Fix clock frequency parsing - -AMD processors do not advertise a base clock frequency as -a part of the "model name" in /proc/cpuinfo. The parsing -must fail in order to let os_calibrate() determine clock -speed from cpufreq information or from TSC instead. - -Since the parser fails to find "@", strcspn() returns the -length of the line instead and sscanf() ends up scanning -garbage values beyond the null terminator that match the -format specifier. To avoid this, add an additional check -that makes the condition fail if "@" is not found. - -Fixes: eaeed92 ("Powerpc: Fix CPU% utilization for PowerVMs") -Signed-off-by: Sandipan Das ---- - x86/util.c | 19 ++++++++++++++----- - 1 file changed, 14 insertions(+), 5 deletions(-) - -diff --git a/x86/util.c b/x86/util.c -index fdff877..655a677 100644 ---- a/x86/util.c -+++ b/x86/util.c -@@ -67,27 +67,36 @@ rdtsc(void) - - /* - * Check the cpu name in proc info. Intel CPUs always have @ x.y -- * Ghz and that is the TSC frequency. -+ * GHz and that is the TSC frequency. AMD CPUs do not advertise -+ * clock frequency as a part of the model name. - */ - int - arch__cpuinfo_freq(double *freq, char *unit) - { - FILE *f; - char *line = NULL; -- size_t len = 0; -+ size_t idx, len = 0; - int ret = -1; - - if ((f = fopen(CPUINFO_PATH, "r")) == NULL) { - return (-1); - } - -- while (getline(&line, &len, f) > 0) { -+ while ((len = getline(&line, &len, f)) > 0) { - if (strncmp(line, "model name", sizeof ("model name") - 1) != 0) { - continue; - } - -- if (sscanf(line + strcspn(line, "@") + 1, "%lf%10s", -- freq, unit) == 2) { -+ idx = strcspn(line, "@") + 1; -+ -+ /* -+ * The model name will not change for other processors. So -+ * bail out if "@" is not found. -+ */ -+ if (idx >= len) -+ break; -+ -+ if (sscanf(line + idx, "%lf%10s", freq, unit) == 2) { - if (strcasecmp(unit, "GHz") == 0) { - *freq *= GHZ; - } else if (strcasecmp(unit, "Mhz") == 0) { --- -2.31.1 - diff --git a/0010-x86-zen-Add-Zen-3-support.patch b/0010-x86-zen-Add-Zen-3-support.patch deleted file mode 100644 index fc3f7c4..0000000 --- a/0010-x86-zen-Add-Zen-3-support.patch +++ /dev/null @@ -1,135 +0,0 @@ -From e9bd7eaa767c987fcb8d6879e7c7509a24bcb17c Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Fri, 20 Jan 2023 13:56:37 +0530 -Subject: [PATCH 10/15] x86/zen: Add Zen 3 support - -Add vendor and family identification as well as the relevant -events to count per-process memory accesseses and CPU usage -on AMD Zen 3 family of processors. - -Key changes include the use of the LsAnyFillsFromSys event -instead of LsDmndFillsFromSys for counting local and remote -memory accesses. While LsDmndFillsFromSys covers only demand -cache fills, LsAnyFillsFromSys covers all cache fills -including prefetches. - -Signed-off-by: Sandipan Das ---- - x86/include/types.h | 5 +++-- - x86/include/zen.h | 1 + - x86/plat.c | 8 +++++++- - x86/zen.c | 14 ++++++++++++++ - 4 files changed, 25 insertions(+), 3 deletions(-) - -diff --git a/x86/include/types.h b/x86/include/types.h -index 0843bd1..4aa5fa6 100644 ---- a/x86/include/types.h -+++ b/x86/include/types.h -@@ -47,10 +47,11 @@ typedef enum { - CPU_SKX, - CPU_ICX, - CPU_SPR, -- CPU_ZEN -+ CPU_ZEN, -+ CPU_ZEN3 - } cpu_type_t; - --#define CPU_TYPE_NUM 13 -+#define CPU_TYPE_NUM 14 - - typedef enum { - PERF_COUNT_INVALID = -1, -diff --git a/x86/include/zen.h b/x86/include/zen.h -index be61324..b5c40f5 100644 ---- a/x86/include/zen.h -+++ b/x86/include/zen.h -@@ -40,6 +40,7 @@ extern "C" { - struct _plat_event_config; - - extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *); -+extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *); - extern void zen_ll_config(struct _plat_event_config *); - extern int zen_offcore_num(void); - -diff --git a/x86/plat.c b/x86/plat.c -index f79837a..35561dc 100644 ---- a/x86/plat.c -+++ b/x86/plat.c -@@ -52,7 +52,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = { - skl_profiling_config, - icx_profiling_config, - spr_profiling_config, -- zen_profiling_config -+ zen_profiling_config, -+ zen3_profiling_config - }; - - pfn_plat_ll_config_t -@@ -69,6 +70,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = { - skl_ll_config, - icx_ll_config, - spr_ll_config, -+ zen_ll_config, - zen_ll_config - }; - -@@ -86,6 +88,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = { - skl_offcore_num, - icx_offcore_num, - spr_offcore_num, -+ zen_offcore_num, - zen_offcore_num - }; - -@@ -190,6 +193,8 @@ cpu_type_get(void) - } - } else if (family == 23) { - type = CPU_ZEN; -+ } else if (family == 25) { -+ type = CPU_ZEN3; - } - - return (type); -@@ -230,6 +235,7 @@ plat_detect(void) - case CPU_ICX: - case CPU_SPR: - case CPU_ZEN: -+ case CPU_ZEN3: - ret = 0; - s_cpu_type = cpu_type; - break; -diff --git a/x86/zen.c b/x86/zen.c -index 67a425b..dd37d03 100644 ---- a/x86/zen.c -+++ b/x86/zen.c -@@ -52,6 +52,14 @@ static plat_event_config_t s_zen_config[PERF_COUNT_NUM] = { - { PERF_TYPE_RAW, 0x0000000000000843, 0, 0, 0, 0, "LsDmndFillsFromSys.DRAM_IO_Near" }, - }; - -+static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = { -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" }, -+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" }, -+}; -+ - /* - * Owing to the nature of IBS uop tagging, a higher sampling period is - * required to capture meaningful samples. All samples may not originate -@@ -67,6 +75,12 @@ zen_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) - plat_config_get(perf_count_id, cfg, s_zen_config); - } - -+void -+zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) -+{ -+ plat_config_get(perf_count_id, cfg, s_zen3_config); -+} -+ - static int - zen_ibs_op_pmu_type(void) - { --- -2.31.1 - diff --git a/0011-x86-zen-Add-Zen-4-support.patch b/0011-x86-zen-Add-Zen-4-support.patch deleted file mode 100644 index d774051..0000000 --- a/0011-x86-zen-Add-Zen-4-support.patch +++ /dev/null @@ -1,145 +0,0 @@ -From 7fc232a4df2013089300b0c23490d7d07c9c0165 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Fri, 20 Jan 2023 14:19:29 +0530 -Subject: [PATCH 11/15] x86/zen: Add Zen 4 support - -Add vendor and family identification as well as the relevant -events to count per-process memory accesseses and CPU usage -on AMD Zen 4 family of processors. - -Key changes include the use of the LsNotHaltedP0Cyc event to -count cycles at P0 frequency. This improves the accuracy of -the utilization metrics as, unlike the typical cycles event, -this is clock frequency invariant. - -Signed-off-by: Sandipan Das ---- - x86/include/types.h | 5 +++-- - x86/include/zen.h | 1 + - x86/plat.c | 18 ++++++++++++++---- - x86/zen.c | 14 ++++++++++++++ - 4 files changed, 32 insertions(+), 6 deletions(-) - -diff --git a/x86/include/types.h b/x86/include/types.h -index 4aa5fa6..bb965f9 100644 ---- a/x86/include/types.h -+++ b/x86/include/types.h -@@ -48,10 +48,11 @@ typedef enum { - CPU_ICX, - CPU_SPR, - CPU_ZEN, -- CPU_ZEN3 -+ CPU_ZEN3, -+ CPU_ZEN4 - } cpu_type_t; - --#define CPU_TYPE_NUM 14 -+#define CPU_TYPE_NUM 15 - - typedef enum { - PERF_COUNT_INVALID = -1, -diff --git a/x86/include/zen.h b/x86/include/zen.h -index b5c40f5..cbdfcd8 100644 ---- a/x86/include/zen.h -+++ b/x86/include/zen.h -@@ -41,6 +41,7 @@ struct _plat_event_config; - - extern void zen_profiling_config(perf_count_id_t, struct _plat_event_config *); - extern void zen3_profiling_config(perf_count_id_t, struct _plat_event_config *); -+extern void zen4_profiling_config(perf_count_id_t, struct _plat_event_config *); - extern void zen_ll_config(struct _plat_event_config *); - extern int zen_offcore_num(void); - -diff --git a/x86/plat.c b/x86/plat.c -index 35561dc..fe2bf01 100644 ---- a/x86/plat.c -+++ b/x86/plat.c -@@ -53,7 +53,8 @@ s_plat_profiling_config[CPU_TYPE_NUM] = { - icx_profiling_config, - spr_profiling_config, - zen_profiling_config, -- zen3_profiling_config -+ zen3_profiling_config, -+ zen4_profiling_config - }; - - pfn_plat_ll_config_t -@@ -71,6 +72,7 @@ s_plat_ll_config[CPU_TYPE_NUM] = { - icx_ll_config, - spr_ll_config, - zen_ll_config, -+ zen_ll_config, - zen_ll_config - }; - -@@ -89,6 +91,7 @@ s_plat_offcore_num[CPU_TYPE_NUM] = { - icx_offcore_num, - spr_offcore_num, - zen_offcore_num, -+ zen_offcore_num, - zen_offcore_num - }; - -@@ -191,10 +194,16 @@ cpu_type_get(void) - type = CPU_SPR; - break; - } -- } else if (family == 23) { -+ } else if (family == 23) { /* Family 17h */ - type = CPU_ZEN; -- } else if (family == 25) { -- type = CPU_ZEN3; -+ } else if (family == 25) { /* Family 19h */ -+ if ((model >= 0x00 && model <= 0x0f) || -+ (model >= 0x20 && model <= 0x2f) || -+ (model >= 0x40 && model <= 0x5f)) { -+ type = CPU_ZEN3; -+ } else { -+ type = CPU_ZEN4; -+ } - } - - return (type); -@@ -236,6 +245,7 @@ plat_detect(void) - case CPU_SPR: - case CPU_ZEN: - case CPU_ZEN3: -+ case CPU_ZEN4: - ret = 0; - s_cpu_type = cpu_type; - break; -diff --git a/x86/zen.c b/x86/zen.c -index dd37d03..c21eb1a 100644 ---- a/x86/zen.c -+++ b/x86/zen.c -@@ -60,6 +60,14 @@ static plat_event_config_t s_zen3_config[PERF_COUNT_NUM] = { - { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" }, - }; - -+static plat_event_config_t s_zen4_config[PERF_COUNT_NUM] = { -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, 0, 0, 0, 0, "LsNotHaltedCyc" }, -+ { PERF_TYPE_RAW, 0x0000000000004044, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoRemote" }, -+ { PERF_TYPE_RAW, 0x0000000100000120, 0, 0, 0, 0, "LsNotHaltedP0Cyc.P0FreqCyc" }, -+ { PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, 0x53, 0, 0, 0, "ExRetOps" }, -+ { PERF_TYPE_RAW, 0x0000000000000844, 0, 0, 0, 0, "LsAnyFillsFromSys.MemIoLocal" }, -+}; -+ - /* - * Owing to the nature of IBS uop tagging, a higher sampling period is - * required to capture meaningful samples. All samples may not originate -@@ -81,6 +89,12 @@ zen3_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) - plat_config_get(perf_count_id, cfg, s_zen3_config); - } - -+void -+zen4_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) -+{ -+ plat_config_get(perf_count_id, cfg, s_zen4_config); -+} -+ - static int - zen_ibs_op_pmu_type(void) - { --- -2.31.1 - diff --git a/0012-x86-Add-feature-tracker.patch b/0012-x86-Add-feature-tracker.patch deleted file mode 100644 index a9916c8..0000000 --- a/0012-x86-Add-feature-tracker.patch +++ /dev/null @@ -1,75 +0,0 @@ -From 34b5ee97323d0bc62ca2d0beae3e99b2213752c2 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Fri, 20 Jan 2023 14:39:28 +0530 -Subject: [PATCH 12/15] x86: Add feature tracker - -Add a file to keep track of features available on AMD and -Intel processors. - -Signed-off-by: Sandipan Das ---- - x86/FEATURES | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++ - 1 file changed, 52 insertions(+) - create mode 100755 x86/FEATURES - -diff --git a/x86/FEATURES b/x86/FEATURES -new file mode 100755 -index 0000000..7ece0d6 ---- /dev/null -+++ b/x86/FEATURES -@@ -0,0 +1,52 @@ -+Features supported on X86: -+-------------------------- -+ -+Per process/thread: -+ -+| Feature | Supported | -+|-----------------------------------------------------------|---------------| -+| | AMD | Intel | -+|-----------------------------------------------------------|---------------| -+| RMA | Y | Y | -+| LMA | Y | Y | -+| CPI | Y | Y | -+| CPU% | Y | Y | -+| Memory area ADDR | Y | Y | -+| Memory area SIZE | Y | Y | -+| Memory area ACCESS% | Y | Y | -+| Memory area LAT(ns) | Y | Y | -+| Memory area DESC | Y | Y | -+| Node ACCESS% | Y | Y | -+| Node LAT(ns) | Y | Y | -+| Call-chain when process generates RMA / LMA / CYCLES / IR | Y | Y | -+| Call-chain when process accesses the memory area | Y | Y | -+| PQOS CMT/MBM | N | Y | -+ -+Per Node: -+ -+| Feature | Supported | -+|-----------------------------------------------------------|---------------| -+| | AMD | Intel | -+|-----------------------------------------------------------|---------------| -+| RMA | Y | Y | -+| LMA | Y | Y | -+| CPU | Y | Y | -+| CPU% | Y | Y | -+| MEM total | Y | Y | -+| MEM free | Y | Y | -+| MEM active | Y | Y | -+| MEM inactive | Y | Y | -+| Dirty | Y | Y | -+| Writeback | Y | Y | -+| Mapped | Y | Y | -+| QPI/UPI 0 bandwidth | N | Y | -+| QPI/UPI 1 bandwidth | N | Y | -+| Memory controller bandwidth | N | Y | -+ -+Other: -+ -+| Feature | Supported | -+|-----------------------------------------------------------|---------------| -+| | AMD | Intel | -+|-----------------------------------------------------------|---------------| -+| mgen testcase | Y | Y | --- -2.31.1 - diff --git a/0013-common-Fix-perf-init-for-large-systems.patch b/0013-common-Fix-perf-init-for-large-systems.patch deleted file mode 100644 index 150f548..0000000 --- a/0013-common-Fix-perf-init-for-large-systems.patch +++ /dev/null @@ -1,58 +0,0 @@ -From b4543efe798bbc255519fdcec73484cbd43472d1 Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Mon, 18 Apr 2022 10:59:26 +0530 -Subject: [PATCH 13/15] common: Fix perf init for large systems - -Large systems with hundreds of CPUs can run into issues -during perf event initialization because of the default -resource limits for file descriptors. Set RLIMIT_NOFILE -explicitly to a fairly large value to avoid them. - -Signed-off-by: Sandipan Das ---- - common/os/os_perf.c | 16 ++++++++++++++++ - 1 file changed, 16 insertions(+) - -diff --git a/common/os/os_perf.c b/common/os/os_perf.c -index 44ca43d..49fdaaa 100644 ---- a/common/os/os_perf.c -+++ b/common/os/os_perf.c -@@ -28,6 +28,7 @@ - - #include - #include -+#include - #include - #include - #include -@@ -850,12 +851,27 @@ ll_init(pf_conf_t *conf) - int - os_perf_init(void) - { -+ struct rlimit limit; - int ringsize, size; - - s_profiling_recbuf = NULL; - s_ll_recbuf = NULL; - s_partpause_enabled = B_FALSE; - -+ /* -+ * Depending on the number of available CPUs in the system, the -+ * default fd limit may be exceeded. Set it to a large value to -+ * avoid running into problems. -+ */ -+ limit.rlim_cur = 32768; -+ limit.rlim_max = 32768; -+ -+ if (setrlimit(RLIMIT_NOFILE, &limit) < 0) { -+ exit_msg_put("Failed to setup perf!\n"); -+ debug_print(NULL, 2, "os_perf_init failed\n"); -+ return (-1); -+ } -+ - ringsize = pf_ringsize_init(); - size = ((ringsize / sizeof (pf_profiling_rbrec_t)) + 1) * - sizeof (pf_profiling_rec_t); --- -2.31.1 - diff --git a/0014-common-Increase-count-of-possible-CPUs-per-node.patch b/0014-common-Increase-count-of-possible-CPUs-per-node.patch deleted file mode 100644 index 3889a03..0000000 --- a/0014-common-Increase-count-of-possible-CPUs-per-node.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 6f6cc3b24d84c413556639b64a62aca6ad0b21cc Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Mon, 10 Oct 2022 14:55:45 +0530 -Subject: [PATCH 14/15] common: Increase count of possible CPUs per-node - -Upcoming AMD Zen 4 processors support up to 256 threads per -NUMA node in NPS1 configuration. Hence, increase the number -of possible CPUs per-node to 256. - -Signed-off-by: Sandipan Das ---- - common/include/types.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/common/include/types.h b/common/include/types.h -index efe3055..05b411a 100644 ---- a/common/include/types.h -+++ b/common/include/types.h -@@ -116,7 +116,7 @@ typedef enum { - #define UI_COUNT_NUM 5 - - #define NNODES_MAX 64 --#define NCPUS_NODE_MAX 128 -+#define NCPUS_NODE_MAX 256 - #define NCPUS_MAX (NNODES_MAX * NCPUS_NODE_MAX) - #define NPROCS_NAX 4096 - #define LL_THRESH 128 --- -2.31.1 - diff --git a/0015-common-Fix-some-typos.patch b/0015-common-Fix-some-typos.patch deleted file mode 100644 index ad228a8..0000000 --- a/0015-common-Fix-some-typos.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 8c3fe7b2debf74566a6017c92eebc7cb23f9deca Mon Sep 17 00:00:00 2001 -From: Sandipan Das -Date: Thu, 10 Nov 2022 10:43:04 +0530 -Subject: [PATCH 15/15] common: Fix some typos - -Fix some typos in the messages shown when an user attempts -to monitor a process or thread that has already exited. - -Signed-off-by: Sandipan Das ---- - common/win.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/common/win.c b/common/win.c -index 087efe5..d0a8f3b 100644 ---- a/common/win.c -+++ b/common/win.c -@@ -3489,13 +3489,13 @@ win_warn_msg(warn_type_t warn_type) - break; - - case WARN_INVALID_PID: -- (void) strncpy(content, "Process exists, " -+ (void) strncpy(content, "Process exited, " - "return to home window ...", - WIN_LINECHAR_MAX); - break; - - case WARN_INVALID_LWPID: -- (void) strncpy(content, "Thread exists, " -+ (void) strncpy(content, "Thread exited, " - "return to home window ...", - WIN_LINECHAR_MAX); - break; --- -2.31.1 - diff --git a/0016-numatop-powerpc-Add-power10-support.patch b/0016-numatop-powerpc-Add-power10-support.patch deleted file mode 100644 index 24a087d..0000000 --- a/0016-numatop-powerpc-Add-power10-support.patch +++ /dev/null @@ -1,251 +0,0 @@ -From 25839aa3e2b02b5c001f220b1beeff5a786f83f2 Mon Sep 17 00:00:00 2001 -From: Kajol Jain -Date: Thu, 6 Jul 2023 04:33:56 -0500 -Subject: [PATCH] numatop/powerpc: Add power10 support - -Add platform check for power10 processors. -Add new files called power10.c/power10.h, which includes -addition of the relevant events, to count per-process/per-thread -memory accesses and CPU usage information for power10 -processors. - -Signed-off-by: Kajol Jain ---- - Makefile.am | 2 ++ - numatop.8 | 2 +- - powerpc/include/power10.h | 50 ++++++++++++++++++++++++++++ - powerpc/include/types.h | 5 +-- - powerpc/plat.c | 14 ++++++-- - powerpc/power10.c | 69 +++++++++++++++++++++++++++++++++++++++ - 6 files changed, 136 insertions(+), 6 deletions(-) - create mode 100644 powerpc/include/power10.h - create mode 100644 powerpc/power10.c - -diff --git a/Makefile.am b/Makefile.am -index ae11522..f23e1a6 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -83,10 +83,12 @@ if CPU_PPC - libnumatop_la_SOURCES += \ - powerpc/include/power8.h \ - powerpc/include/power9.h \ -+ powerpc/include/power10.h \ - powerpc/include/types.h \ - powerpc/plat.c \ - powerpc/power8.c \ - powerpc/power9.c \ -+ powerpc/power10.c \ - powerpc/ui_perf_map.c \ - powerpc/util.c - endif -diff --git a/numatop.8 b/numatop.8 -index 9eb983c..7237093 100644 ---- a/numatop.8 -+++ b/numatop.8 -@@ -500,4 +500,4 @@ in 3.9. The following steps show how to get and apply the patch set. - \fBnumatop\fP supports the Intel Xeon processors: 5500-series, 6500/7500-series, - 5600 series, E7-x8xx-series, and E5-16xx/24xx/26xx/46xx-series. - \fBNote\fP: CPU microcode version 0x618 or 0x70c or later is required on --E5-16xx/24xx/26xx/46xx-series. It also supports IBM Power8 and Power9 processors. -+E5-16xx/24xx/26xx/46xx-series. It also supports IBM Power8, Power9 and Power10 processors. -diff --git a/powerpc/include/power10.h b/powerpc/include/power10.h -new file mode 100644 -index 0000000..bc6c7a9 ---- /dev/null -+++ b/powerpc/include/power10.h -@@ -0,0 +1,50 @@ -+/* -+ * Copyright (c) 2023, IBM Corporation -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * -+ * * Redistributions of source code must retain the above copyright notice, -+ * this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Intel Corporation nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#ifndef _NUMATOP_POWERPC_POWER10_H -+#define _NUMATOP_POWERPC_POWER10_H -+ -+#ifdef __cplusplus -+extern "C" { -+#endif -+ -+#include -+#include -+#include "../../common/include/types.h" -+ -+struct _plat_event_config; -+ -+extern void power10_profiling_config(perf_count_id_t, struct _plat_event_config *); -+extern void power10_ll_config(plat_event_config_t *cfg); -+extern int power10_offcore_num(void); -+ -+#ifdef __cplusplus -+} -+#endif -+ -+#endif /* _NUMATOP_POWERPC_POWER10_H */ -diff --git a/powerpc/include/types.h b/powerpc/include/types.h -index 1ffaafa..30f7eb6 100644 ---- a/powerpc/include/types.h -+++ b/powerpc/include/types.h -@@ -36,10 +36,11 @@ - typedef enum { - CPU_UNSUP = 0, - CPU_POWER8, -- CPU_POWER9 -+ CPU_POWER9, -+ CPU_POWER10 - } cpu_type_t; - --#define CPU_TYPE_NUM 3 -+#define CPU_TYPE_NUM 4 - - typedef enum { - PERF_COUNT_INVALID = -1, -diff --git a/powerpc/plat.c b/powerpc/plat.c -index e7f132d..bed27d5 100644 ---- a/powerpc/plat.c -+++ b/powerpc/plat.c -@@ -35,26 +35,30 @@ - #include "include/types.h" - #include "include/power8.h" - #include "include/power9.h" -+#include "include/power10.h" - - pfn_plat_profiling_config_t - s_plat_profiling_config[CPU_TYPE_NUM] = { - NULL, - power8_profiling_config, -- power9_profiling_config -+ power9_profiling_config, -+ power10_profiling_config - }; - - pfn_plat_ll_config_t - s_plat_ll_config[CPU_TYPE_NUM] = { - NULL, - power8_ll_config, -- power9_ll_config -+ power9_ll_config, -+ power10_ll_config - }; - - pfn_plat_offcore_num_t - s_plat_offcore_num[CPU_TYPE_NUM] = { - NULL, - power8_offcore_num, -- power9_offcore_num -+ power9_offcore_num, -+ power10_offcore_num - }; - - #define SPRN_PVR 0x11F -@@ -85,6 +89,10 @@ plat_detect(void) - s_cpu_type = CPU_POWER9; - ret = 0; - break; -+ case 0x80: -+ s_cpu_type = CPU_POWER10; -+ ret = 0; -+ break; - } - - return ret; -diff --git a/powerpc/power10.c b/powerpc/power10.c -new file mode 100644 -index 0000000..b979f64 ---- /dev/null -+++ b/powerpc/power10.c -@@ -0,0 +1,69 @@ -+/* -+ * Copyright (c) 2023, IBM Corporation -+ * -+ * Redistribution and use in source and binary forms, with or without -+ * modification, are permitted provided that the following conditions are met: -+ * -+ * * Redistributions of source code must retain the above copyright notice, -+ * this list of conditions and the following disclaimer. -+ * * Redistributions in binary form must reproduce the above copyright -+ * notice, this list of conditions and the following disclaimer in the -+ * documentation and/or other materials provided with the distribution. -+ * * Neither the name of Intel Corporation nor the names of its contributors -+ * may be used to endorse or promote products derived from this software -+ * without specific prior written permission. -+ * -+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -+ * POSSIBILITY OF SUCH DAMAGE. -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include "../common/include/os/linux/perf_event.h" -+#include "../common/include/os/plat.h" -+#include "include/power10.h" -+ -+static plat_event_config_t s_power10_profiling[PERF_COUNT_NUM] = { -+ { PERF_TYPE_RAW, 0x600f4, 0, 0, 0, 0, "PM_RUN_CYC" }, -+ { PERF_TYPE_RAW, 0x0F4040000004C040, 0, 0, 0, 0, "PM_DATA_FROM_DMEM" }, -+ { PERF_TYPE_RAW, 0x100f0, 0, 0, 0, 0, "PM_CYC" }, -+ { PERF_TYPE_RAW, 0x500fa, 0, 0, 0, 0, "PM_RUN_INST_CMPL" }, -+ { PERF_TYPE_RAW, 0x094040000002C040, 0, 0, 0, 0, "PM_DATA_FROM_LMEM" }, -+ { PERF_TYPE_RAW, 0x0D4040000003C040, 0, 0, 0, 0, "PM_DATA_FROM_RMEM" }, -+}; -+ -+static plat_event_config_t s_power10_ll = { -+ PERF_TYPE_RAW, 0x0000, 0, 0, 0, 1, "PM_SUSPENDED" -+}; -+ -+void -+power10_profiling_config(perf_count_id_t perf_count_id, plat_event_config_t *cfg) -+{ -+ plat_config_get(perf_count_id, cfg, s_power10_profiling); -+} -+ -+void -+power10_ll_config(plat_event_config_t *cfg) -+{ -+ memcpy(cfg, &s_power10_ll, sizeof (plat_event_config_t)); -+} -+ -+int -+power10_offcore_num(void) -+{ -+ return (3); -+} --- -2.31.1 - diff --git a/numatop.spec b/numatop.spec index 4caacb6..0d9c2d7 100644 --- a/numatop.spec +++ b/numatop.spec @@ -2,8 +2,8 @@ %undefine _ld_as_needed Name: numatop -Version: 2.3 -Release: 3%{?dist} +Version: 2.4 +Release: 1%{?dist} Summary: Memory access locality characterization and analysis License: BSD @@ -24,22 +24,6 @@ BuildRequires: numactl-devel ExclusiveArch: x86_64 ppc64le #Patch001: v2.2-001-Initial-support-for-SPR.patch -Patch0001: 0001-configure.ac-Fix-build-error-when-libnuma-is-missed.patch -Patch0002: 0002-Update-the-error-message.patch -Patch0003: 0003-Update-README.patch -Patch0004: 0004-x86-Prepare-for-multi-vendor-support.patch -Patch0005: 0005-x86-zen-Add-initial-support.patch -Patch0006: 0006-common-Add-sample-period-to-platform-event-config.patch -Patch0007: 0007-common-Add-exclude-guest-to-platform-event-config.patch -Patch0008: 0008-x86-zen-Add-support-for-memory-access-stats.patch -Patch0009: 0009-x86-Fix-clock-frequency-parsing.patch -Patch0010: 0010-x86-zen-Add-Zen-3-support.patch -Patch0011: 0011-x86-zen-Add-Zen-4-support.patch -Patch0012: 0012-x86-Add-feature-tracker.patch -Patch0013: 0013-common-Fix-perf-init-for-large-systems.patch -Patch0014: 0014-common-Increase-count-of-possible-CPUs-per-node.patch -Patch0015: 0015-common-Fix-some-typos.patch -Patch0016: 0016-numatop-powerpc-Add-power10-support.patch %description @@ -78,6 +62,9 @@ autoreconf --force --install --symlink %changelog +* Thu Feb 1 2024 Pingfan Liu - 2.4.1 +- Add initial support for EMR + * Fri Dec 8 2023 Pingfan Liu - 2.3.3 - Add Power10 support diff --git a/sources b/sources index 007f03f..4c4d21f 100644 --- a/sources +++ b/sources @@ -1 +1 @@ -SHA512 (v2.3.tar.gz) = 8d8483ba7ff0a82517df4dff7617b7899e19938460b26b0bf6dd04d5d498900f58bf30f9282c4d2b3525d84f028bc931602ce4dfd1eb48bf644e9fb4235c5859 +SHA512 (v2.4.tar.gz) = eb500424f56a3bcd19375cdca5f1c0d1f4ffbd9817bb0d42bb8224f2929c4cf1cdbb4005adf6a148f6e669db9337175c7dfbd3076aa2d99bfb08f537850efaff