797 lines
36 KiB
Diff
797 lines
36 KiB
Diff
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
|
|
Author: Anthony <adanalis@icl.utk.edu>
|
|
Date: Fri Jul 17 12:05:14 2020 -0400
|
|
|
|
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 8e96adfbd..2325bd4dc 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
|
#
|
|
CPU,amd64_fam17h
|
|
CPU,amd64_fam17h_zen1
|
|
-CPU,amd64_fam17h_zen2
|
|
#
|
|
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
|
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
|
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
|
|
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
|
|
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
|
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
|
+# Events discovered via CAT
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
|
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
|
+#
|
|
+#
|
|
+CPU,amd64_fam17h_zen2
|
|
+# Events copied from zen1 that also exist on zen2
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
|
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
|
|
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
|
+# Events discovered via CAT
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
|
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
|
+
|
|
#
|
|
#
|
|
CPU,Intel architectural PMU
|
|
@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
|
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
|
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
|
|
+#########################
|
|
+# ARM Fujitsu A64FX #
|
|
+#########################
|
|
+CPU,arm_a64fx
|
|
+#
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+
|
|
#
|
|
CPU,mips_74k
|
|
#
|
|
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
|
|
Author: Heike Jagode <jagode@icl.utk.edu>
|
|
Date: Thu Sep 24 13:33:38 2020 -0400
|
|
|
|
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
|
|
for AMD zen2.
|
|
|
|
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
|
explains that FLOP events require MergeEvent support, which was included
|
|
in the 5.6 kernel.
|
|
|
|
===>>> Hence, a kernel version 5.6 or greater is required.
|
|
|
|
NOTE: without the MergeEvent support in the kernel,
|
|
there is no guarantee that the SSE/AVX FLOP
|
|
events produce any useful data whatsoever.
|
|
|
|
These events have been tested and verified for
|
|
scalar flops, SSE, AVX, and FMA:
|
|
|
|
(1) for one AVX instruction (e.g. _mm256_add_pd()),
|
|
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
|
|
a count of 4 (in the case of double precision), and
|
|
a count of 8 (in the case of single precision).
|
|
|
|
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
|
|
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
|
|
a count of 8 (in the case of double precision), and
|
|
a count of 16 (in the case of single precision).
|
|
|
|
(3) for one SSE instruction (e.g. _mm_mul_pd()),
|
|
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
|
|
a count of 2 (in the case of double precision), and
|
|
a count of 4 (in the case of single precision).
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 2325bd4dc..2ff3e4d16 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
|
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
|
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
|
|
|
-#
|
|
-#
|
|
+# New FLOP event on zen2
|
|
+# PPR (under section 2.1.15.3. --
|
|
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
|
+# explains that FLOP events require MergeEvent support, which was included
|
|
+# in the 5.6 kernel.
|
|
+# Hence, a kernel version 5.6 or greater is required.
|
|
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
|
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
|
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+
|
|
+
|
|
CPU,Intel architectural PMU
|
|
CPU,ix86arch
|
|
#
|
|
commit 35f93252a6e222299c03f2c94912334488e76b02
|
|
Author: Heike Jagode <jagode@icl.utk.edu>
|
|
Date: Thu Sep 24 18:40:59 2020 -0400
|
|
|
|
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
|
|
for AMD zen2.
|
|
|
|
For unoptimized code (like native MMM), these events may include
|
|
non-numeric floating-point instructions, e.g. MOVSD: move or merge
|
|
scalar double-precision floating-point value instructions.
|
|
|
|
Tested with:
|
|
1) SSE double: _mm_mul_pd / _mm_add_pd
|
|
2) SSE single: _mm_mul_ps / _mm_add_ps
|
|
3) AVX double: _mm256_mul_pd / _mm256_add_pd
|
|
4) AVX single: _mm256_mul_ps / _mm256_add_ps
|
|
5) FMA double: _mm256_macc_pd
|
|
6) FMA single: _mm256_macc_ps
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 2ff3e4d16..60a64564d 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
|
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+# Floating-point instructions (including non-numeric floating-point instructions,
|
|
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
|
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
|
|
|
|
CPU,Intel architectural PMU
|
|
commit 344f6493425d865577508ff32b6f65516b1b4394
|
|
Author: Heike Jagode <jagode@icl.utk.edu>
|
|
Date: Thu Sep 24 19:03:31 2020 -0400
|
|
|
|
Added missing 'PRESET' to csv file.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 60a64564d..724d520f0 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
# Floating-point instructions (including non-numeric floating-point instructions,
|
|
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
|
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
|
|
|
|
CPU,Intel architectural PMU
|
|
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
|
|
Author: Heike Jagode <jagode@icl.utk.edu>
|
|
Date: Thu Oct 8 11:36:23 2020 -0400
|
|
|
|
For zen2, since FP_OPS counts both single- and double-prec operations
|
|
correctly, we don't need to confuse the user with additional
|
|
DP_OPS and SP_OPS events. So, I'm taking them out.
|
|
|
|
Same applies for events counting FP instructions.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 724d520f0..9ebf557e1 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
|
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
|
# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
|
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+# Since FP_OPS counts both single- and double-prec operations
|
|
+# correctly, we don't need to confuse the user with additional
|
|
+# DP_OPS and SP_OPS events. So, I'm taking them out.
|
|
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+#
|
|
# Floating-point instructions (including non-numeric floating-point instructions,
|
|
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
|
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+# Since FP_INS counts both single- and double-prec instructions
|
|
+# correctly, we don't need to confuse the user with additional
|
|
+# VEC_DP and VEC_SP events. So, I'm taking them out.
|
|
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
|
|
|
|
CPU,Intel architectural PMU
|
|
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
|
|
Author: Sebastian Mobo <smobo@vols.utk.edu>
|
|
Date: Thu Oct 8 13:40:21 2020 -0400
|
|
|
|
Added instruction-cache preset events for the Zen2.
|
|
|
|
Signed-off-by: Anthony <adanalis@icl.utk.edu>
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 9ebf557e1..fd75f9371 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
|
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
|
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
|
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
|
-
|
|
+#
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
|
+#
|
|
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
|
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
|
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
|
# New FLOP event on zen2
|
|
# PPR (under section 2.1.15.3. --
|
|
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
|
commit b87ac4beda096086e0040f8ec1b44c4791a9739c
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Mon Dec 14 14:06:22 2020 +0900
|
|
|
|
Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA)
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index fd75f9371..164f05641 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
|
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
|
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
|
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
|
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
|
|
#
|
|
commit 869864f813f0681b5c9a4b65de2135c8708a2afb
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Mon Dec 14 19:34:59 2020 +0900
|
|
|
|
Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS).
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 164f05641..9192b1041 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
#########################
|
|
CPU,arm_a64fx
|
|
#
|
|
+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
|
+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
|
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
|
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
|
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
|
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
|
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
|
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
|
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
|
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
|
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
|
+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
|
+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
|
+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
|
|
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
|
|
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
|
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
|
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
|
|
|
#
|
|
CPU,mips_74k
|
|
commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Tue Dec 15 13:43:43 2020 +0900
|
|
|
|
modify PAPI_FP_INS and PAPI_VEC_INS for A64FX support
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 9192b1041..7b4ceb674 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
|
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
|
|
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
|
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
|
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Fri Feb 12 15:01:21 2021 +0900
|
|
|
|
remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx
|
|
|
|
There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx: they appear to overcount because of prefetches.
|
|
I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file.
|
|
I will issue the pull request again once I have identified how to handle the overcount.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 7b4ceb674..0f5ec8344 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
|
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
|
+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
|
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
|
commit 340f68940234f2db181147fc249907b4f1293e62
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Tue Feb 16 17:16:24 2021 +0900
|
|
|
|
remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx
|
|
|
|
PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH,
|
|
so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 0f5ec8344..4ef647959 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
|
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
|
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
|
-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
|
+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
|
+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
|
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
|
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
|
PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
|
commit 02f34baafb868d183f21bebfd3c46574847b9929
|
|
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
|
Date: Tue May 18 02:51:56 2021 +0530
|
|
|
|
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
|
|
|
|
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 4ef647959..d9e9da8a3 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
|
|
# VEC_DP and VEC_SP events. So, I'm taking them out.
|
|
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
|
+#
|
|
+#
|
|
+CPU,amd64_fam19h_zen3
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
|
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
|
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
|
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
|
|
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
|
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
|
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
|
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
|
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
|
|
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
|
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
|
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
|
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
|
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
|
|
|
|
|
CPU,Intel architectural PMU
|
|
commit 6964aa356fa606f320c7b871123aceb5c1f21999
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Tue Aug 24 14:17:29 2021 +0900
|
|
|
|
Fix the PAPI_FUL_CCY setting for a64fx
|
|
|
|
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
|
|
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
|
|
|
|
The correct setting is:
|
|
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 4ef647959..74deb712f 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
|
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
|
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
|
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
|
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
|
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
|
|
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
Date: Fri Mar 4 15:41:30 2022 +0900
|
|
|
|
Add PAPI idle-related preset events for a64fx
|
|
|
|
For a64fx, add four PAPI idle-related preset events
|
|
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
|
|
|
|
PAPI_BRU_IDL = BR_COMP_WAIT
|
|
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
|
|
PAPI_FPU_IDL = FL_COMP_WAIT
|
|
PAPI_LSU_IDL = LD_COMP_WAIT
|
|
|
|
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
|
|
and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting"
|
|
section of A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:
|
|
https://github.com/fujitsu/A64FX/blob/master/doc
|
|
|
|
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 74deb712f..1cd498e91 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
|
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
|
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
|
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
|
|
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
|
|
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
|
|
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
|
|
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
commit 3c5364839f583185c1e8dca58d5fe36c9ec82876
|
|
Author: Daniel Barry <dbarry@vols.utk.edu>
|
|
Date: Tue Aug 30 23:17:30 2022 +0000
|
|
|
|
papi_avail: add presets for Intel Ice Lake SP
|
|
|
|
Define preset events for the Intel Ice Lake SP processor.
|
|
These presets have been verified using the Counter Analysis Toolkit benchmarks.
|
|
|
|
These changes have been tested on the Intel Ice Lake architecture.
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index a013f58af..8f23e030c 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
|
|
|
# End of hsw,bdw,skl,clx list
|
|
#
|
|
+
|
|
+# Intel Ice Lake SP events
|
|
+CPU,icx
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
|
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
|
+# Loads and stores
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES
|
|
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES
|
|
+# L1 cache
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
|
|
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
|
|
+# L2 cache
|
|
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
|
|
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
|
|
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
|
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
|
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
|
|
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
|
|
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
|
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
|
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
|
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
|
|
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
|
|
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
|
|
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
|
|
+# L3 cache
|
|
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
|
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
|
|
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
|
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
|
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
|
|
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
|
|
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
|
|
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
|
|
+# SMP
|
|
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
|
|
+# Branches
|
|
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
|
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
|
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
|
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
|
+#FLOPs
|
|
+# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
|
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
|
+# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
|
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
|
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
|
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
|
+# End of icx list
|
|
+
|
|
#
|
|
# Intel MIC / Xeon-Phi / Knights Landing
|
|
# Intel Knights Mill
|
|
commit d4da29b07befb9f7c11e351dbfef835b74cdd67a
|
|
Author: John Linford <jlinford@nvidia.com>
|
|
Date: Mon Mar 20 17:11:37 2023 -0500
|
|
|
|
Add minimal events for Arm Neoverse N1
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 8f23e030c..a4d5a9756 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX
|
|
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
|
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
|
|
|
+#########################
|
|
+# ARM Neoverse N1 #
|
|
+#########################
|
|
+CPU,arm_n1
|
|
+#
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
|
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
|
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
|
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
|
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
|
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
+
|
|
#
|
|
CPU,mips_74k
|
|
#
|
|
commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6
|
|
Author: John Linford <jlinford@nvidia.com>
|
|
Date: Mon Mar 20 14:54:41 2023 -0500
|
|
|
|
Add minimal events for Arm Neoverse V1
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index a4d5a9756..207d6d1db 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
|
|
+#########################
|
|
+# ARM Neoverse V1 #
|
|
+#########################
|
|
+CPU,arm_v1
|
|
+#
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
|
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
|
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
|
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
|
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
|
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
+
|
|
#
|
|
CPU,mips_74k
|
|
#
|
|
commit e911f951115bb551925c5b07e7f5b721d5fe3bbe
|
|
Author: John Linford <jlinford@nvidia.com>
|
|
Date: Mon Mar 20 17:14:18 2023 -0500
|
|
|
|
Add minimal events for Arm Neoverse N2
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index 207d6d1db..d27d956c1 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
|
|
+#########################
|
|
+# ARM Neoverse N2 #
|
|
+#########################
|
|
+CPU,arm_n2
|
|
+#
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
|
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
|
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
|
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
|
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
|
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
+
|
|
#########################
|
|
# ARM Neoverse V1 #
|
|
#########################
|
|
commit 05dc580247cb18fca882a33d8e356d79032d2ed1
|
|
Author: John Linford <jlinford@nvidia.com>
|
|
Date: Mon Mar 20 17:08:35 2023 -0500
|
|
|
|
Add minimal events for Arm Neoverse V2
|
|
|
|
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
|
index d27d956c1..549e337c7 100644
|
|
--- a/src/papi_events.csv
|
|
+++ b/src/papi_events.csv
|
|
@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
|
|
+#########################
|
|
+# ARM Neoverse V2 #
|
|
+#########################
|
|
+CPU,arm_v2
|
|
+#
|
|
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
|
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
|
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
|
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
|
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
|
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
|
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
|
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
|
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
|
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
|
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
|
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
|
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
|
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
|
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
|
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
|
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
|
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
|
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
|
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
|
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
|
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
|
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
|
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
|
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
|
+
|
|
#
|
|
CPU,mips_74k
|
|
#
|