Import from AlmaLinux stable repository
This commit is contained in:
parent
5ed6138e95
commit
1f69c6df79
796
SOURCES/papi-701eventupdate.patch
Normal file
796
SOURCES/papi-701eventupdate.patch
Normal file
@ -0,0 +1,796 @@
|
||||
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
|
||||
Author: Anthony <adanalis@icl.utk.edu>
|
||||
Date: Fri Jul 17 12:05:14 2020 -0400
|
||||
|
||||
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..2325bd4dc 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
CPU,amd64_fam17h_zen1
|
||||
-CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam17h_zen2
|
||||
+# Events copied from zen1 that also exist on zen2
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+
|
||||
#
|
||||
#
|
||||
CPU,Intel architectural PMU
|
||||
@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 13:33:38 2020 -0400
|
||||
|
||||
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
|
||||
for AMD zen2.
|
||||
|
||||
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
explains that FLOP events require MergeEvent support, which was included
|
||||
in the 5.6 kernel.
|
||||
|
||||
===>>> Hence, a kernel version 5.6 or greater is required.
|
||||
|
||||
NOTE: without the MergeEvent support in the kernel,
|
||||
there is no guarantee that the SSE/AVX FLOP
|
||||
events produce any useful data whatsoever.
|
||||
|
||||
These events have been tested and verified for
|
||||
scalar flops, SSE, AVX, and FMA:
|
||||
|
||||
(1) for one AVX instruction (e.g. _mm256_add_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
|
||||
a count of 4 (in the case of double precision), and
|
||||
a count of 8 (in the case of single precision).
|
||||
|
||||
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
|
||||
a count of 8 (in the case of double precision), and
|
||||
a count of 16 (in the case of single precision).
|
||||
|
||||
(3) for one SSE instruction (e.g. _mm_mul_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
|
||||
a count of 2 (in the case of double precision), and
|
||||
a count of 4 (in the case of single precision).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2325bd4dc..2ff3e4d16 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
|
||||
-#
|
||||
-#
|
||||
+# New FLOP event on zen2
|
||||
+# PPR (under section 2.1.15.3. --
|
||||
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
+# explains that FLOP events require MergeEvent support, which was included
|
||||
+# in the 5.6 kernel.
|
||||
+# Hence, a kernel version 5.6 or greater is required.
|
||||
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+
|
||||
+
|
||||
CPU,Intel architectural PMU
|
||||
CPU,ix86arch
|
||||
#
|
||||
commit 35f93252a6e222299c03f2c94912334488e76b02
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 18:40:59 2020 -0400
|
||||
|
||||
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
|
||||
for AMD zen2.
|
||||
|
||||
For unoptimized code (like native MMM), these events may include
|
||||
non-numeric floating-point instructions, e.g. MOVSD: move or merge
|
||||
scalar double-precision floating-point value instructions.
|
||||
|
||||
Tested with:
|
||||
1) SSE double: _mm_mul_pd / _mm_add_pd
|
||||
2) SSE single: _mm_mul_ps / _mm_add_ps
|
||||
3) AVX double: _mm256_mul_pd / _mm256_add_pd
|
||||
4) AVX single: _mm256_mul_ps / _mm256_add_ps
|
||||
5) FMA double: _mm256_macc_pd
|
||||
6) FMA single: _mm256_macc_pd
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2ff3e4d16..60a64564d 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 344f6493425d865577508ff32b6f65516b1b4394
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 19:03:31 2020 -0400
|
||||
|
||||
Added missing 'PRESET' to csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 60a64564d..724d520f0 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Oct 8 11:36:23 2020 -0400
|
||||
|
||||
For zen2, since FP_OPS counts both single- and double-prec operations
|
||||
correctly, we don't need to confuse the user with additional
|
||||
DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
|
||||
Same applies for events counting FP instructions.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 724d520f0..9ebf557e1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Since FP_OPS counts both single- and double-prec operations
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+# Since FP_INS counts both single- and double-prec instuctions
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
|
||||
Author: Sebastian Mobo <smobo@vols.utk.edu>
|
||||
Date: Thu Oct 8 13:40:21 2020 -0400
|
||||
|
||||
Added instruction-cache preset events for the Zen2.
|
||||
|
||||
Signed-off-by: Anthony <adanalis@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9ebf557e1..fd75f9371 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
-
|
||||
+#
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+#
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
# New FLOP event on zen2
|
||||
# PPR (under section 2.1.15.3. --
|
||||
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
commit b87ac4beda096086e0040f8ec1b44c4791a9739c
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 14:06:22 2020 +0900
|
||||
|
||||
Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA)
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index fd75f9371..164f05641 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
|
||||
#
|
||||
commit 869864f813f0681b5c9a4b65de2135c8708a2afb
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 19:34:59 2020 +0900
|
||||
|
||||
Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 164f05641..9192b1041 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
#########################
|
||||
CPU,arm_a64fx
|
||||
#
|
||||
+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Dec 15 13:43:43 2020 +0900
|
||||
|
||||
modify PAPI_FP_INS and PAPI_VEC_INS for A64FX supports
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9192b1041..7b4ceb674 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Feb 12 15:01:21 2021 +0900
|
||||
|
||||
remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx
|
||||
|
||||
There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx that prefetch overcounts.
|
||||
I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file.
|
||||
I will issue the pullrequest again once I have identified how to handle the overcount.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 7b4ceb674..0f5ec8344 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
commit 340f68940234f2db181147fc249907b4f1293e62
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 16 17:16:24 2021 +0900
|
||||
|
||||
remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx
|
||||
|
||||
PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH,
|
||||
so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 0f5ec8344..4ef647959 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
commit 02f34baafb868d183f21bebfd3c46574847b9929
|
||||
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
Date: Tue May 18 02:51:56 2021 +0530
|
||||
|
||||
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
|
||||
|
||||
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..d9e9da8a3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
|
||||
# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen3
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 6964aa356fa606f320c7b871123aceb5c1f21999
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Aug 24 14:17:29 2021 +0900
|
||||
|
||||
Fix the PAPI_FUL_CCY setting for a64fx
|
||||
|
||||
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
|
||||
|
||||
The correct settings are:.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..74deb712f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Mar 4 15:41:30 2022 +0900
|
||||
|
||||
Add PAPI idle-related preset events for a64fx
|
||||
|
||||
For a64fx, add four PAPI idle-related preset events
|
||||
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
|
||||
|
||||
PAPI_BRU_IDL = BR_COMP_WAIT
|
||||
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
|
||||
PAPI_FPU_IDL = FL_COMP_WAIT
|
||||
PAPI_LSU_IDL = LD_COMP_WAIT
|
||||
|
||||
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
|
||||
and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting"
|
||||
on A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:.
|
||||
https://github.com/fujitsu/A64FX/blob/master/doc
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 74deb712f..1cd498e91 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
|
||||
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
|
||||
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
|
||||
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit 3c5364839f583185c1e8dca58d5fe36c9ec82876
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Aug 30 23:17:30 2022 +0000
|
||||
|
||||
papi_avail: add presets for Intel Ice Lake SP
|
||||
|
||||
Define preset events for the Intel Ice Lake SP processor.
|
||||
These presets have been verified using the Counter Analysis Toolkit benchmarks.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a013f58af..8f23e030c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
|
||||
# End of hsw,bdw,skl,clx list
|
||||
#
|
||||
+
|
||||
+# Intel Ice Lake SP events
|
||||
+CPU,icx
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# Loads and stores
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES
|
||||
+# L1 cache
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
|
||||
+# L2 cache
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
|
||||
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
|
||||
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
|
||||
+# L3 cache
|
||||
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
|
||||
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
|
||||
+# SMP
|
||||
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+#FLOPs
|
||||
+# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+# End of icx list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
commit d4da29b07befb9f7c11e351dbfef835b74cdd67a
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:11:37 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8f23e030c..a4d5a9756 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N1 #
|
||||
+#########################
|
||||
+CPU,arm_n1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 14:54:41 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a4d5a9756..207d6d1db 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V1 #
|
||||
+#########################
|
||||
+CPU,arm_v1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit e911f951115bb551925c5b07e7f5b721d5fe3bbe
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:14:18 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 207d6d1db..d27d956c1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N2 #
|
||||
+#########################
|
||||
+CPU,arm_n2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#########################
|
||||
# ARM Neoverse V1 #
|
||||
#########################
|
||||
commit 05dc580247cb18fca882a33d8e356d79032d2ed1
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:08:35 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index d27d956c1..549e337c7 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V2 #
|
||||
+#########################
|
||||
+CPU,arm_v2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
321
SOURCES/papi-71eventupdate.patch
Normal file
321
SOURCES/papi-71eventupdate.patch
Normal file
@ -0,0 +1,321 @@
|
||||
commit b969d25f2a87a53365e3e9a040533b093544a05d
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Apr 3 22:30:14 2023 +0000
|
||||
|
||||
Update Neoverse V2 events
|
||||
|
||||
Add/remove PAPI events to match available hardware counters
|
||||
All tests pass on NVIDIA Grace
|
||||
|
||||
Disclaimer:
|
||||
The PAPI team was not able to verify the functionality included in this
|
||||
commit.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 549e337c..3089d2d4 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
CPU,arm_v2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue
|
||||
+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes
|
||||
+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled
|
||||
+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle
|
||||
+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations
|
||||
+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions
|
||||
PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE
|
||||
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits
|
||||
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
|
||||
commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 7 14:38:39 2023 +0000
|
||||
|
||||
add branch presets for Zen3 and Zen4
|
||||
|
||||
These changes include all branching preset events for Zen3 and Zen4,
|
||||
validated using the Counter Analysis Toolkit.
|
||||
|
||||
For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches
|
||||
taken, in order to adhere to the preset's meaning.
|
||||
|
||||
These changes have been tested on the AMD Zen3 and Zen4 architectures.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 3089d2d4..319cf82c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen4
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit da93ed4dd1fadb70ccee62a976597ff431c9f58c
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Mon Jun 12 17:27:59 2023 +0000
|
||||
|
||||
add flops presets for Zen4
|
||||
|
||||
These changes include FLOPs presets for Zen4, validated using the
|
||||
Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 319cf82c..f6a40a35 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI
|
||||
PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL
|
||||
+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC
|
||||
+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL
|
||||
+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit a31c3a4e9788e03fee113263a9f94bd638a66721
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 21 15:13:47 2023 +0000
|
||||
|
||||
add cycles and instructions presets for Zen4
|
||||
|
||||
These changes include the 'total cycles' and 'instructions completed'
|
||||
presets for Zen4, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f6a40a35..86e11fe6 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS
|
||||
PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 94303410ce97a84408b0b2d727701a60c6f137aa
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Sun Jul 23 15:38:36 2023 +0000
|
||||
|
||||
add various Sapphire Rapids presets
|
||||
|
||||
These changes include cycles, instructions, branching, and FLOPs presets
|
||||
for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Sapphire Rapids architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 86e11fe6..eac0855f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
# End of icx list
|
||||
|
||||
+# Intel Sapphire Rapids events
|
||||
+CPU,spr
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# FLOPs
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+# End of spr list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
|
||||
commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Jul 25 12:16:56 2023 +0000
|
||||
|
||||
add more Ice Lake FLOPs presets
|
||||
|
||||
Since there are enough counters available to monitor both single- and
|
||||
double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and
|
||||
PAPI_VEC_INS are all defined.
|
||||
These presets have been validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index eac0855f..df82ac1c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
# End of icx list
|
||||
|
||||
# Intel Sapphire Rapids events
|
@ -1,34 +0,0 @@
|
||||
commit 9a44d82928ed17ba2ff21eb88b89c5829d0ea30e
|
||||
Author: Steve Kaufmann <steven.kaufmann@hpe.com>
|
||||
Date: Wed Jun 24 14:08:08 2020 -0400
|
||||
|
||||
Added PAPI preset support for Fujitsu A64FX.
|
||||
|
||||
Signed-off-by: Heike Jagode <jagode@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..1b5c15542 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1877,6 +1877,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
637
SOURCES/papi-arm64fastread.patch
Normal file
637
SOURCES/papi-arm64fastread.patch
Normal file
@ -0,0 +1,637 @@
|
||||
commit 9a1f2d897f4086bc1d60102de984c849445b5e97
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 21 19:18:40 2023 +0900
|
||||
|
||||
PAPI_read performance improvement for the arm64 processor
|
||||
|
||||
We developed PAPI_read performance improvements for the arm64 processor
|
||||
with a plan to port direct user space PMU register access processing from
|
||||
libperf to the papi library without using libperf.
|
||||
|
||||
The workaround has been implemented that stores the counter value at the
|
||||
time of reset and subtracts the counter value at the time of reset from
|
||||
the read counter value at the next read.
|
||||
When reset processing is called, the value of pc->offset is cleared to 0,
|
||||
and only the counter value read from the PMU counter is referenced.
|
||||
There was no problem with the counters FAILED with negative values during
|
||||
the multiplex+reset test, except for sdsc2-mpx and sdsc4-mpx.
|
||||
To apply the workaround only during reset, the _pe_reset function call sets
|
||||
the reset_flag and the next _pe_start function call clears the reset_flag.
|
||||
The workaround works if the mmap_read_self function is called between calls
|
||||
to the _pe_reset function and the next call to the _pe_start function.
|
||||
|
||||
Switching PMU register direct access from user space from OFF to ON is done by
|
||||
changing the setting of the kernel variable "/proc/sys/kernel/perf_user_access".
|
||||
|
||||
Setting PMU Register Direct Access from User Space Off
|
||||
$ echo 0 > /proc/sys/kernel/perf_user_access
|
||||
$ cat /proc/sys/kernel/perf_user_access
|
||||
0
|
||||
|
||||
Setting PMU Register Direct Access from User Space ON
|
||||
$ echo 1 > /proc/sys/kernel/perf_user_access
|
||||
$ cat /proc/sys/kernel/perf_user_access
|
||||
1
|
||||
|
||||
Performance of PAPI_read has been improved as expected from the execution
|
||||
result of the papi_cost command.
|
||||
|
||||
Improvement effect of switching PMU register direct access from user space
|
||||
from OFF to ON
|
||||
|
||||
Total cost for PAPI_read (2 counters) over 1000000 iterations
|
||||
min cycles: 689 -> 28
|
||||
max cycles: 3876 -> 1323
|
||||
mean cycles: 724.471979 -> 28.888076
|
||||
|
||||
Total cost for PAPI_read_ts (2 counters) over 1000000 iterations
|
||||
min cycles: 693 -> 29
|
||||
max cycles: 4066 -> 3718
|
||||
mean cycles: 726.753003 -> 29.977226
|
||||
|
||||
Total cost for PAPI_read (1 derived_[add|sub] counter) over 1000000 iterations
|
||||
min cycles: 698 -> 28
|
||||
max cycles: 7406 -> 2346
|
||||
mean cycles: 728.527079 -> 28.880691
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index b4877d18e..331288c55 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -682,6 +682,12 @@ set_up_mmap( pe_control_t *ctl, int evt_idx)
|
||||
|
||||
|
||||
|
||||
+/* Request user access for arm64 */
|
||||
+static inline void arm64_request_user_access(struct perf_event_attr *hw_event)
|
||||
+{
|
||||
+ hw_event->config1=0x2; /* Request user access */
|
||||
+}
|
||||
+
|
||||
/* Open all events in the control state */
|
||||
static int
|
||||
open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
@@ -735,6 +741,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
if (( i == 0 ) || (ctl->multiplexed)) {
|
||||
ctl->events[i].attr.pinned = !ctl->multiplexed;
|
||||
ctl->events[i].attr.disabled = 1;
|
||||
+#if defined(__aarch64__)
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ arm64_request_user_access(&ctl->events[i].attr);
|
||||
+ }
|
||||
+#endif
|
||||
ctl->events[i].group_leader_fd=-1;
|
||||
ctl->events[i].attr.read_format = get_read_format(
|
||||
ctl->multiplexed,
|
||||
@@ -743,6 +754,11 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
} else {
|
||||
ctl->events[i].attr.pinned=0;
|
||||
ctl->events[i].attr.disabled = 0;
|
||||
+#if defined(__aarch64__)
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ arm64_request_user_access(&ctl->events[i].attr);
|
||||
+ }
|
||||
+#endif
|
||||
ctl->events[i].group_leader_fd=ctl->events[0].event_fd;
|
||||
ctl->events[i].attr.read_format = get_read_format(
|
||||
ctl->multiplexed,
|
||||
@@ -1047,8 +1063,16 @@ _pe_reset( hwd_context_t *ctx, hwd_control_state_t *ctl )
|
||||
|
||||
/* We need to reset all of the events, not just the group leaders */
|
||||
for( i = 0; i < pe_ctl->num_events; i++ ) {
|
||||
- ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
- PERF_EVENT_IOC_RESET, NULL );
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
+ PERF_EVENT_IOC_RESET, NULL );
|
||||
+ pe_ctl->reset_counts[i] = mmap_read_reset_count(
|
||||
+ pe_ctl->events[i].mmap_buf);
|
||||
+ pe_ctl->reset_flag = 1;
|
||||
+ } else {
|
||||
+ ret = ioctl( pe_ctl->events[i].event_fd,
|
||||
+ PERF_EVENT_IOC_RESET, NULL );
|
||||
+ }
|
||||
if ( ret == -1 ) {
|
||||
PAPIERROR("ioctl(%d, PERF_EVENT_IOC_RESET, NULL) "
|
||||
"returned error, Linux says: %s",
|
||||
@@ -1119,6 +1143,8 @@ _pe_rdpmc_read( hwd_context_t *ctx, hwd_control_state_t *ctl,
|
||||
for ( i = 0; i < pe_ctl->num_events; i++ ) {
|
||||
|
||||
count = mmap_read_self(pe_ctl->events[i].mmap_buf,
|
||||
+ pe_ctl->reset_flag,
|
||||
+ pe_ctl->reset_counts[i],
|
||||
&enabled,&running);
|
||||
|
||||
if (count==0xffffffffffffffffULL) {
|
||||
@@ -1438,6 +1464,10 @@ _pe_start( hwd_context_t *ctx, hwd_control_state_t *ctl )
|
||||
pe_ctl->events[i].event_fd);
|
||||
ret=ioctl( pe_ctl->events[i].event_fd,
|
||||
PERF_EVENT_IOC_ENABLE, NULL) ;
|
||||
+ if (_perf_event_vector.cmp_info.fast_counter_read) {
|
||||
+ pe_ctl->reset_counts[i] = 0LL;
|
||||
+ pe_ctl->reset_flag = 0;
|
||||
+ }
|
||||
|
||||
/* ioctls always return -1 on failure */
|
||||
if (ret == -1) {
|
||||
@@ -2297,6 +2327,29 @@ _pe_shutdown_component( void ) {
|
||||
}
|
||||
|
||||
|
||||
+#if defined(__aarch64__)
|
||||
+/* Check access PMU counter from User space for arm64 support */
|
||||
+static int _pe_detect_arm64_access(void) {
|
||||
+
|
||||
+ FILE *fff;
|
||||
+ int perf_user_access;
|
||||
+ int retval;
|
||||
+
|
||||
+ fff=fopen("/proc/sys/kernel/perf_user_access","r");
|
||||
+ if (fff==NULL) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+
|
||||
+ /* 1 means you can access PMU counter from User space */
|
||||
+ /* 0 means you can not access PMU counter from User space */
|
||||
+ retval=fscanf(fff,"%d",&perf_user_access);
|
||||
+ if (retval!=1) fprintf(stderr,"Error reading /proc/sys/kernel/perf_user_access\n");
|
||||
+ fclose(fff);
|
||||
+
|
||||
+ return perf_user_access;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
/* Check the mmap page for rdpmc support */
|
||||
static int _pe_detect_rdpmc(void) {
|
||||
|
||||
@@ -2305,10 +2358,13 @@ static int _pe_detect_rdpmc(void) {
|
||||
void *addr;
|
||||
struct perf_event_mmap_page *our_mmap;
|
||||
int page_size=getpagesize();
|
||||
+#if defined(__aarch64__)
|
||||
+ int retval;
|
||||
+#endif
|
||||
|
||||
-#if defined(__i386__) || defined (__x86_64__)
|
||||
+#if defined(__i386__) || defined (__x86_64__) || defined(__aarch64__)
|
||||
#else
|
||||
- /* We only support rdpmc on x86 for now */
|
||||
+ /* We support rdpmc on x86 and arm64 for now */
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
@@ -2318,12 +2374,23 @@ static int _pe_detect_rdpmc(void) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
+#if defined(__aarch64__)
|
||||
+ /* Detect if we can use PMU counter from User space for arm64 */
|
||||
+ retval = _pe_detect_arm64_access();
|
||||
+ if (retval == 0) {
|
||||
+ return 0;
|
||||
+ }
|
||||
+#endif
|
||||
+
|
||||
/* Create a fake instructions event so we can read a mmap page */
|
||||
memset(&pe,0,sizeof(struct perf_event_attr));
|
||||
|
||||
pe.type=PERF_TYPE_HARDWARE;
|
||||
pe.size=sizeof(struct perf_event_attr);
|
||||
pe.config=PERF_COUNT_HW_INSTRUCTIONS;
|
||||
+#if defined(__aarch64__)
|
||||
+ arm64_request_user_access(&pe);
|
||||
+#endif
|
||||
pe.exclude_kernel=1;
|
||||
pe.disabled=1;
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h
|
||||
index 0c50ab9f0..cfba8ac49 100644
|
||||
--- a/src/components/perf_event/perf_event_lib.h
|
||||
+++ b/src/components/perf_event/perf_event_lib.h
|
||||
@@ -36,6 +36,8 @@ typedef struct {
|
||||
pid_t tid; /* thread we are monitoring */
|
||||
pe_event_info_t events[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
long long counts[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
+ unsigned int reset_flag;
|
||||
+ long long reset_counts[PERF_EVENT_MAX_MPX_COUNTERS];
|
||||
} pe_control_t;
|
||||
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 92dca4fd0..097286865 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -29,6 +29,74 @@ sys_perf_event_open( struct perf_event_attr *hw_event,
|
||||
return ret;
|
||||
}
|
||||
|
||||
+
|
||||
+/*
|
||||
+ * We define u64 as uint64_t for every architecture
|
||||
+ * so that we can print it with "%"PRIx64 without getting warnings.
|
||||
+ *
|
||||
+ * typedef __u64 u64;
|
||||
+ * typedef __s64 s64;
|
||||
+ */
|
||||
+typedef uint64_t u64;
|
||||
+typedef int64_t s64;
|
||||
+
|
||||
+typedef __u32 u32;
|
||||
+typedef __s32 s32;
|
||||
+
|
||||
+typedef __u16 u16;
|
||||
+typedef __s16 s16;
|
||||
+
|
||||
+typedef __u8 u8;
|
||||
+typedef __s8 s8;
|
||||
+
|
||||
+
|
||||
+#ifdef __SIZEOF_INT128__
|
||||
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
|
||||
+{
|
||||
+ return (u64)(((unsigned __int128)a * b) >> shift);
|
||||
+}
|
||||
+
|
||||
+#else
|
||||
+
|
||||
+#ifdef __i386__
|
||||
+static inline u64 mul_u32_u32(u32 a, u32 b)
|
||||
+{
|
||||
+ u32 high, low;
|
||||
+
|
||||
+ asm ("mull %[b]" : "=a" (low), "=d" (high)
|
||||
+ : [a] "a" (a), [b] "rm" (b) );
|
||||
+
|
||||
+ return low | ((u64)high) << 32;
|
||||
+}
|
||||
+#else
|
||||
+static inline u64 mul_u32_u32(u32 a, u32 b)
|
||||
+{
|
||||
+ return (u64)a * b;
|
||||
+}
|
||||
+#endif
|
||||
+
|
||||
+static inline u64 mul_u64_u32_shr(u64 a, u32 b, unsigned int shift)
|
||||
+{
|
||||
+ u32 ah, al;
|
||||
+ u64 ret;
|
||||
+
|
||||
+ al = a;
|
||||
+ ah = a >> 32;
|
||||
+
|
||||
+ ret = mul_u32_u32(al, b) >> shift;
|
||||
+ if (ah)
|
||||
+ ret += mul_u32_u32(ah, b) << (32 - shift);
|
||||
+
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
+#endif /* __SIZEOF_INT128__ */
|
||||
+
|
||||
+#ifndef ARRAY_SIZE
|
||||
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
#if defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
|
||||
@@ -52,19 +120,140 @@ static inline unsigned long long rdpmc(unsigned int counter) {
|
||||
|
||||
#define barrier() __asm__ volatile("" ::: "memory")
|
||||
|
||||
+
|
||||
+#elif defined(__aarch64__)
|
||||
+
|
||||
+/* Indirect stringification. Doing two levels allows the parameter to be a
|
||||
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
|
||||
+ * converts to "bar".
|
||||
+ */
|
||||
+
|
||||
+#define __stringify_1(x...) #x
|
||||
+#define __stringify(x...) __stringify_1(x)
|
||||
+
|
||||
+#define read_sysreg(r) ({ \
|
||||
+ u64 __val; \
|
||||
+ asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \
|
||||
+ __val; \
|
||||
+})
|
||||
+
|
||||
+static u64 read_pmccntr(void)
|
||||
+{
|
||||
+ return read_sysreg(pmccntr_el0);
|
||||
+}
|
||||
+
|
||||
+#define PMEVCNTR_READ(idx) \
|
||||
+ static u64 read_pmevcntr_##idx(void) { \
|
||||
+ return read_sysreg(pmevcntr##idx##_el0); \
|
||||
+ }
|
||||
+
|
||||
+PMEVCNTR_READ(0);
|
||||
+PMEVCNTR_READ(1);
|
||||
+PMEVCNTR_READ(2);
|
||||
+PMEVCNTR_READ(3);
|
||||
+PMEVCNTR_READ(4);
|
||||
+PMEVCNTR_READ(5);
|
||||
+PMEVCNTR_READ(6);
|
||||
+PMEVCNTR_READ(7);
|
||||
+PMEVCNTR_READ(8);
|
||||
+PMEVCNTR_READ(9);
|
||||
+PMEVCNTR_READ(10);
|
||||
+PMEVCNTR_READ(11);
|
||||
+PMEVCNTR_READ(12);
|
||||
+PMEVCNTR_READ(13);
|
||||
+PMEVCNTR_READ(14);
|
||||
+PMEVCNTR_READ(15);
|
||||
+PMEVCNTR_READ(16);
|
||||
+PMEVCNTR_READ(17);
|
||||
+PMEVCNTR_READ(18);
|
||||
+PMEVCNTR_READ(19);
|
||||
+PMEVCNTR_READ(20);
|
||||
+PMEVCNTR_READ(21);
|
||||
+PMEVCNTR_READ(22);
|
||||
+PMEVCNTR_READ(23);
|
||||
+PMEVCNTR_READ(24);
|
||||
+PMEVCNTR_READ(25);
|
||||
+PMEVCNTR_READ(26);
|
||||
+PMEVCNTR_READ(27);
|
||||
+PMEVCNTR_READ(28);
|
||||
+PMEVCNTR_READ(29);
|
||||
+PMEVCNTR_READ(30);
|
||||
+
|
||||
+/*
|
||||
+ * Read a value direct from PMEVCNTR<idx>
|
||||
+ */
|
||||
+static u64 rdpmc(unsigned int counter)
|
||||
+{
|
||||
+ static u64 (* const read_f[])(void) = {
|
||||
+ read_pmevcntr_0,
|
||||
+ read_pmevcntr_1,
|
||||
+ read_pmevcntr_2,
|
||||
+ read_pmevcntr_3,
|
||||
+ read_pmevcntr_4,
|
||||
+ read_pmevcntr_5,
|
||||
+ read_pmevcntr_6,
|
||||
+ read_pmevcntr_7,
|
||||
+ read_pmevcntr_8,
|
||||
+ read_pmevcntr_9,
|
||||
+ read_pmevcntr_10,
|
||||
+ read_pmevcntr_11,
|
||||
+ read_pmevcntr_13,
|
||||
+ read_pmevcntr_12,
|
||||
+ read_pmevcntr_14,
|
||||
+ read_pmevcntr_15,
|
||||
+ read_pmevcntr_16,
|
||||
+ read_pmevcntr_17,
|
||||
+ read_pmevcntr_18,
|
||||
+ read_pmevcntr_19,
|
||||
+ read_pmevcntr_20,
|
||||
+ read_pmevcntr_21,
|
||||
+ read_pmevcntr_22,
|
||||
+ read_pmevcntr_23,
|
||||
+ read_pmevcntr_24,
|
||||
+ read_pmevcntr_25,
|
||||
+ read_pmevcntr_26,
|
||||
+ read_pmevcntr_27,
|
||||
+ read_pmevcntr_28,
|
||||
+ read_pmevcntr_29,
|
||||
+ read_pmevcntr_30,
|
||||
+ read_pmccntr
|
||||
+ };
|
||||
+
|
||||
+ if (counter < ARRAY_SIZE(read_f))
|
||||
+ return (read_f[counter])();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+static u64 rdtsc(void) { return read_sysreg(cntvct_el0); }
|
||||
+
|
||||
+#define barrier() asm volatile("dmb ish" : : : "memory")
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
|
||||
+
|
||||
+static inline u64 adjust_cap_usr_time_short(u64 a, u64 b, u64 c)
|
||||
+{
|
||||
+ u64 ret;
|
||||
+ ret = b + ((a - b) & c);
|
||||
+ return ret;
|
||||
+}
|
||||
+
|
||||
/* based on the code in include/uapi/linux/perf_event.h */
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
+ int user_reset_flag,
|
||||
+ unsigned long long reset,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
struct perf_event_mmap_page *pc = addr;
|
||||
|
||||
- uint32_t seq, time_mult, time_shift, index, width;
|
||||
+ uint32_t seq, time_mult = 0, time_shift = 0, index, width;
|
||||
int64_t count;
|
||||
uint64_t enabled, running;
|
||||
- uint64_t cyc, time_offset;
|
||||
+ uint64_t cyc = 0, time_offset = 0, time_cycles = 0, time_mask = ~0ULL;
|
||||
int64_t pmc = 0;
|
||||
- uint64_t quot, rem;
|
||||
uint64_t delta = 0;
|
||||
|
||||
|
||||
@@ -96,12 +285,11 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
time_mult = pc->time_mult;
|
||||
time_shift = pc->time_shift;
|
||||
|
||||
- quot=(cyc>>time_shift);
|
||||
- rem = cyc & (((uint64_t)1 << time_shift) - 1);
|
||||
- delta = time_offset + (quot * time_mult) +
|
||||
- ((rem * time_mult) >> time_shift);
|
||||
+ if (pc->cap_user_time_short) {
|
||||
+ time_cycles = pc->time_cycles;
|
||||
+ time_mask = pc->time_mask;
|
||||
+ }
|
||||
}
|
||||
- enabled+=delta;
|
||||
|
||||
/* actually do the measurement */
|
||||
|
||||
@@ -116,8 +304,9 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
/* numbers which break if an IOC_RESET is done */
|
||||
width = pc->pmc_width;
|
||||
count = pc->offset;
|
||||
- count<<=(64-width);
|
||||
- count>>=(64-width);
|
||||
+ if (user_reset_flag == 1) {
|
||||
+ count = 0;
|
||||
+ }
|
||||
|
||||
/* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */
|
||||
/* while actual perf_event.h has cap_user_rdpmc */
|
||||
@@ -130,14 +319,14 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
pmc = rdpmc(index-1);
|
||||
|
||||
/* sign extend result */
|
||||
+ if (user_reset_flag == 1) {
|
||||
+ pmc-=reset;
|
||||
+ }
|
||||
pmc<<=(64-width);
|
||||
pmc>>=(64-width);
|
||||
|
||||
/* add current count into the existing kernel count */
|
||||
count+=pmc;
|
||||
-
|
||||
- /* Only adjust if index is valid */
|
||||
- running+=delta;
|
||||
} else {
|
||||
/* Falling back because rdpmc not supported */
|
||||
/* for this event. */
|
||||
@@ -148,14 +337,66 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
|
||||
} while (pc->lock != seq);
|
||||
|
||||
+ if (enabled != running) {
|
||||
+
|
||||
+ /* Adjust for cap_usr_time_short, a nop if not */
|
||||
+ cyc = adjust_cap_usr_time_short(cyc, time_cycles, time_mask);
|
||||
+
|
||||
+ delta = time_offset + mul_u64_u32_shr(cyc, time_mult, time_shift);
|
||||
+
|
||||
+ enabled+=delta;
|
||||
+ if (index)
|
||||
+ /* Only adjust if index is valid */
|
||||
+ running+=delta;
|
||||
+ }
|
||||
+
|
||||
if (en) *en=enabled;
|
||||
if (ru) *ru=running;
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
+static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
+
|
||||
+ struct perf_event_mmap_page *pc = addr;
|
||||
+ uint32_t seq, index;
|
||||
+ uint64_t count = 0;
|
||||
+
|
||||
+ if (pc == NULL) {
|
||||
+ return count;
|
||||
+ }
|
||||
+
|
||||
+ do {
|
||||
+ /* The barrier ensures we get the most up to date */
|
||||
+ /* version of the pc->lock variable */
|
||||
+
|
||||
+ seq=pc->lock;
|
||||
+ barrier();
|
||||
+
|
||||
+ /* actually do the measurement */
|
||||
+
|
||||
+ /* Ugh, libpfm4 perf_event.h has cap_usr_rdpmc */
|
||||
+ /* while actual perf_event.h has cap_user_rdpmc */
|
||||
+
|
||||
+ /* Index of register to read */
|
||||
+ /* 0 means stopped/not-active */
|
||||
+ /* Need to subtract 1 to get actual index to rdpmc() */
|
||||
+ index = pc->index;
|
||||
+
|
||||
+ if (pc->cap_usr_rdpmc && index) {
|
||||
+ /* Read counter value */
|
||||
+ count = rdpmc(index-1);
|
||||
+ }
|
||||
+ barrier();
|
||||
+
|
||||
+ } while (pc->lock != seq);
|
||||
+
|
||||
+ return count;
|
||||
+}
|
||||
+
|
||||
#else
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
+ int user_reset_flag,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
commit 693dd5c014d1f0b9a3eae63de051389ed8eb338b
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 07:46:14 2023 -0500
|
||||
|
||||
perf_event: bug fix in mmap_read_self
|
||||
|
||||
Commit 9a1f2d897 broke the perf_event component for power cpus. The
|
||||
mmap_read_self function is missing one argument. This patch restores the
|
||||
missing argument in the function.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 097286865..7ad3524f0 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -397,6 +397,7 @@ static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
#else
|
||||
static inline unsigned long long mmap_read_self(void *addr,
|
||||
int user_reset_flag,
|
||||
+ unsigned long long reset,
|
||||
unsigned long long *en,
|
||||
unsigned long long *ru) {
|
||||
|
||||
commit 1b3e75b7f11c7e2b7c590948216d6aaeec299010
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 14:21:03 2023 +0100
|
||||
|
||||
perf_event: add missing mmap_read_reset_count for non default cpus
|
||||
|
||||
Power cpus do not have a version of mmap_read_reset_count. Implement the
|
||||
missing function.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 7ad3524f0..73e82c8ae 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -409,6 +409,11 @@ static inline unsigned long long mmap_read_self(void *addr,
|
||||
return (unsigned long long)(-1);
|
||||
}
|
||||
|
||||
+static inline unsigned long long mmap_read_reset_count(void *addr __attribute__((unused))) {
|
||||
+
|
||||
+ return (unsigned long long)(-1);
|
||||
+}
|
||||
+
|
||||
#endif
|
||||
|
||||
/* These functions are based on builtin-record.c in the */
|
||||
commit 37d0c77b7b4d00a958dff50dc715cf63e0cd6084
|
||||
Author: Giuseppe Congiu <gcongiu@icl.utk.edu>
|
||||
Date: Tue Feb 21 14:22:53 2023 +0100
|
||||
|
||||
perf_event: used unused attribute in mmap_read_self
|
||||
|
||||
diff --git a/src/components/perf_event/perf_helpers.h b/src/components/perf_event/perf_helpers.h
|
||||
index 73e82c8ae..59c8a2fc8 100644
|
||||
--- a/src/components/perf_event/perf_helpers.h
|
||||
+++ b/src/components/perf_event/perf_helpers.h
|
||||
@@ -395,16 +395,11 @@ static inline unsigned long long mmap_read_reset_count(void *addr) {
|
||||
}
|
||||
|
||||
#else
|
||||
-static inline unsigned long long mmap_read_self(void *addr,
|
||||
- int user_reset_flag,
|
||||
- unsigned long long reset,
|
||||
- unsigned long long *en,
|
||||
- unsigned long long *ru) {
|
||||
-
|
||||
- (void)addr;
|
||||
-
|
||||
- *en=0;
|
||||
- *ru=0;
|
||||
+static inline unsigned long long mmap_read_self(void *addr __attribute__((unused)),
|
||||
+ int user_reset_flag __attribute__((unused)),
|
||||
+ unsigned long long reset __attribute__((unused)),
|
||||
+ unsigned long long *en __attribute__((unused)),
|
||||
+ unsigned long long *ru __attribute__((unused))) {
|
||||
|
||||
return (unsigned long long)(-1);
|
||||
}
|
106
SOURCES/papi-thread_init.patch
Normal file
106
SOURCES/papi-thread_init.patch
Normal file
@ -0,0 +1,106 @@
|
||||
commit 3625bdbad9fd57d1cdb1e5615854545167d4adcb
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Wed Aug 26 17:18:29 2020 -0400
|
||||
|
||||
This modifies PAPI_library_init() to initialize components in two classes,
|
||||
separated by the initialization of the papi thread structure. The first class
|
||||
is those that need no thread structure, currently everything but perf_event and
|
||||
perf_event_uncore. Following the init of the threading structure, we init the
|
||||
second class (perf_event and perf_event_uncore) that DOES need the thread
|
||||
structure to successfully init_component(). This required a change to
|
||||
_papi_hwi_init_global(), to add an argument to distinguish which class it
|
||||
should initialize.
|
||||
|
||||
diff --git a/src/papi.c b/src/papi.c
|
||||
index 33cc2993..ed75af49 100644
|
||||
--- a/src/papi.c
|
||||
+++ b/src/papi.c
|
||||
@@ -1151,7 +1151,23 @@ PAPI_library_init( int version )
|
||||
papi_return( init_retval );
|
||||
}
|
||||
|
||||
- /* Initialize thread globals, including the main threads */
|
||||
+ /* Initialize component globals EXCEPT for perf_event, perf_event_uncore.
|
||||
+ * To avoid race conditions, these components use the thread local storage
|
||||
+ * construct initialized by _papi_hwi_init_global_threads(), from within
|
||||
+ * their init_component(). So these must have init_component() run AFTER
|
||||
+ * _papi_hwi_init_global_threads. Other components demand that init threads
|
||||
+ * run AFTER init_component(), which sets up globals they need.
|
||||
+ */
|
||||
+
|
||||
+ tmp = _papi_hwi_init_global( 0 ); /* Selector 0 to skip perf_event, perf_event_uncore */
|
||||
+ if ( tmp ) {
|
||||
+ init_retval = tmp;
|
||||
+ _papi_hwi_shutdown_global_internal( );
|
||||
+ _in_papi_library_init_cnt--;
|
||||
+ papi_return( init_retval );
|
||||
+ }
|
||||
+
|
||||
+ /* Initialize thread globals, including the main threads */
|
||||
|
||||
tmp = _papi_hwi_init_global_threads( );
|
||||
if ( tmp ) {
|
||||
@@ -1161,9 +1177,9 @@ PAPI_library_init( int version )
|
||||
papi_return( init_retval );
|
||||
}
|
||||
|
||||
- /* Initialize component globals */
|
||||
+ /* Initialize perf_event, perf_event_uncore components */
|
||||
|
||||
- tmp = _papi_hwi_init_global( );
|
||||
+ tmp = _papi_hwi_init_global( 1 ); /* Selector 1 for only perf_event, perf_event_uncore */
|
||||
if ( tmp ) {
|
||||
init_retval = tmp;
|
||||
_papi_hwi_shutdown_global_internal( );
|
||||
diff --git a/src/papi_internal.c b/src/papi_internal.c
|
||||
index 5a1ccd43..e6dd319c 100644
|
||||
--- a/src/papi_internal.c
|
||||
+++ b/src/papi_internal.c
|
||||
@@ -1928,11 +1928,13 @@ int papi_num_components = ( sizeof ( _papi_hwd ) / sizeof ( *_papi_hwd ) ) - 1;
|
||||
* Routine that initializes all available components.
|
||||
* A component is available if a pointer to its info vector
|
||||
* appears in the NULL terminated_papi_hwd table.
|
||||
+ * Modified to accept an arg: 0=do not init perf_event or
|
||||
+ * perf_event_uncore. 1=init ONLY perf_event or perf_event_uncore.
|
||||
*/
|
||||
int
|
||||
-_papi_hwi_init_global( void )
|
||||
+_papi_hwi_init_global( int PE_OR_PEU )
|
||||
{
|
||||
- int retval, i = 0;
|
||||
+ int retval, is_pe_peu, i = 0;
|
||||
|
||||
retval = _papi_hwi_innoculate_os_vector( &_papi_os_vector );
|
||||
if ( retval != PAPI_OK ) {
|
||||
@@ -1940,14 +1942,16 @@ _papi_hwi_init_global( void )
|
||||
}
|
||||
|
||||
while ( _papi_hwd[i] ) {
|
||||
-
|
||||
+ is_pe_peu = 0;
|
||||
+ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event") == 0) is_pe_peu=1;
|
||||
+ if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event_uncore") == 0) is_pe_peu=1;
|
||||
retval = _papi_hwi_innoculate_vector( _papi_hwd[i] );
|
||||
if ( retval != PAPI_OK ) {
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* We can be disabled by user before init */
|
||||
- if (!_papi_hwd[i]->cmp_info.disabled) {
|
||||
+ if (!_papi_hwd[i]->cmp_info.disabled && (PE_OR_PEU == is_pe_peu)) {
|
||||
retval = _papi_hwd[i]->init_component( i );
|
||||
_papi_hwd[i]->cmp_info.disabled=retval;
|
||||
|
||||
diff --git a/src/papi_internal.h b/src/papi_internal.h
|
||||
index 6492fea4..e0f5acd7 100644
|
||||
--- a/src/papi_internal.h
|
||||
+++ b/src/papi_internal.h
|
||||
@@ -467,7 +467,7 @@ int _papi_hwi_read( hwd_context_t * context, EventSetInfo_t * ESI,
|
||||
long long *values );
|
||||
int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI );
|
||||
int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx );
|
||||
-int _papi_hwi_init_global( void );
|
||||
+int _papi_hwi_init_global( int PE_OR_PEU );
|
||||
int _papi_hwi_init_global_internal( void );
|
||||
int _papi_hwi_init_os(void);
|
||||
void _papi_hwi_init_errors(void);
|
@ -11,7 +11,7 @@
|
||||
Summary: Performance Application Programming Interface
|
||||
Name: papi
|
||||
Version: 6.0.0
|
||||
Release: 12%{?dist}
|
||||
Release: 16%{?dist}
|
||||
License: BSD
|
||||
Requires: papi-libs = %{version}-%{release}
|
||||
URL: http://icl.cs.utk.edu/papi/
|
||||
@ -22,11 +22,14 @@ URL: http://icl.cs.utk.edu/papi/
|
||||
# so when papi is rebased to a newer version it can be used as is.
|
||||
Source0: http://icl.cs.utk.edu/projects/papi/downloads/%{name}-%{version}-noiozone.tar.gz
|
||||
Patch1: papi-python3.patch
|
||||
Patch2: papi-a64fx.patch
|
||||
Patch4: papi-config.patch
|
||||
Patch5: papi-nostatic.patch
|
||||
Patch6: papi-lto.patch
|
||||
Patch7: papi-rhbz1923967.patch
|
||||
Patch21: papi-arm64fastread.patch
|
||||
Patch31: papi-701eventupdate.patch
|
||||
Patch40: papi-thread_init.patch
|
||||
Patch41: papi-71eventupdate.patch
|
||||
BuildRequires: make
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: doxygen
|
||||
@ -36,9 +39,9 @@ BuildRequires: kernel-headers >= 2.6.32
|
||||
BuildRequires: chrpath
|
||||
BuildRequires: lm_sensors-devel
|
||||
%if %{without bundled_libpfm}
|
||||
BuildRequires: libpfm-devel >= 4.6.0-1
|
||||
BuildRequires: libpfm-devel >= 4.13.0-1
|
||||
%if %{with_static}
|
||||
BuildRequires: libpfm-static >= 4.6.0-1
|
||||
BuildRequires: libpfm-static >= 4.13.0-1
|
||||
%endif
|
||||
%endif
|
||||
# Following required for net component
|
||||
@ -95,11 +98,14 @@ the PAPI user-space libraries and interfaces.
|
||||
%prep
|
||||
%setup -q
|
||||
%patch1 -p1 -b .python3
|
||||
%patch2 -p1 -b .a64fx
|
||||
%patch4 -p1
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch21 -p1
|
||||
%patch31 -p1
|
||||
%patch40 -p1
|
||||
%patch41 -p1
|
||||
|
||||
%build
|
||||
|
||||
@ -192,6 +198,18 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
|
||||
%endif
|
||||
|
||||
%changelog
|
||||
* Fri Nov 17 2023 William Cohen <wcohen@redhat.com> - 6.0.0-16
|
||||
- Update papi event presets (RHEL-9333, RHEL-9334, RHEL-9335)
|
||||
|
||||
* Fri Jun 16 2023 William Cohen <wcohen@redhat.com> - 6.0.0-15
|
||||
- Address thread initialization order. (RHBZ#2215582)
|
||||
|
||||
* Thu May 4 2023 William Cohen <wcohen@redhat.com> - 6.0.0-14
|
||||
- Update papi event presets (RHBZ#2111923, RHBZ#2111942, RHBZ#2111947)
|
||||
|
||||
* Thu Apr 27 2023 William Cohen <wcohen@redhat.com> - 6.0.0-13
|
||||
- Improve aarch64 read speed. (rhbz2186927)
|
||||
|
||||
* Thu May 26 2022 William Cohen <wcohen@redhat.com> - 6.0.0-12
|
||||
- Disable problematic IBM Power9 events. (RHBZ#1923967)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user