Compare commits
No commits in common. "c8" and "c8s-rhbz2161146" have entirely different histories.
c8
...
c8s-rhbz21
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
||||
SOURCES/papi-5.6.0.tar.gz
|
||||
/papi-5.6.0.tar.gz
|
||||
|
@ -1,205 +0,0 @@
|
||||
commit 61616f7ddaaef1b79df85f0a3e969c886604de6c
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Mon Apr 2 17:47:31 2018 -0400
|
||||
|
||||
PAPI preset event support for Intel Knights Mill.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index faa4ae2f5..bb11f61d3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -851,8 +851,10 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
#
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
+# Intel Knights Mill
|
||||
#
|
||||
CPU,knl
|
||||
+CPU,knm
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INSTRUCTIONS_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES
|
||||
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
@@ -885,6 +887,7 @@ PRESET,PAPI_BR_NTK,DERIVED_SUB,BR_INST_RETIRED:JCC,BR_INST_RETIRED:TAKEN_JCC
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,RS_FULL_STALL:ANY
|
||||
PRESET,PAPI_STL_ICY,NOT_DERIVED,NO_ALLOC_CYCLES:ANY
|
||||
#
|
||||
+# End of knl,knm list
|
||||
|
||||
CPU,Intel Core2
|
||||
CPU,Intel Core
|
||||
commit 85003c716d76eff47607fa0967537c6cf63d8348
|
||||
Author: Steve Walk <swalk.cavium@gmail.com>
|
||||
Date: Fri Jun 8 15:50:50 2018 -0400
|
||||
|
||||
enable Cavium ThunderX2 support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index bb11f61d3..46827f180 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1841,6 +1841,31 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_READ_ACCESS
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_WRITE_ACCESS
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_READ_REFILL
|
||||
PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_WRITE_REFILL
|
||||
+
|
||||
+#####################
|
||||
+# ARM ThunderX2 #
|
||||
+#####################
|
||||
+CPU,arm_thunderx2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_RETIRED
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_RETIRED
|
||||
+PRESET,PAPI_L1_DCA,DERIVED_ADD,L1D_CACHE_RD,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 111d01df256f691c2a2d2e14028fa4ebc9e63bed
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Tue Jan 22 17:09:29 2019 -0500
|
||||
|
||||
papi_events: add cascade lake X support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f5bcf1a46..009074449 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -724,9 +724,11 @@ CPU,hsw_ep
|
||||
CPU,bdw
|
||||
CPU,bdw_ep
|
||||
CPU,skl
|
||||
-CPU,skx
|
||||
# Note, libpfm4 treats Kaby Lake as just a form of skylake
|
||||
CPU,kbl
|
||||
+CPU,skx
|
||||
+# Note, libpfm4 treats Cascade Lake-X as just a form of skylake-X
|
||||
+CPU,clx
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_THREAD_UNHALTED:THREAD_P
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
commit d5a1a9ae2e4102e03063e76e242d4a3547cd5df3
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Wed Jan 23 16:58:10 2019 -0500
|
||||
|
||||
papi_events: the skylake events are actually split in two, make sure cascadelake gets both cases too
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 009074449..361813847 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -832,6 +832,7 @@ PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH
|
||||
|
||||
CPU,skl
|
||||
CPU,skx
|
||||
+CPU,clx
|
||||
# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
@@ -849,7 +850,7 @@ PRESET,PAPI_STL_ICY,NOT_DERIVED,IDQ_UOPS_NOT_DELIVERED:CYCLES_0_UOPS_DELIV_CORE
|
||||
PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
|
||||
|
||||
-# End of hsw,bdw,skl list
|
||||
+# End of hsw,bdw,skl,clx list
|
||||
#
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
commit c9d0702caf582179cf89f28d987a68e48b9af0e9
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Mon May 20 16:31:45 2019 -0400
|
||||
|
||||
I have added PAPI POWER9 event definitions for PAPI_L2_DCR, PAPI_L2_DCW, PAPI_BR_CN, PAPI_BR_NTK, PAPI_BR_UCN, and PAPI_BR_TKN.
|
||||
These events have been tested. Their patterns of behavior were measured during the execution of performance benchmarks on Summit's POWER9 processors.
|
||||
The patterns of behavior for the corresponding events on Intel Haswell processors were measured during the execution of the same performance benchmarks.
|
||||
The respective events from each architecture behave similarly.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 361813847..f658931ed 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1588,6 +1588,8 @@ PRESET,PAPI_L1_DCA,DERIVED_ADD,PM_LD_REF_L1,PM_ST_CMPL
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,PM_L2_LD_MISS
|
||||
PRESET,PAPI_L2_STM,NOT_DERIVED,PM_L2_ST_MISS
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,PM_DATA_FROM_L2
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,PM_L2_ST_HIT
|
||||
PRESET,PAPI_L3_DCR,NOT_DERIVED,PM_DATA_FROM_L2MISS
|
||||
PRESET,PAPI_L3_DCM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
PRESET,PAPI_L3_LDM,DERIVED_ADD,PM_DATA_FROM_LMEM,PM_DATA_FROM_RMEM
|
||||
@@ -1617,6 +1619,10 @@ PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL
|
||||
PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED
|
||||
+PRESET,PAPI_BR_CN,DERIVED_SUB,PM_BR_CMPL,PM_BR_UNCOND
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|N2|-|,PM_BR_CMPL,PM_BR_UNCOND,PM_BR_TAKEN_CMPL
|
||||
+PRESET,PAPI_BR_UCN,NOT_DERIVED,PM_BR_UNCOND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,PM_BR_CORECT_PRED_TAKEN_CMPL
|
||||
PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE
|
||||
#
|
||||
CPU,ultra12
|
||||
commit 6440c5995a10db05959325b1192368734bfa7e5b
|
||||
Author: Carl Love <cel@us.ibm.com>
|
||||
Date: Wed Aug 14 07:52:50 2019 -0400
|
||||
|
||||
Per Carl Love, "The POWER9 event PM_BR_TAKEN_CMPL includes conditional and unconditional branches. The equation for event PAPI_BR_NTK should not include the event PM_BR_UNCOND as PM_BR_TAKEN_CMPL already counts unconditional branches. The POWER9 event PM_LD_REF_L1 includes hits and misses to the L1. Thus we should not be adding PM_LD_MISS_L1_ALT when calculating PAPI_LD_INS on POWER9."
|
||||
|
||||
The definitions for these preset events were changed accordingly, and their patterns of behavior were measured during the execution of performance benchmarks on the IBM POWER9 processors on Summit. The patterns of behavior for the corresponding events on the Intel Skylake and Broadwell processors were measured during the execution of the same performance benchmarks. The respective events from each architecture behave similarly. In addition, the new definitions pass the PAPI validation tests.
|
||||
|
||||
Signed-off-by: Daniel Barry <dbarry@vols.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f658931ed..8df74866e 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1613,14 +1613,14 @@ PRESET,PAPI_TOT_CYC,NOT_DERIVED,PM_RUN_CYC
|
||||
PRESET,PAPI_HW_INT,NOT_DERIVED,PM_EXT_INT
|
||||
PRESET,PAPI_STL_ICY,DERIVED_POSTFIX,N0|N1|-|,PM_RUN_CYC,PM_1PLUS_PPC_DISP
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,PM_ST_FIN
|
||||
-PRESET,PAPI_LD_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1_ALT
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,PM_LD_REF_L1
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,PM_LSU_FIN
|
||||
PRESET,PAPI_LST_INS,DERIVED_ADD,PM_LD_REF_L1,PM_LD_MISS_L1,PM_ST_FIN
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,PM_BRU_FIN
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,PM_TAKEN_BR_MPRED_CMPL
|
||||
PRESET,PAPI_BR_PRC,NOT_DERIVED,PM_BR_PRED
|
||||
PRESET,PAPI_BR_CN,DERIVED_SUB,PM_BR_CMPL,PM_BR_UNCOND
|
||||
-PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|N2|-|,PM_BR_CMPL,PM_BR_UNCOND,PM_BR_TAKEN_CMPL
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_POSTFIX,N0|N1|-|,PM_BR_CMPL,PM_BR_TAKEN_CMPL
|
||||
PRESET,PAPI_BR_UCN,NOT_DERIVED,PM_BR_UNCOND
|
||||
PRESET,PAPI_BR_TKN,NOT_DERIVED,PM_BR_CORECT_PRED_TAKEN_CMPL
|
||||
PRESET,PAPI_FXU_IDL,NOT_DERIVED,PM_FXU_IDLE
|
||||
commit 20890adcb59a1c1648cb70be65332c03a3781e1a
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Thu Jan 16 16:43:51 2020 -0500
|
||||
|
||||
Added two machine types to papi_events.csv to be in line with
|
||||
libpfm4 update to support amd64_fam17h_zen1 and zen2.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 97446ad2c..8e96adfbd 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -396,6 +396,8 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
+CPU,amd64_fam17h_zen1
|
||||
+CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
@ -1,796 +0,0 @@
|
||||
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
|
||||
Author: Anthony <adanalis@icl.utk.edu>
|
||||
Date: Fri Jul 17 12:05:14 2020 -0400
|
||||
|
||||
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..2325bd4dc 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
CPU,amd64_fam17h_zen1
|
||||
-CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam17h_zen2
|
||||
+# Events copied from zen1 that also exist on zen2
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+
|
||||
#
|
||||
#
|
||||
CPU,Intel architectural PMU
|
||||
@@ -1877,6 +1897,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 13:33:38 2020 -0400
|
||||
|
||||
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
|
||||
for AMD zen2.
|
||||
|
||||
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
explains that FLOP events require MergeEvent support, which was included
|
||||
in the 5.6 kernel.
|
||||
|
||||
===>>> Hence, a kernel version 5.6 or greater is required.
|
||||
|
||||
NOTE: without the MergeEvent support in the kernel,
|
||||
there is no guarantee that the SSE/AVX FLOP
|
||||
events produce any useful data whatsoever.
|
||||
|
||||
These events have been tested and verified for
|
||||
scalar flops, SSE, AVX, and FMA:
|
||||
|
||||
(1) for one AVX instruction (e.g. _mm256_add_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
|
||||
a count of 4 (in the case of double precision), and
|
||||
a count of 8 (in the case of single precision).
|
||||
|
||||
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
|
||||
a count of 8 (in the case of double precision), and
|
||||
a count of 16 (in the case of single precision).
|
||||
|
||||
(3) for one SSE instruction (e.g. _mm_mul_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
|
||||
a count of 2 (in the case of double precision), and
|
||||
a count of 4 (in the case of single precision).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2325bd4dc..2ff3e4d16 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
|
||||
-#
|
||||
-#
|
||||
+# New FLOP event on zen2
|
||||
+# PPR (under section 2.1.15.3. --
|
||||
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
+# explains that FLOP events require MergeEvent support, which was included
|
||||
+# in the 5.6 kernel.
|
||||
+# Hence, a kernel version 5.6 or greater is required.
|
||||
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+
|
||||
+
|
||||
CPU,Intel architectural PMU
|
||||
CPU,ix86arch
|
||||
#
|
||||
commit 35f93252a6e222299c03f2c94912334488e76b02
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 18:40:59 2020 -0400
|
||||
|
||||
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
|
||||
for AMD zen2.
|
||||
|
||||
For unoptimized code (like native MMM), these events may include
|
||||
non-numeric floating-point instructions, e.g. MOVSD: move or merge
|
||||
scalar double-precision floating-point value instructions.
|
||||
|
||||
Tested with:
|
||||
1) SSE double: _mm_mul_pd / _mm_add_pd
|
||||
2) SSE single: _mm_mul_ps / _mm_add_ps
|
||||
3) AVX double: _mm256_mul_pd / _mm256_add_pd
|
||||
4) AVX single: _mm256_mul_ps / _mm256_add_ps
|
||||
5) FMA double: _mm256_macc_pd
|
||||
6) FMA single: _mm256_macc_ps
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2ff3e4d16..60a64564d 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 344f6493425d865577508ff32b6f65516b1b4394
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 19:03:31 2020 -0400
|
||||
|
||||
Added missing 'PRESET' to csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 60a64564d..724d520f0 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Oct 8 11:36:23 2020 -0400
|
||||
|
||||
For zen2, since FP_OPS counts both single- and double-prec operations
|
||||
correctly, we don't need to confuse the user with additional
|
||||
DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
|
||||
Same applies for events counting FP instructions.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 724d520f0..9ebf557e1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Since FP_OPS counts both single- and double-prec operations
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+# Since FP_INS counts both single- and double-prec instuctions
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
|
||||
Author: Sebastian Mobo <smobo@vols.utk.edu>
|
||||
Date: Thu Oct 8 13:40:21 2020 -0400
|
||||
|
||||
Added instruction-cache preset events for the Zen2.
|
||||
|
||||
Signed-off-by: Anthony <adanalis@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9ebf557e1..fd75f9371 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
-
|
||||
+#
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+#
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
# New FLOP event on zen2
|
||||
# PPR (under section 2.1.15.3. --
|
||||
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
commit b87ac4beda096086e0040f8ec1b44c4791a9739c
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 14:06:22 2020 +0900
|
||||
|
||||
Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA)
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index fd75f9371..164f05641 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
|
||||
#
|
||||
commit 869864f813f0681b5c9a4b65de2135c8708a2afb
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 19:34:59 2020 +0900
|
||||
|
||||
Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 164f05641..9192b1041 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
#########################
|
||||
CPU,arm_a64fx
|
||||
#
|
||||
+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Dec 15 13:43:43 2020 +0900
|
||||
|
||||
modify PAPI_FP_INS and PAPI_VEC_INS for A64FX support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9192b1041..7b4ceb674 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Feb 12 15:01:21 2021 +0900
|
||||
|
||||
remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx
|
||||
|
||||
There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx: prefetch accesses cause these events to overcount.
|
||||
I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file.
|
||||
I will issue the pull request again once I have identified how to handle the overcount.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 7b4ceb674..0f5ec8344 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
commit 340f68940234f2db181147fc249907b4f1293e62
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 16 17:16:24 2021 +0900
|
||||
|
||||
remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx
|
||||
|
||||
PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH,
|
||||
so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 0f5ec8344..4ef647959 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
commit 02f34baafb868d183f21bebfd3c46574847b9929
|
||||
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
Date: Tue May 18 02:51:56 2021 +0530
|
||||
|
||||
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
|
||||
|
||||
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..d9e9da8a3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
|
||||
# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen3
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
commit 6964aa356fa606f320c7b871123aceb5c1f21999
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Aug 24 14:17:29 2021 +0900
|
||||
|
||||
Fix the PAPI_FUL_CCY setting for a64fx
|
||||
|
||||
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
|
||||
|
||||
The correct setting is:
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..74deb712f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Mar 4 15:41:30 2022 +0900
|
||||
|
||||
Add PAPI idle-related preset events for a64fx
|
||||
|
||||
For a64fx, add four PAPI idle-related preset events
|
||||
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
|
||||
|
||||
PAPI_BRU_IDL = BR_COMP_WAIT
|
||||
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
|
||||
PAPI_FPU_IDL = FL_COMP_WAIT
|
||||
PAPI_LSU_IDL = LD_COMP_WAIT
|
||||
|
||||
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
|
||||
and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting"
|
||||
section of A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:
|
||||
https://github.com/fujitsu/A64FX/blob/master/doc
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 74deb712f..1cd498e91 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
|
||||
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
|
||||
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
|
||||
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit 3c5364839f583185c1e8dca58d5fe36c9ec82876
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Aug 30 23:17:30 2022 +0000
|
||||
|
||||
papi_avail: add presets for Intel Ice Lake SP
|
||||
|
||||
Define preset events for the Intel Ice Lake SP processor.
|
||||
These presets have been verified using the Counter Analysis Toolkit benchmarks.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a013f58af..8f23e030c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -929,6 +929,63 @@ PRESET,PAPI_CA_ITV,NOT_DERIVED,OFFCORE_RESPONSE_0:SNP_HIT_WITH_FWD
|
||||
|
||||
# End of hsw,bdw,skl,clx list
|
||||
#
|
||||
+
|
||||
+# Intel Ice Lake SP events
|
||||
+CPU,icx
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# Loads and stores
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_LOADS
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,MEM_INST_RETIRED:ALL_STORES
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,MEM_INST_RETIRED:ALL_LOADS,MEM_INST_RETIRED:ALL_STORES
|
||||
+# L1 cache
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D:REPLACEMENT
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D:REPLACEMENT,L2_RQSTS:ALL_CODE_RD
|
||||
+# L2 cache
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_REFERENCES
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2_RQSTS:ALL_DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,L2_RQSTS:CODE_RD_HIT
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_TCH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_HIT
|
||||
+#PRESET,PAPI_L2_TCM,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L2_MISS
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,L2_RQSTS:ALL_CODE_RD
|
||||
+#PRESET,PAPI_L2_LDH,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_HIT
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2_RQSTS:DEMAND_DATA_RD_MISS
|
||||
+PRESET,PAPI_L2_TCA,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_REFERENCES,L2_RQSTS:ALL_CODE_RD
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L2_TCR,DERIVED_ADD,L2_RQSTS:ALL_DEMAND_DATA_RD,L2_RQSTS:ALL_CODE_RD
|
||||
+# L3 cache
|
||||
+PRESET,PAPI_L3_DCA,DERIVED_SUB,LLC_REFERENCES,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_DCR,NOT_DERIVED,OFFCORE_REQUESTS:DEMAND_DATA_RD
|
||||
+PRESET,PAPI_L3_ICA,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+PRESET,PAPI_L3_ICR,NOT_DERIVED,L2_RQSTS:CODE_RD_MISS
|
||||
+#PRESET,PAPI_L3_LDH,NOT_DERIVED,MEM_LOAD_UOPS_RETIRED:L3_HIT
|
||||
+PRESET,PAPI_L3_LDM,NOT_DERIVED,MEM_LOAD_RETIRED:L3_MISS
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,LLC_REFERENCES
|
||||
+PRESET,PAPI_L3_TCM,NOT_DERIVED,LLC_MISSES
|
||||
+# SMP
|
||||
+PRESET,PAPI_CA_SHR,NOT_DERIVED,OFFCORE_REQUESTS:ALL_DATA_RD
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+#FLOPs
|
||||
+# PAPI_DP_OPS = FP_ARITH:SCALAR_DOUBLE + 2*FP_ARITH:128B_PACKED_DOUBLE + 4*256B_PACKED_DOUBLE + 8*512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+# End of icx list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
commit d4da29b07befb9f7c11e351dbfef835b74cdd67a
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:11:37 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8f23e030c..a4d5a9756 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2059,6 +2059,41 @@ PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIX
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N1 #
|
||||
+#########################
|
||||
+CPU,arm_n1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 88e686f877abcf19c5f50d4e23cbf8ea920a40b6
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 14:54:41 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V1
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index a4d5a9756..207d6d1db 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V1 #
|
||||
+#########################
|
||||
+CPU,arm_v1
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit e911f951115bb551925c5b07e7f5b721d5fe3bbe
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:14:18 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse N2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 207d6d1db..d27d956c1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2094,6 +2094,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse N2 #
|
||||
+#########################
|
||||
+CPU,arm_n2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#########################
|
||||
# ARM Neoverse V1 #
|
||||
#########################
|
||||
commit 05dc580247cb18fca882a33d8e356d79032d2ed1
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Mar 20 17:08:35 2023 -0500
|
||||
|
||||
Add minimal events for Arm Neoverse V2
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index d27d956c1..549e337c7 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2164,6 +2164,41 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
|
||||
+#########################
|
||||
+# ARM Neoverse V2 #
|
||||
+#########################
|
||||
+CPU,arm_v2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
+PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
@ -1,321 +0,0 @@
|
||||
commit b969d25f2a87a53365e3e9a040533b093544a05d
|
||||
Author: John Linford <jlinford@nvidia.com>
|
||||
Date: Mon Apr 3 22:30:14 2023 +0000
|
||||
|
||||
Update Neoverse V2 events
|
||||
|
||||
Add/remove PAPI events to match available hardware counters
|
||||
All tests pass on NVIDIA Grace
|
||||
|
||||
Disclaimer:
|
||||
The PAPI team was not able to verify the functionality included in this
|
||||
commit.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 549e337c..3089d2d4 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -2170,34 +2170,113 @@ PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
CPU,arm_v2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_TOT_IIS,Instructions issued
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,STALL
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_CCY,Cycles with maximum instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FUL_ICY,Cycles with maximum instruction issue
|
||||
+#NOT_IMPLEMENTED,PAPI_FXU_IDL,Cycles integer units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_LSU_IDL,Cycles load/store units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_RCY,Cycles Stalled Waiting for memory Reads
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_SCY,Cycles Stalled Waiting for memory accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_MEM_WCY,Cycles Stalled Waiting for memory writes
|
||||
+#NOT_IMPLEMENTED,PAPI_FP_STAL,Cycles the FP unit(s) are stalled
|
||||
+#NOT_IMPLEMENTED,PAPI_FPU_IDL,Cycles floating point units are idle
|
||||
+#NOT_IMPLEMENTED,PAPI_BRU_IDL,Cycles branch units are idle
|
||||
+PRESET,PAPI_STL_ICY,NOT_DERIVED,STALL
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_ADD,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_SP_OPS,Floating point operations; optimized to count scaled single precision vector operations
|
||||
+#NOT_IMPLEMENTED,PAPI_DP_OPS,Floating point operations; optimized to count scaled double precision vector operations
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,FP_HP_SPEC,FP_SP_SPEC,FP_DP_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_FAD_INS,Floating point add instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FDV_INS,Floating point divide instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FMA_INS,FMA instructions completed
|
||||
+#NOT_IMPLEMENTED,PAPI_FML_INS,Floating point multiply instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FNV_INS,Floating point inverse instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_FSQ_INS,Floating point square root instructions
|
||||
PRESET,PAPI_VEC_INS,DERIVED_ADD,SVE_INST_SPEC,ASE_INST_SPEC
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_DP,Double precision vector/SIMD instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_VEC_SP,Single precision vector/SIMD instructions
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
-PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
-PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_CN,Conditional branch instructions
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_RETIRED,BR_MIS_PRED_RETIRED
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED_RETIRED
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_NTK,Conditional branch instructions not taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_TKN,Conditional branch instructions taken
|
||||
+#NOT_IMPLEMENTED,PAPI_BR_UCN,Unconditional branch instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_BTAC_M,Branch target address cache misses
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_LST_INS,DERIVED_ADD,LD_SPEC,ST_SPEC
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE_ACCESS
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE_ACCESS
|
||||
-PRESET,PAPI_L2_DCA,DERIVED_ADD,L2D_CACHE_RD,L2D_CACHE_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICR,Level 1 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_ICW,Level 1 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_LDM,Level 1 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_STM,Level 1 store misses
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE_ACCESS
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE_ACCESS,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCR,Level 1 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L1_TCW,Level 1 total cache writes
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
-PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
-PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICA,Level 2 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICH,Level 2 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICM,Level 2 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICR,Level 2 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L2_ICW,Level 2 instruction cache writes
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L3D_CACHE
|
||||
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCH,Level 3 data cache hits
|
||||
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCR,Level 3 data cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_DCW,Level 3 data cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICA,Level 3 instruction cache accesses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICH,Level 3 instruction cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICM,Level 3 instruction cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICR,Level 3 instruction cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_ICW,Level 3 instruction cache writes
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_LDM,Level 3 load misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_STM,Level 3 store misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCH,Level 3 total cache hits
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCM,Level 3 cache misses
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCR,Level 3 total cache reads
|
||||
+#NOT_IMPLEMENTED,PAPI_L3_TCW,Level 3 total cache writes
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_TLB_SD,Translation lookaside buffer shootdowns
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_CLN,Requests for exclusive access to clean cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_INV,Requests for cache line invalidation
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_ITV,Requests for cache line intervention
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SHR,Requests for exclusive access to shared cache line
|
||||
+#NOT_IMPLEMENTED,PAPI_CA_SNP,Requests for a snoop
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_FAL,Failed store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_SUC,Successful store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
|
||||
+#NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
|
||||
commit 15f32cb3a2e6bdd9e51aa4043842f0130e9dcf24
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 7 14:38:39 2023 +0000
|
||||
|
||||
add branch presets for Zen3 and Zen4
|
||||
|
||||
These changes include all branching preset events for Zen3 and Zen4,
|
||||
validated using the Counter Analysis Toolkit.
|
||||
|
||||
For Zen3, PAPI_BR_TKN was modified to exclude unconditional branches
|
||||
taken, in order to adhere to the preset's meaning.
|
||||
|
||||
These changes have been tested on the AMD Zen3 and Zen4 architectures.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 3089d2d4..319cf82c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -488,8 +488,12 @@ CPU,amd64_fam19h_zen3
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
-PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_POSTFIX,N0|N1|-|N2|+|,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
@@ -509,6 +513,16 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen4
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_UCN,NOT_DERIVED,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDITIONAL_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit da93ed4dd1fadb70ccee62a976597ff431c9f58c
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Mon Jun 12 17:27:59 2023 +0000
|
||||
|
||||
add flops presets for Zen4
|
||||
|
||||
These changes include FLOPs presets for Zen4, validated using the
|
||||
Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 319cf82c..f6a40a35 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -523,6 +523,14 @@ PRESET,PAPI_BR_TKN,DERIVED_SUB,RETIRED_TAKEN_BRANCH_INSTRUCTIONS,RETIRED_UNCONDI
|
||||
PRESET,PAPI_BR_NTK,DERIVED_SUB,RETIRED_BRANCH_INSTRUCTIONS,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,RETIRED_CONDITIONAL_BRANCH_INSTRUCTIONS,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL,RETIRED_FP_OPS_BY_TYPE:SCALAR_ALL
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_FP_OPS_BY_TYPE:VECTOR_ALL
|
||||
+PRESET,PAPI_FMA_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MAC,RETIRED_FP_OPS_BY_TYPE:SCALAR_MAC
|
||||
+PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS_BY_TYPE:SCALAR_MUL
|
||||
+PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
+PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
+PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit a31c3a4e9788e03fee113263a9f94bd638a66721
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Wed Jun 21 15:13:47 2023 +0000
|
||||
|
||||
add cycles and instructions presets for Zen4
|
||||
|
||||
These changes include the 'total cycles' and 'instructions completed'
|
||||
presets for Zen4, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the AMD Zen4 architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index f6a40a35..86e11fe6 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -531,6 +531,8 @@ PRESET,PAPI_FML_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_MUL,RETIRED_FP_OPS
|
||||
PRESET,PAPI_FAD_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_ADD,RETIRED_FP_OPS_BY_TYPE:SCALAR_ADD
|
||||
PRESET,PAPI_FDV_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_DIV,RETIRED_FP_OPS_BY_TYPE:SCALAR_DIV
|
||||
PRESET,PAPI_FSQ_INS,DERIVED_ADD,RETIRED_FP_OPS_BY_TYPE:VECTOR_SQRT,RETIRED_FP_OPS_BY_TYPE:SCALAR_SQRT
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 94303410ce97a84408b0b2d727701a60c6f137aa
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Sun Jul 23 15:38:36 2023 +0000
|
||||
|
||||
add various Sapphire Rapids presets
|
||||
|
||||
These changes include cycles, instructions, branching, and FLOPs presets
|
||||
for Intel Sapphire Rapids, validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Sapphire Rapids architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 86e11fe6..eac0855f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1010,6 +1010,29 @@ PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
# End of icx list
|
||||
|
||||
+# Intel Sapphire Rapids events
|
||||
+CPU,spr
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CLK_UNHALTED:THREAD_P
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED:ANY_P
|
||||
+PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES
|
||||
+# FLOPs
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+# Branches
|
||||
+PRESET,PAPI_BR_UCN,DERIVED_SUB,BR_INST_RETIRED:ALL_BRANCHES,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_CN,NOT_DERIVED,BR_INST_RETIRED:COND
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,BR_INST_RETIRED:COND_TAKEN
|
||||
+PRESET,PAPI_BR_NTK,NOT_DERIVED,BR_INST_RETIRED:COND_NTAKEN
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_INST_RETIRED:COND,BR_MISP_RETIRED:COND
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
+# End of spr list
|
||||
+
|
||||
#
|
||||
# Intel MIC / Xeon-Phi / Knights Landing
|
||||
# Intel Knights Mill
|
||||
|
||||
commit 42b14987ca1a7028b6cf6fdc190a2fa6a0fd8e18
|
||||
Author: Daniel Barry <dbarry@vols.utk.edu>
|
||||
Date: Tue Jul 25 12:16:56 2023 +0000
|
||||
|
||||
add more Ice Lake FLOPs presets
|
||||
|
||||
Since there are enough counters available to monitor both single- and
|
||||
double-precision floating-point events, PAPI_FP_OPS, PAPI_FP_INS, and
|
||||
PAPI_VEC_INS are all defined.
|
||||
These presets have been validated using the Counter Analysis Toolkit.
|
||||
|
||||
These changes have been tested on the Intel Ice Lake architecture.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index eac0855f..df82ac1c 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1006,8 +1006,11 @@ PRESET,PAPI_BR_INS,NOT_DERIVED,BR_INST_RETIRED:ALL_BRANCHES
|
||||
PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|N1|2|*|+|N2|4|*|+|N3|8|*|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
# PAPI_SP_OPS = FP_ARITH:SCALAR_SINGLE + 4*FP_ARITH:128B_PACKED_SINGLE + 8*256B_PACKED_SINGLE + 16*512B_PACKED_SINGLE
|
||||
PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|N1|4|*|+|N2|8|*|+|N3|16|*|+|N4|+|N5|2|*|+|N6|4|*|+|N7|8|*|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
+PRESET,PAPI_FP_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|N6|N7|+|+|+|+|+|+|+|,FP_ARITH_INST_RETIRED:SCALAR_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:SCALAR_DOUBLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_DP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_DOUBLE,FP_ARITH:128B_PACKED_DOUBLE,FP_ARITH:256B_PACKED_DOUBLE,FP_ARITH:512B_PACKED_DOUBLE
|
||||
PRESET,PAPI_VEC_SP,DERIVED_POSTFIX,N0|N1|N2|N3|+|+|+|,FP_ARITH:SCALAR_SINGLE,FP_ARITH:128B_PACKED_SINGLE,FP_ARITH:256B_PACKED_SINGLE,FP_ARITH:512B_PACKED_SINGLE
|
||||
+PRESET,PAPI_VEC_INS,DERIVED_POSTFIX,N0|N1|N2|N3|N4|N5|+|+|+|+|+|,FP_ARITH_INST_RETIRED:128B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:256B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:512B_PACKED_SINGLE,FP_ARITH_INST_RETIRED:128B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:256B_PACKED_DOUBLE,FP_ARITH_INST_RETIRED:512B_PACKED_DOUBLE
|
||||
# End of icx list
|
||||
|
||||
# Intel Sapphire Rapids events
|
@ -1,156 +0,0 @@
|
||||
commit 77ee6b54f4080ca27b7efcb4c91679d0f1e090b5
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Fri Jan 24 10:25:36 2020 -0500
|
||||
|
||||
New libpfm4 contains "aliased" pmus for backward compatibility,
|
||||
amd64_fam17h == amd64_fam17h_zen1; this causes us to put BOTH pmus
|
||||
into the PMUs supported string and double the events in native_avail.
|
||||
This update recognizes when aliases exist (the names must be hard-coded)
|
||||
and uses only the most recent name.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index 3b5f8d13f..3262608cd 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -31,6 +31,9 @@
|
||||
// used to step through the attributes when enumerating events
|
||||
static int attr_idx;
|
||||
|
||||
+/* alias flags to handle amd_fam17h, amd_fam17h_zen1 both present PMUs*/
|
||||
+static int amd64_fam17h_zen1_present = 0;
|
||||
+
|
||||
/** @class find_existing_event
|
||||
* @brief looks up an event, returns it if it exists
|
||||
*
|
||||
@@ -482,7 +485,13 @@ static struct native_event_t *allocate_native_event(
|
||||
*
|
||||
* @returns returns a libpfm event number
|
||||
* @retval PAPI_ENOEVENT Could not find an event
|
||||
- *
|
||||
+ * Operational note: _pe_libpfm4_init() must be called first to set
|
||||
+ * flags for synonymous PMUs. At this writing only
|
||||
+ * amd64_fam17h_zen1_present is defined.
|
||||
+ * Operational note: We indirectly return the pmu_idx within the
|
||||
+ * event data; the calling code uses that to set
|
||||
+ * pmu_idx for subsequent calls. All we do is find
|
||||
+ * the next valid pmu, if any.
|
||||
*/
|
||||
|
||||
static int
|
||||
@@ -511,6 +520,12 @@ get_first_event_next_pmu(int pmu_idx, int pmu_type)
|
||||
break;
|
||||
}
|
||||
|
||||
+ if ((ret==PFM_SUCCESS) && amd64_fam17h_zen1_present && strcmp(pinfo.name, "amd64_fam17h") == 0) {
|
||||
+ /* Skip as if invalid; we want the PMU amd64_fam17h_zen1 instead. */
|
||||
+ pmu_idx++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) {
|
||||
|
||||
pidx=pinfo.first_event;
|
||||
@@ -1159,6 +1174,35 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
event_table->default_pmu.size = sizeof(pfm_pmu_info_t);
|
||||
retval=pfm_get_pmu_info(0, &(event_table->default_pmu));
|
||||
|
||||
+ SUBDBG("Prescan for aliases.")
|
||||
+ /* We have to see if we have aliases in there as separate PMUs, */
|
||||
+ /* we don't want both PMUs with all the events duplicated. */
|
||||
+ /* For aliases, either is valid alone, but if both are present */
|
||||
+ /* specify a preference in the code. */
|
||||
+ /* Alias: amd64_fam17h_zen1 over amd64_fam17h. */
|
||||
+ /* Alias flags are static ints global to this file. */
|
||||
+ i=0;
|
||||
+ while(1) {
|
||||
+ memset(&pinfo,0,sizeof(pfm_pmu_info_t));
|
||||
+ pinfo.size = sizeof(pfm_pmu_info_t);
|
||||
+ retval=pfm_get_pmu_info(i, &pinfo);
|
||||
+
|
||||
+ /* We're done if we hit an invalid PMU entry */
|
||||
+ /* We can't check against PFM_PMU_MAX as that might not */
|
||||
+ /* match if libpfm4 is dynamically linked */
|
||||
+
|
||||
+ if (retval==PFM_ERR_INVAL) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if ( (retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
+ (pmu_is_present_and_right_type(&pinfo,pmu_type)) &&
|
||||
+ (strcmp(pinfo.name,"amd64_fam17h_zen1") == 0) ) {
|
||||
+ amd64_fam17h_zen1_present = 1;
|
||||
+ }
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("Detected pmus:\n");
|
||||
i=0;
|
||||
while(1) {
|
||||
@@ -1177,6 +1221,12 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
(pmu_is_present_and_right_type(&pinfo,pmu_type))) {
|
||||
|
||||
+ /* skip if it is amd64_fam17h and zen1 is also present. */
|
||||
+ if (strcmp(pinfo.name,"amd64_fam17h") == 0 && amd64_fam17h_zen1_present) {
|
||||
+ i++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("\t%d %s %s %d\n",i,
|
||||
pinfo.name,pinfo.desc,pinfo.type);
|
||||
|
||||
@@ -1193,11 +1243,9 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
/* Hack to have "default core" PMU */
|
||||
if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
strcmp(pinfo.name,"ix86arch")) {
|
||||
-
|
||||
- SUBDBG("\t %s is default\n",pinfo.name);
|
||||
- memcpy(&(event_table->default_pmu),
|
||||
- &pinfo,sizeof(pfm_pmu_info_t));
|
||||
- found_default++;
|
||||
+ memcpy(&(event_table->default_pmu),
|
||||
+ &pinfo,sizeof(pfm_pmu_info_t));
|
||||
+ found_default++;
|
||||
}
|
||||
}
|
||||
|
||||
commit 79fe2a025afb8acb317032030c8847c9cbfd0162
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Jan 5 13:45:34 2021 +0900
|
||||
|
||||
Get model_string for ARM processor from pfm_get_pmu_info() function
|
||||
|
||||
On ARM processors, the model_string does not appear in /proc/cpuinfo.
|
||||
Instead of looking at the /proc/cpuinfo information, you can look at the lscpu command information; see the following URLs:
|
||||
https://github.com/google/cpu_features/issues/26
|
||||
http://suihkulokki.blogspot.com/2018/02/making-sense-of-proccpuinfo-on-arm.html
|
||||
|
||||
The libpfm4 library identifies the ARM processor type from the "CPU implementer" and "CPU part" fields in the /proc/cpuinfo information.
|
||||
The papi library can use the pfm_get_pmu_info() function from the libpfm4 library to obtain a string identifying the ARM processor type.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index a84819cc0..744851ff0 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -1149,6 +1149,7 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
|
||||
pfm_err_t retval = PFM_SUCCESS;
|
||||
pfm_pmu_info_t pinfo;
|
||||
+ unsigned int strSize;
|
||||
|
||||
/* allocate the native event structure */
|
||||
event_table->num_native_events=0;
|
||||
@@ -1247,6 +1248,13 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
&pinfo,sizeof(pfm_pmu_info_t));
|
||||
found_default++;
|
||||
}
|
||||
+ if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
+ ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) {
|
||||
+ if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) {
|
||||
+ strSize = sizeof(_papi_hwi_system_info.hw_info.model_string);
|
||||
+ strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if (pmu_type==PMU_TYPE_UNCORE) {
|
@ -1,90 +0,0 @@
|
||||
commit c754f3bf1763358aaf70c0d64bc6cc2df29d8fec
|
||||
Author: Vince Weaver <vincent.weaver@maine.edu>
|
||||
Date: Thu Jan 10 20:42:56 2019 -0500
|
||||
|
||||
perf_event: fix granularity setting for attached processes
|
||||
|
||||
the old code was setting the granularity wrong when attaching to a CPU.
|
||||
|
||||
diff --git a/src/components/perf_event/perf_event.c b/src/components/perf_event/perf_event.c
|
||||
index 1f4bbcee..2f2f380e 100644
|
||||
--- a/src/components/perf_event/perf_event.c
|
||||
+++ b/src/components/perf_event/perf_event.c
|
||||
@@ -684,11 +684,23 @@ open_pe_events( pe_context_t *ctx, pe_control_t *ctl )
|
||||
int i, ret = PAPI_OK;
|
||||
long pid;
|
||||
|
||||
- if (ctl->granularity==PAPI_GRN_SYS) {
|
||||
- pid = -1;
|
||||
+
|
||||
+ /* Set the pid setting */
|
||||
+ /* If attached, this is the pid of process we are attached to. */
|
||||
+ /* If GRN_THRD then it is 0 meaning current process only */
|
||||
+ /* If GRN_SYS then it is -1 meaning all procs on this CPU */
|
||||
+ /* Note if GRN_SYS then CPU must be specified, not -1 */
|
||||
+
|
||||
+ if (ctl->attached) {
|
||||
+ pid = ctl->tid;
|
||||
}
|
||||
else {
|
||||
- pid = ctl->tid;
|
||||
+ if (ctl->granularity==PAPI_GRN_SYS) {
|
||||
+ pid = -1;
|
||||
+ }
|
||||
+ else {
|
||||
+ pid = 0;
|
||||
+ }
|
||||
}
|
||||
|
||||
for( i = 0; i < ctl->num_events; i++ ) {
|
||||
@@ -1650,6 +1662,7 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
return ret;
|
||||
}
|
||||
|
||||
+ pe_ctl->attached = 1;
|
||||
pe_ctl->tid = option->attach.tid;
|
||||
|
||||
/* If events have been already been added, something may */
|
||||
@@ -1662,7 +1675,9 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
case PAPI_DETACH:
|
||||
pe_ctl = ( pe_control_t *) ( option->attach.ESI->ctl_state );
|
||||
|
||||
+ pe_ctl->attached = 0;
|
||||
pe_ctl->tid = 0;
|
||||
+
|
||||
return PAPI_OK;
|
||||
|
||||
case PAPI_CPU_ATTACH:
|
||||
@@ -1676,11 +1691,6 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
}
|
||||
/* looks like we are allowed so set cpu number */
|
||||
|
||||
- /* this tells the kernel not to count for a thread */
|
||||
- /* should we warn if we try to set both? perf_event */
|
||||
- /* will reject it. */
|
||||
- pe_ctl->tid = -1;
|
||||
-
|
||||
pe_ctl->cpu = option->cpu.cpu_num;
|
||||
|
||||
return PAPI_OK;
|
||||
@@ -1696,7 +1706,7 @@ _pe_ctl( hwd_context_t *ctx, int code, _papi_int_option_t *option )
|
||||
return ret;
|
||||
}
|
||||
/* looks like we are allowed, so set event set level counting domains */
|
||||
- pe_ctl->domain = option->domain.domain;
|
||||
+ pe_ctl->domain = option->domain.domain;
|
||||
return PAPI_OK;
|
||||
|
||||
case PAPI_GRANUL:
|
||||
diff --git a/src/components/perf_event/perf_event_lib.h b/src/components/perf_event/perf_event_lib.h
|
||||
index f4ad0c5d..0c50ab9f 100644
|
||||
--- a/src/components/perf_event/perf_event_lib.h
|
||||
+++ b/src/components/perf_event/perf_event_lib.h
|
||||
@@ -30,6 +30,7 @@ typedef struct {
|
||||
unsigned int overflow; /* overflow enable */
|
||||
unsigned int inherit; /* inherit enable */
|
||||
unsigned int overflow_signal; /* overflow signal */
|
||||
+ unsigned int attached; /* attached to a process */
|
||||
int cidx; /* current component */
|
||||
int cpu; /* which cpu to measure */
|
||||
pid_t tid; /* thread we are monitoring */
|
7
gating.yaml
Normal file
7
gating.yaml
Normal file
@ -0,0 +1,7 @@
|
||||
--- !Policy
|
||||
product_versions:
|
||||
- rhel-8
|
||||
decision_context: osci_compose_gate
|
||||
rules:
|
||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier1-gating.functional}
|
||||
- !PassingTestCaseRule {test_case_name: baseos-ci.brew-build.tier1-gating-aarch64.functional}
|
382
papi-bz1908126.patch
Normal file
382
papi-bz1908126.patch
Normal file
@ -0,0 +1,382 @@
|
||||
commit 77ee6b54f4080ca27b7efcb4c91679d0f1e090b5
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Fri Jan 24 10:25:36 2020 -0500
|
||||
|
||||
New libpfm4 contains "aliased" pmus for backward compatibility,
|
||||
amd64_fam17h == amd64_fam17h_zen1; this causes us to put BOTH pmus
|
||||
into the PMUs supported string and double the events in native_avail.
|
||||
This update recognizes when aliases exist (the names must be hard-coded)
|
||||
and uses only the most recent name.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index 3b5f8d13f..3262608cd 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -31,6 +31,9 @@
|
||||
// used to step through the attributes when enumerating events
|
||||
static int attr_idx;
|
||||
|
||||
+/* alias flags to handle amd_fam17h, amd_fam17h_zen1 both present PMUs*/
|
||||
+static int amd64_fam17h_zen1_present = 0;
|
||||
+
|
||||
/** @class find_existing_event
|
||||
* @brief looks up an event, returns it if it exists
|
||||
*
|
||||
@@ -482,7 +485,13 @@ static struct native_event_t *allocate_native_event(
|
||||
*
|
||||
* @returns returns a libpfm event number
|
||||
* @retval PAPI_ENOEVENT Could not find an event
|
||||
- *
|
||||
+ * Operational note: _pe_libpfm4_init() must be called first to set
|
||||
+ * flags for synonymous PMUs. At this writing only
|
||||
+ * amd64_fam17h_zen1_present is defined.
|
||||
+ * Operational note: We indirectly return the pmu_idx within the
|
||||
+ * event data; the calling code uses that to set
|
||||
+ * pmu_idx for subsequent calls. All we do is find
|
||||
+ * the next valid pmu, if any.
|
||||
*/
|
||||
|
||||
static int
|
||||
@@ -511,6 +520,12 @@ get_first_event_next_pmu(int pmu_idx, int pmu_type)
|
||||
break;
|
||||
}
|
||||
|
||||
+ if ((ret==PFM_SUCCESS) && amd64_fam17h_zen1_present && strcmp(pinfo.name, "amd64_fam17h") == 0) {
|
||||
+ /* Skip as if invalid; we want the PMU amd64_fam17h_zen1 instead. */
|
||||
+ pmu_idx++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
if ((ret==PFM_SUCCESS) && pmu_is_present_and_right_type(&pinfo,pmu_type)) {
|
||||
|
||||
pidx=pinfo.first_event;
|
||||
@@ -1159,6 +1174,35 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
event_table->default_pmu.size = sizeof(pfm_pmu_info_t);
|
||||
retval=pfm_get_pmu_info(0, &(event_table->default_pmu));
|
||||
|
||||
+ SUBDBG("Prescan for aliases.")
|
||||
+ /* We have to see if we have aliases in there as separate PMUs, */
|
||||
+ /* we don't want both PMUs with all the events duplicated. */
|
||||
+ /* For aliases, either is valid alone, but if both are present */
|
||||
+ /* specify a preference in the code. */
|
||||
+ /* Alias: amd64_fam17h_zen1 over amd64_fam17h. */
|
||||
+ /* Alias flags are static ints global to this file. */
|
||||
+ i=0;
|
||||
+ while(1) {
|
||||
+ memset(&pinfo,0,sizeof(pfm_pmu_info_t));
|
||||
+ pinfo.size = sizeof(pfm_pmu_info_t);
|
||||
+ retval=pfm_get_pmu_info(i, &pinfo);
|
||||
+
|
||||
+ /* We're done if we hit an invalid PMU entry */
|
||||
+ /* We can't check against PFM_PMU_MAX as that might not */
|
||||
+ /* match if libpfm4 is dynamically linked */
|
||||
+
|
||||
+ if (retval==PFM_ERR_INVAL) {
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if ( (retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
+ (pmu_is_present_and_right_type(&pinfo,pmu_type)) &&
|
||||
+ (strcmp(pinfo.name,"amd64_fam17h_zen1") == 0) ) {
|
||||
+ amd64_fam17h_zen1_present = 1;
|
||||
+ }
|
||||
+ i++;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("Detected pmus:\n");
|
||||
i=0;
|
||||
while(1) {
|
||||
@@ -1177,6 +1221,12 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
if ((retval==PFM_SUCCESS) && (pinfo.name != NULL) &&
|
||||
(pmu_is_present_and_right_type(&pinfo,pmu_type))) {
|
||||
|
||||
+ /* skip if it is amd64_fam17h and zen1 is also present. */
|
||||
+ if (strcmp(pinfo.name,"amd64_fam17h") == 0 && amd64_fam17h_zen1_present) {
|
||||
+ i++;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
SUBDBG("\t%d %s %s %d\n",i,
|
||||
pinfo.name,pinfo.desc,pinfo.type);
|
||||
|
||||
@@ -1193,11 +1243,9 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
/* Hack to have "default core" PMU */
|
||||
if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
strcmp(pinfo.name,"ix86arch")) {
|
||||
-
|
||||
- SUBDBG("\t %s is default\n",pinfo.name);
|
||||
- memcpy(&(event_table->default_pmu),
|
||||
- &pinfo,sizeof(pfm_pmu_info_t));
|
||||
- found_default++;
|
||||
+ memcpy(&(event_table->default_pmu),
|
||||
+ &pinfo,sizeof(pfm_pmu_info_t));
|
||||
+ found_default++;
|
||||
}
|
||||
}
|
||||
|
||||
commit 79fe2a025afb8acb317032030c8847c9cbfd0162
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Jan 5 13:45:34 2021 +0900
|
||||
|
||||
Get model_string for ARM processor from pfm_get_pmu_info() function
|
||||
|
||||
On ARM processors, the model_string does not appear in /proc/cpuinfo.
|
||||
Instead of looking at the /proc/cpuinfo information, you can look at the lscpu command information; see the following URLs:
|
||||
https://github.com/google/cpu_features/issues/26
|
||||
http://suihkulokki.blogspot.com/2018/02/making-sense-of-proccpuinfo-on-arm.html
|
||||
|
||||
The libpfm4 library identifies the ARM processor type from the "CPU implementer" and "CPU part" fields in the /proc/cpuinfo information.
|
||||
The papi library can use the pfm_get_pmu_info() function from the libpfm4 library to obtain a string identifying the ARM processor type.
|
||||
|
||||
diff --git a/src/components/perf_event/pe_libpfm4_events.c b/src/components/perf_event/pe_libpfm4_events.c
|
||||
index a84819cc0..744851ff0 100644
|
||||
--- a/src/components/perf_event/pe_libpfm4_events.c
|
||||
+++ b/src/components/perf_event/pe_libpfm4_events.c
|
||||
@@ -1149,6 +1149,7 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
|
||||
pfm_err_t retval = PFM_SUCCESS;
|
||||
pfm_pmu_info_t pinfo;
|
||||
+ unsigned int strSize;
|
||||
|
||||
/* allocate the native event structure */
|
||||
event_table->num_native_events=0;
|
||||
@@ -1247,6 +1248,13 @@ _pe_libpfm4_init(papi_vector_t *component, int cidx,
|
||||
&pinfo,sizeof(pfm_pmu_info_t));
|
||||
found_default++;
|
||||
}
|
||||
+ if ( (pinfo.type==PFM_PMU_TYPE_CORE) &&
|
||||
+ ( _papi_hwi_system_info.hw_info.vendor == PAPI_VENDOR_ARM)) {
|
||||
+ if (strlen(_papi_hwi_system_info.hw_info.model_string) == 0) {
|
||||
+ strSize = sizeof(_papi_hwi_system_info.hw_info.model_string);
|
||||
+ strncpy( _papi_hwi_system_info.hw_info.model_string, pinfo.desc, strSize - 1);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
if (pmu_type==PMU_TYPE_UNCORE) {
|
||||
commit 85003c716d76eff47607fa0967537c6cf63d8348
|
||||
Author: Steve Walk <swalk.cavium@gmail.com>
|
||||
Date: Fri Jun 8 15:50:50 2018 -0400
|
||||
|
||||
enable Cavium ThunderX2 support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index bb11f61d3..46827f180 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1841,6 +1841,31 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_READ_ACCESS
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_WRITE_ACCESS
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_READ_REFILL
|
||||
PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_WRITE_REFILL
|
||||
+
|
||||
+#####################
|
||||
+# ARM ThunderX2 #
|
||||
+#####################
|
||||
+CPU,arm_thunderx2
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_RETIRED
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_RETIRED
|
||||
+PRESET,PAPI_L1_DCA,DERIVED_ADD,L1D_CACHE_RD,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
|
||||
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit 9a44d82928ed17ba2ff21eb88b89c5829d0ea30e
|
||||
Author: Steve Kaufmann <steven.kaufmann@hpe.com>
|
||||
Date: Wed Jun 24 14:08:08 2020 -0400
|
||||
|
||||
Added PAPI preset support for Fujitsu A64FX.
|
||||
|
||||
Signed-off-by: Heike Jagode <jagode@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..1b5c15542 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1877,6 +1877,21 @@ PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
|
||||
PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
|
||||
PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
|
||||
+#########################
|
||||
+# ARM Fujitsu A64FX #
|
||||
+#########################
|
||||
+CPU,arm_a64fx
|
||||
+#
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+
|
||||
#
|
||||
CPU,mips_74k
|
||||
#
|
||||
commit b87ac4beda096086e0040f8ec1b44c4791a9739c
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 14:06:22 2020 +0900
|
||||
|
||||
Corrected typo for A64FX support (PAPI_L2_DCH is a typo of PAPI_L2_DCA)
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index fd75f9371..164f05641 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1937,7 +1937,7 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L2_DCH,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
|
||||
#
|
||||
commit 869864f813f0681b5c9a4b65de2135c8708a2afb
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Mon Dec 14 19:34:59 2020 +0900
|
||||
|
||||
Add or modify various A64FX support events, including floating point events (PAPI_FP_OPS, PAPI_SP_OPS, PAPI_DP_OPS).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 164f05641..9192b1041 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1930,15 +1930,46 @@ PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
|
||||
#########################
|
||||
CPU,arm_a64fx
|
||||
#
|
||||
+PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
+PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
+PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
-PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
|
||||
+PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_DCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
|
||||
+PRESET,PAPI_L2_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_L2_TCM,DERIVED_SUB,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
|
||||
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
|
||||
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
|
||||
|
||||
#
|
||||
CPU,mips_74k
|
||||
commit 7a3c22763ef2ba00a2b8cb069c3501f35ecb13de
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Dec 15 13:43:43 2020 +0900
|
||||
|
||||
Modify PAPI_FP_INS and PAPI_VEC_INS for A64FX support
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9192b1041..7b4ceb674 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1941,11 +1941,11 @@ PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
|
||||
-PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
|
||||
PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
|
||||
PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
|
||||
PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
|
||||
-PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
commit 530d4763fb8e6dd52109387bd58c8c1305fd6b63
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Feb 12 15:01:21 2021 +0900
|
||||
|
||||
remove PAPI_L1_DCA and PAPI_L1_DCH for a64fx
|
||||
|
||||
There seems to be a problem with PAPI_L1_DCA and PAPI_L1_DCH for a64fx: prefetches cause them to overcount.
|
||||
I delete (comment out) PAPI_L1_DCA and PAPI_L1_DCH for a64fx from the papi_events.csv file.
|
||||
I will issue the pull request again once I have identified how to handle the overcount.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 7b4ceb674..0f5ec8344 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1949,8 +1949,8 @@ PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
|
||||
PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
|
||||
PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
|
||||
PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
|
||||
-PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
-PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
|
||||
+#PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
commit 340f68940234f2db181147fc249907b4f1293e62
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Feb 16 17:16:24 2021 +0900
|
||||
|
||||
remove PAPI_L1_TCA and PAPI_L1_TCH for a64fx
|
||||
|
||||
PAPI_L1_TCA and PAPI_L1_TCH for a64fx measure L1D_CACHE just like PAPI_L1_DCA and PAPI_L1_DCH,
|
||||
so I delete (comment out) PAPI_L1_TCA and PAPI_L1_TCH for a64fx from the papi_events.csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 0f5ec8344..4ef647959 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1955,8 +1955,8 @@ PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
|
||||
PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
|
||||
-PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
-PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
+#PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
|
||||
+#PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
|
||||
PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
|
||||
PRESET,PAPI_L2_DCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|+|,L2D_CACHE,L2D_CACHE_REFILL,L2D_SWAP_DM,L2D_CACHE_MIBMCH_PRF
|
61
papi-rhbz2037417.patch
Normal file
61
papi-rhbz2037417.patch
Normal file
@ -0,0 +1,61 @@
|
||||
commit 6964aa356fa606f320c7b871123aceb5c1f21999
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Tue Aug 24 14:17:29 2021 +0900
|
||||
|
||||
Fix the PAPI_FUL_CCY setting for a64fx
|
||||
|
||||
In a64fx, the maximum number of instruction commits is 4, so the following setting was incorrect.
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT-4INST_COMMIT
|
||||
|
||||
The correct setting is:
|
||||
PAPI_FUL_CCY=CPU_CYCLES-0INST_COMMIT-1INST_COMMIT-2INST_COMMIT-3INST_COMMIT
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..74deb712f 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1934,7 +1934,7 @@ PRESET,PAPI_PRF_DM,DERIVED_SUB,L2D_CACHE_REFILL_PRF,L2D_CACHE_MIBMCH_PRF
|
||||
PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
-PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT,4INST_COMMIT
|
||||
+PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
||||
commit fbf3b9e3d17c4ec4bd7e33410c44fc5aed57e36f
|
||||
Author: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
Date: Fri Mar 4 15:41:30 2022 +0900
|
||||
|
||||
Add PAPI idle-related preset events for a64fx
|
||||
|
||||
For a64fx, add four PAPI idle-related preset events
|
||||
(PAPI_BRU_IDL/PAPI_FXU_IDL/PAPI_FPU_IDL/PAPI_LSU_IDL).
|
||||
|
||||
PAPI_BRU_IDL = BR_COMP_WAIT
|
||||
PAPI_FXU_IDL = EU_COMP_WAIT - FL_COMP_WAIT
|
||||
PAPI_FPU_IDL = FL_COMP_WAIT
|
||||
PAPI_LSU_IDL = LD_COMP_WAIT
|
||||
|
||||
The specifications of BR_COMP_WAIT, EU_COMP_WAIT, FL_COMP_WAIT,
|
||||
and LD_COMP_WAIT can be found in the "14.4. Cycle Accounting"
|
||||
section of A64FX_Microarchitecture_Manual_en_1.5.pdf at the following URL:
|
||||
https://github.com/fujitsu/A64FX/blob/master/doc
|
||||
|
||||
Signed-off-by: Masahiko, Yamada <yamada.masahiko@fujitsu.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 74deb712f..1cd498e91 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -1935,6 +1935,10 @@ PRESET,PAPI_MEM_SCY,NOT_DERIVED,LD_COMP_WAIT_L2_MISS
|
||||
PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
|
||||
PRESET,PAPI_STL_CCY,NOT_DERIVED,0INST_COMMIT
|
||||
PRESET,PAPI_FUL_CCY,DERIVED_SUB,CPU_CYCLES,0INST_COMMIT,1INST_COMMIT,2INST_COMMIT,3INST_COMMIT
|
||||
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
|
||||
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
|
||||
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
|
||||
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
|
||||
PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
|
||||
PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
|
||||
PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
|
302
papi-zen.patch
Normal file
302
papi-zen.patch
Normal file
@ -0,0 +1,302 @@
|
||||
commit 20890adcb59a1c1648cb70be65332c03a3781e1a
|
||||
Author: Anthony Castaldo <TonyCastaldo@icl.utk.edu>
|
||||
Date: Thu Jan 16 16:43:51 2020 -0500
|
||||
|
||||
Added two machine types to papi_events.csv to be in line with
|
||||
libpfm4 update to support amd64_fam17h_zen1 and zen2.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 97446ad2c..8e96adfbd 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -396,6 +396,8 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
+CPU,amd64_fam17h_zen1
|
||||
+CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
|
||||
commit ae449f73abd0849f05ab3e1f3a64bde0c670c645
|
||||
Author: Anthony <adanalis@icl.utk.edu>
|
||||
Date: Fri Jul 17 12:05:14 2020 -0400
|
||||
|
||||
Separated the cache preset events of AMD Zen1 and Zen2 and added some more.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 8e96adfbd..2325bd4dc 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -397,7 +397,6 @@ PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SINGLE_DIV_OPS:DOUBLE
|
||||
#
|
||||
CPU,amd64_fam17h
|
||||
CPU,amd64_fam17h_zen1
|
||||
-CPU,amd64_fam17h_zen2
|
||||
#
|
||||
PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
@@ -434,6 +433,27 @@ PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_MULT_FLOPS:DP_MULT
|
||||
PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_ADD_SUB_FLOPS:DP_ADD_SUB_FLOPS
|
||||
PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_OPERATIONS:SP_DIV_FLOPS:DP_DIV_FLOPS,NOTE,"Counts both divide and square root instructions"
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam17h_zen2
|
||||
+# Events copied from zen1 that also exist on zen2
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+# Events discovered via CAT
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
+
|
||||
#
|
||||
#
|
||||
CPU,Intel architectural PMU
|
||||
commit ccc22b5dda46fea8933d99950c3e30b5298cdd1d
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 13:33:38 2020 -0400
|
||||
|
||||
Added presets for floating-point operations (FP_OPS, DP_OPS, SP_OPS)
|
||||
for AMD zen2.
|
||||
|
||||
PPR (under section 2.1.15.3. -- https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
explains that FLOP events require MergeEvent support, which was included
|
||||
in the 5.6 kernel.
|
||||
|
||||
===>>> Hence, a kernel version 5.6 or greater is required.
|
||||
|
||||
NOTE: without the MergeEvent support in the kernel,
|
||||
there is no guarantee that the SSE/AVX FLOP
|
||||
events produce any useful data whatsoever.
|
||||
|
||||
These events have been tested and verified for
|
||||
scalar flops, SSE, AVX, and FMA:
|
||||
|
||||
(1) for one AVX instruction (e.g. _mm256_add_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS event returns
|
||||
a count of 4 (in the case of double precision), and
|
||||
a count of 8 (in the case of single precision).
|
||||
|
||||
(2) for one AVX FMA instruction (e.g. _mm256_macc_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MAC_FLOPS event returns
|
||||
a count of 8 (in the case of double precision), and
|
||||
a count of 16 (in the case of single precision).
|
||||
|
||||
(3) for one SSE instruction (e.g. _mm_mul_pd()),
|
||||
the RETIRED_SSE_AVX_FLOPS:MULT_FLOPS event returns
|
||||
a count of 2 (in the case of double precision), and
|
||||
a count of 4 (in the case of single precision).
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2325bd4dc..2ff3e4d16 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -454,8 +454,19 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
|
||||
-#
|
||||
-#
|
||||
+# New FLOP event on zen2
|
||||
+# PPR (under section 2.1.15.3. --
|
||||
+# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
+# explains that FLOP events require MergeEvent support, which was included
|
||||
+# in the 5.6 kernel.
|
||||
+# Hence, a kernel version 5.6 or greater is required.
|
||||
+# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
+# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+
|
||||
+
|
||||
CPU,Intel architectural PMU
|
||||
CPU,ix86arch
|
||||
#
|
||||
|
||||
commit 35f93252a6e222299c03f2c94912334488e76b02
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 18:40:59 2020 -0400
|
||||
|
||||
Added presets for floating-point instructions (FP_INS, VEC_DP, VEC_SP)
|
||||
for AMD zen2.
|
||||
|
||||
For unoptimized code (like native MMM), these events may include
|
||||
non-numeric floating-point instructions, e.g. MOVSD: move or merge
|
||||
scalar double-precision floating-point value instructions.
|
||||
|
||||
Tested with:
|
||||
1) SSE double: _mm_mul_pd / _mm_add_pd
|
||||
2) SSE single: _mm_mul_ps / _mm_add_ps
|
||||
3) AVX double: _mm256_mul_pd / _mm256_add_pd
|
||||
4) AVX single: _mm256_mul_ps / _mm256_add_ps
|
||||
5) FMA double: _mm256_macc_pd
|
||||
6) FMA single: _mm256_macc_ps
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 2ff3e4d16..60a64564d 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -465,6 +465,11 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
+# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
+PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 344f6493425d865577508ff32b6f65516b1b4394
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Sep 24 19:03:31 2020 -0400
|
||||
|
||||
Added missing 'PRESET' to csv file.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 60a64564d..724d520f0 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -467,9 +467,9 @@ PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
-PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 4616aa717c5301a9a478876661eb8ac1f18c0333
|
||||
Author: Heike Jagode <jagode@icl.utk.edu>
|
||||
Date: Thu Oct 8 11:36:23 2020 -0400
|
||||
|
||||
For zen2, since FP_OPS counts both single- and double-prec operations
|
||||
correctly, we don't need to confuse the user with additional
|
||||
DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
|
||||
Same applies for events counting FP instructions.
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 724d520f0..9ebf557e1 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -463,13 +463,20 @@ PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_
|
||||
# NOTE: without the MergeEvent support in the kernel, there is no guarantee
|
||||
# that this SSE/AVX FLOP event produces any useful data whatsoever.
|
||||
PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
-PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+# Since FP_OPS counts both single- and double-prec operations
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# DP_OPS and SP_OPS events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_DP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#PRESET,PAPI_SP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+#
|
||||
# Floating-point instructions (including non-numeric floating-point instructions,
|
||||
# e.g. Move or Merge Scalar Double-Precision Floating-Point values)
|
||||
PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
-PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+# Since FP_INS counts both single- and double-prec instuctions
|
||||
+# correctly, we don't need to confuse the user with additional
|
||||
+# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
+#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
||||
|
||||
commit 274219e85ba8adcd2e9c78507adf7edb05b71daa
|
||||
Author: Sebastian Mobo <smobo@vols.utk.edu>
|
||||
Date: Thu Oct 8 13:40:21 2020 -0400
|
||||
|
||||
Added instruction-cache preset events for the Zen2.
|
||||
|
||||
Signed-off-by: Anthony <adanalis@icl.utk.edu>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 9ebf557e1..fd75f9371 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -453,7 +453,12 @@ PRESET,PAPI_L1_DCA,NOT_DERIVED,perf::PERF_COUNT_HW_CACHE_L1D:ACCESS
|
||||
PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L
|
||||
PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_L_HIT_X
|
||||
-
|
||||
+#
|
||||
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+#
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
# New FLOP event on zen2
|
||||
# PPR (under section 2.1.15.3. --
|
||||
# https://www.amd.com/system/files/TechDocs/54945_3.03_ppr_ZP_B2_pub.zip)
|
||||
|
||||
commit 02f34baafb868d183f21bebfd3c46574847b9929
|
||||
Author: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
Date: Tue May 18 02:51:56 2021 +0530
|
||||
|
||||
Added AMD Zen3 preset events. Refer section 2.1.17.2 of PPR for AMD family 19h model 01h, https://www.amd.com/system/files/TechDocs/55898_pub.zip
|
||||
|
||||
Signed-off-by: Swarup Sahoo <swarup-chandra.sahoo@amd.com>
|
||||
|
||||
diff --git a/src/papi_events.csv b/src/papi_events.csv
|
||||
index 4ef647959..d9e9da8a3 100644
|
||||
--- a/src/papi_events.csv
|
||||
+++ b/src/papi_events.csv
|
||||
@@ -482,6 +482,33 @@ PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X
|
||||
# VEC_DP and VEC_SP events. So, I'm taking them out.
|
||||
#PRESET,PAPI_VEC_DP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
#PRESET,PAPI_VEC_SP,NOT_DERIVED,RETIRED_MMX_FP_INSTRUCTIONS:SSE_INSTR:MMX_INSTR:X87_INSTR
|
||||
+#
|
||||
+#
|
||||
+CPU,amd64_fam19h_zen3
|
||||
+PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
|
||||
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
|
||||
+PRESET,PAPI_BR_INS,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_TKN,NOT_DERIVED,RETIRED_TAKEN_BRANCH_INSTRUCTIONS
|
||||
+PRESET,PAPI_BR_MSP,NOT_DERIVED,RETIRED_BRANCH_INSTRUCTIONS_MISPREDICTED
|
||||
+PRESET,PAPI_TLB_DM,NOT_DERIVED, L1_DTLB_MISS:TLB_RELOAD_1G_L2_MISS:TLB_RELOAD_2M_L2_MISS:TLB_RELOAD_COALESCED_PAGE_MISS:TLB_RELOAD_4K_L2_MISS:TLB_RELOAD_1G_L2_HIT:TLB_RELOAD_2M_L2_HIT:TLB_RELOAD_COALESCED_PAGE_HIT:TLB_RELOAD_4K_L2_HIT
|
||||
+PRESET,PAPI_TLB_IM,DERIVED_ADD,L1_ITLB_MISS_L2_ITLB_HIT,L1_ITLB_MISS_L2_ITLB_MISS:COALESCED4K:IF1G:IF2M:IF4K
|
||||
+PRESET,PAPI_L1_DCA,NOT_DERIVED,LS_DISPATCH:LD_ST_DISPATCH:STORE_DISPATCH:LD_DISPATCH
|
||||
+PRESET,PAPI_L1_DCM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C
|
||||
+PRESET,PAPI_L2_DCR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:RD_BLK_L:RD_BLK_X:LS_RD_BLK_C_S:CHANGE_TO_X
|
||||
+PRESET,PAPI_L2_DCH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:LS_RD_BLK_C_S:LS_RD_BLK_L_HIT_X:LS_RD_BLK_L_HIT_S:LS_RD_BLK_X
|
||||
+PRESET,PAPI_L2_ICR,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
|
||||
+PRESET,PAPI_L2_ICM,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_MISS
|
||||
+PRESET,PAPI_L2_ICH,NOT_DERIVED,CORE_TO_L2_CACHEABLE_REQUEST_ACCESS_STATUS:IC_FILL_HIT_X:IC_FILL_HIT_S
|
||||
+# RETIRED_SSE_AVX_FLOPS requires MergeEvent support.
|
||||
+PRESET,PAPI_VEC_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FP_OPS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ANY
|
||||
+PRESET,PAPI_FML_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:MULT_FLOPS
|
||||
+PRESET,PAPI_FAD_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:ADD_SUB_FLOPS
|
||||
+PRESET,PAPI_FDV_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,RETIRED_SSE_AVX_FLOPS:DIV_FLOPS
|
||||
|
||||
|
||||
CPU,Intel architectural PMU
|
@ -8,7 +8,7 @@
|
||||
Summary: Performance Application Programming Interface
|
||||
Name: papi
|
||||
Version: 5.6.0
|
||||
Release: 20%{?dist}
|
||||
Release: 17%{?dist}
|
||||
License: BSD
|
||||
Group: Development/System
|
||||
Requires: papi-libs = %{version}-%{release}
|
||||
@ -21,14 +21,12 @@ Patch4: papi-thread_init.patch
|
||||
Patch5: papi-mx.patch
|
||||
Patch6: papi-bz1908126.patch
|
||||
Patch7: papi-rhbz1918721.patch
|
||||
Patch8: papi-rhbz2037417.patch
|
||||
Patch9: papi-rhbz2037426.patch
|
||||
Patch10: papi-rhbz2037427.patch
|
||||
Patch11: papi-zen.patch
|
||||
Patch20: papi-fastread.patch
|
||||
Patch21: papi-arm64fastread.patch
|
||||
Patch30: papi-560_600eventupdate.patch
|
||||
Patch31: papi-701eventupdate.patch
|
||||
Patch40: papi-granularity.patch
|
||||
Patch41: papi-71eventupdate.patch
|
||||
BuildRequires: autoconf
|
||||
BuildRequires: doxygen
|
||||
BuildRequires: ncurses-devel
|
||||
@ -99,14 +97,12 @@ the PAPI user-space libraries and interfaces.
|
||||
%patch5 -p1
|
||||
%patch6 -p1
|
||||
%patch7 -p1
|
||||
%patch8 -p1
|
||||
%patch9 -p1
|
||||
%patch10 -p1
|
||||
%patch11 -p1
|
||||
%patch20 -p1
|
||||
%patch21 -p1
|
||||
%patch30 -p1
|
||||
%patch31 -p1
|
||||
%patch40 -p1
|
||||
%patch41 -p1
|
||||
|
||||
%build
|
||||
%if %{without bundled_libpfm}
|
||||
@ -189,15 +185,6 @@ chrpath --delete $RPM_BUILD_ROOT%{_libdir}/*.so*
|
||||
%{_libdir}/*.a
|
||||
|
||||
%changelog
|
||||
* Fri Nov 17 2023 William Cohen <wcohen@redhat.com> - 5.6.0-20
|
||||
- Update papi event presets (RHEL-9320, RHEL-9336, RHEL-9337)
|
||||
|
||||
* Wed Jul 19 2023 William Cohen <wcohen@redhat.com> - 5.6.0-19
|
||||
- Fix granularity setting (rhbz2221846)
|
||||
|
||||
* Fri May 5 2023 William Cohen <wcohen@redhat.com> - 5.6.0-18
|
||||
- Add event presets for Arm Neoverse processors (rhbz2111982, rhbz2111988)
|
||||
|
||||
* Thu Apr 27 2023 William Cohen <wcohen@redhat.com> - 5.6.0-17
|
||||
- Improve aarch64 read speed. (rhbz2161146)
|
||||
|
Loading…
Reference in New Issue
Block a user